# =============================================================================
# 企微IT智能服务台 — 内容审核服务
# =============================================================================
# 说明：#81 v0.6.0 内容审核 — 检测敏感词 + 提示坐席优化语气
# 用途：坐席发送消息前自动审核,避免发送违规内容
# 设计：基于 wordfilter 开源库 + 自定义敏感词库
# =============================================================================

from dataclasses import dataclass
from enum import Enum
from typing import List, Optional, Tuple

from wordfilter import Wordfilter

from app.utils.logger import get_logger

# Module-level logger; the moderation service uses it to record sensitive-word hits.
logger = get_logger(__name__)


class ModerationAction(str, Enum):
    """Action decided for a moderated message.

    The class mixes in ``str``, so every member is also a string and compares
    equal to its plain value (for example ``ModerationAction.PASS == "pass"``).
    """
    PASS = "pass"                    # allowed through
    WARN = "warn"                    # warning: sending is still allowed, but the message is flagged
    BLOCK = "block"                  # blocked: the message must be edited before sending


class ModerationCategory(str, Enum):
    """Category assigned to a moderation hit.

    The class mixes in ``str``, so every member compares equal to its plain
    string value.
    """
    PROFANITY = "profanity"          # rude or abusive wording
    POLITICS = "politics"            # politically sensitive content
    PORN = "porn"                    # pornographic content
    AD = "ad"                        # advertising
    PRIVACY = "privacy"              # privacy leak (ID card / phone number)
    OTHER = "other"                  # anything else


@dataclass
class ModerationResult:
    """Outcome of moderating one piece of text."""
    # Action decided for the text.
    action: ModerationAction
    # Category of the hit; None is allowed and is what the service passes
    # when nothing matched.
    category: Optional[ModerationCategory]
    # Sensitive words found in the text (empty when nothing matched).
    matched_words: List[str]
    # Rewrite hint for the agent; defaults to an empty string.
    suggestion: str = ""

    @property
    def is_blocked(self) -> bool:
        """Return True when the decided action is BLOCK."""
        return self.action == ModerationAction.BLOCK

    @property
    def is_warned(self) -> bool:
        """Return True when the decided action is WARN."""
        return self.action == ModerationAction.WARN


class ContentModerationService:
    """内容审核服务 — 检测 + 提示。

    设计要点:
    1. 加载 wordfilter + 自定义敏感词库
    2. 提供 3 个级别动作:pass / warn / block
    3. 返回命中的敏感词,给前端提示
    4. 异步不阻塞消息发送主流程
    """

    def __init__(self):
        # 初始化 wordfilter(新 API: Wordfilter() 实例,而非 init() 全局)
        self.wf = Wordfilter()
        # 加载自定义敏感词库(预留,生产环境从配置文件加载)
        self.custom_sensitive_words: List[str] = [
            # 坐席严禁发送的
            "投诉我",          # 暗示员工投诉自己
            "你爱找谁找谁",    # 不当推诿
            "自己不会百度吗",  # 不当反问
            "这点小事",        # 轻视员工问题
            # 隐私保护(后端检测,前端不知道)
            # 实际部署时从 system_config 加载
        ]
        if self.custom_sensitive_words:
            self.wf.addWords(self.custom_sensitive_words)

    # ==================================================================
    # 主入口
    # ==================================================================

    def moderate(self, text: str) -> ModerationResult:
        """审核文本。

        Args:
            text: 待审核文本(坐席准备发的消息)

        Returns:
            ModerationResult: 审核结果
        """
        if not text or not text.strip():
            return ModerationResult(
                action=ModerationAction.PASS,
                category=None,
                matched_words=[],
            )

        text = text.strip()

        # 1. wordfilter 检测
        matched: List[str] = []
        if self.wf.blacklisted(text):
            # 找出具体哪些词命中
            matched = self._extract_matched(text)

        if not matched:
            return ModerationResult(
                action=ModerationAction.PASS,
                category=None,
                matched_words=[],
            )

        # 2. 分类(简单规则:有命中就给 warn,后续可分级)
        category = self._classify(matched)

        # 3. 决定动作(目前策略:命中即 warn,后续可升级 block)
        # 后续决策点:是否给某些类(政治/色情)直接 block
        action = ModerationAction.WARN
        suggestion = self._generate_suggestion(category, matched)

        logger.info(
            f"[ContentModeration] 检测到敏感词 text={text[:30]}... "
            f"matched={matched} category={category}"
        )

        return ModerationResult(
            action=action,
            category=category,
            matched_words=matched,
            suggestion=suggestion,
        )

    # ==================================================================
    # 隐私信息检测(基于正则,跟敏感词无关)
    # ==================================================================

    def check_privacy_leak(self, text: str) -> List[str]:
        """检测文本是否包含隐私信息(身份证 / 电话 / 银行卡)。

        Returns:
            命中的隐私字段列表(描述性,如 ["phone", "id_card"])
        """
        import re
        leaked = []

        # 手机号(11 位 1 开头)
        if re.search(r"\b1[3-9]\d{9}\b", text):
            leaked.append("phone")

        # 身份证号(18 位)
        if re.search(r"\b\d{17}[\dXx]\b", text):
            leaked.append("id_card")

        # 银行卡(16-19 位连续数字,简单判断)
        if re.search(r"\b\d{16,19}\b", text):
            leaked.append("bank_card")

        # 邮箱(个人邮箱,非公司邮箱)
        personal_email_pattern = (
            r"\b[a-zA-Z0-9._%+-]+@(?!servyou-it\.com|"
            r"servyou\.com\.cn)[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b"
        )
        if re.search(personal_email_pattern, text):
            leaked.append("personal_email")

        return leaked

    # ==================================================================
    # 工具方法
    # ==================================================================

    def _extract_matched(self, text: str) -> List[str]:
        """提取命中的敏感词。"""
        # wordfilter 没有直接的 "提取所有命中词" API,只能 replace 看
        matched = []
        # 遍历自建词库看哪些命中
        for word in self.custom_sensitive_words:
            if word in text:
                matched.append(word)
        return matched

    def _classify(self, matched: List[str]) -> ModerationCategory:
        """根据命中的词分类。"""
        # 简单分类:命中"投诉""爱找谁"等 → profanity
        # 后续可扩展
        return ModerationCategory.PROFANITY

    def _generate_suggestion(
        self, category: ModerationCategory, matched: List[str]
    ) -> str:
        """生成修改建议。"""
        suggestions_map = {
            ModerationCategory.PROFANITY: (
                "建议改为更专业的表达,例如:"
                "「我理解您的问题,我们一起想办法解决」"
            ),
            ModerationCategory.POLITICS: (
                "请避免讨论政治话题,保持服务专业性"
            ),
            ModerationCategory.PORN: "请使用正式语言",
            ModerationCategory.AD: "请勿发送广告内容",
            ModerationCategory.PRIVACY: (
                "请勿发送员工隐私信息(电话/身份证),如需联系请走企微"
            ),
            ModerationCategory.OTHER: "请检查并修改表达",
        }
        return suggestions_map.get(category, "请检查并修改表达")

    @staticmethod
    def _get_fallback_question(keywords: List[str]) -> dict:
        """Dify 失败时的兜底题(从预置题池随机抽一道)。

        注意:这里写死 10 道 IT 基础题,生产环境可改成查 quiz_questions.source='manual'
        """
        import random

        fallback_pool = [
            {
                "question": "电脑突然黑屏,最安全的做法是?",
                "options": ["强制关机重启", "拔电源重启", "等几分钟看是否恢复", "砸电脑"],
                "correct_index": 0,
                "hint": "想想最稳妥的第一步",
                "explanation": "黑屏可能是系统卡死,强制重启通常能恢复,拔电源可能损坏硬件",
                "source": "manual",
            },
            {
                "question": "打印机不响应,首先应该检查?",
                "options": ["打印机电源", "重装系统", "换台电脑", "直接呼叫维修"],
                "correct_index": 0,
                "hint": "最基础的物理连接",
                "explanation": "80% 故障是电源/线缆问题,先排除最简单的再考虑复杂方案",
                "source": "manual",
            },
            {
                "question": "密码忘了应该怎么办?",
                "options": ["自己猜", "暴力破解", "找 IT 重置", "不用了"],
                "correct_index": 2,
                "hint": "走正规流程最安全",
                "explanation": "找 IT 重置是最快最安全的做法,自己猜可能锁账号,暴力破解违法",
                "source": "manual",
            },
            {
                "question": "无法连接公司 VPN,首选排查?",
                "options": ["检查网络是否通", "重装系统", "换电脑", "联系运营商"],
                "correct_index": 0,
                "hint": "从外到内排查",
                "explanation": "先确认能上网,再排查 VPN 客户端,最后才是公司 VPN 服务器",
                "source": "manual",
            },
            {
                "question": "Outlook 收不到邮件,先看哪里?",
                "options": ["垃圾邮件箱", "重装 Office", "换邮箱", "打电话给 IT"],
                "correct_index": 0,
                "hint": "最容易被忽略的",
                "explanation": "新邮件被误判到垃圾箱是常见原因,先看再排查服务器",
                "source": "manual",
            },
            {
                "question": "Office 软件打开慢,先做什么?",
                "options": ["清理开机启动项", "换电脑", "买新硬盘", "卸载重装"],
                "correct_index": 0,
                "hint": "性能问题先减负",
                "explanation": "开机启动项太多会拖慢所有应用,清理后再观察",
                "source": "manual",
            },
            {
                "question": "电脑提示磁盘空间不足,应该?",
                "options": ["清理回收站和临时文件", "关机", "重装系统", "不处理"],
                "correct_index": 0,
                "hint": "先释放空间再判断",
                "explanation": "90% 的情况清理回收站 + temp 目录就能解决,严重才需要重装",
                "source": "manual",
            },
            {
                "question": "网页打不开,首先排查?",
                "options": ["检查网络连接", "换浏览器", "重装系统", "砸键盘"],
                "correct_index": 0,
                "hint": "从最基础的开始",
                "explanation": "先看能不能打开其他网页,排除是网站问题还是网络问题",
                "source": "manual",
            },
            {
                "question": "U 盘插入电脑没反应,先检查?",
                "options": ["换个 USB 接口", "格式化 U 盘", "扔了", "拆电脑"],
                "correct_index": 0,
                "hint": "先排除最简单的问题",
                "explanation": "USB 接口可能松动或供电不足,先换接口试,不要先动数据",
                "source": "manual",
            },
            {
                "question": "电脑突然变卡,第一步应该?",
                "options": ["看任务管理器占用", "砸电脑", "重装系统", "关机睡觉"],
                "correct_index": 0,
                "hint": "数据先行",
                "explanation": "任务管理器能看到 CPU/内存/磁盘占用,定位是哪个进程在吃资源",
                "source": "manual",
            },
        ]

        chosen = random.choice(fallback_pool)
        return chosen

    def add_custom_word(self, word: str) -> None:
        """动态添加敏感词(运营后台调用)。"""
        self.wf.addWords([word])
        if word not in self.custom_sensitive_words:
            self.custom_sensitive_words.append(word)

    def remove_custom_word(self, word: str) -> None:
        """动态删除敏感词。"""
        # wordfilter 没有 remove API,降级用 replace 占位
        # wordfilter.remove(word)  # 实际库不一定支持
        if word in self.custom_sensitive_words:
            self.custom_sensitive_words.remove(word)


# Singleton slot: stays None until get_moderation_service() is first called.
_moderation_service: Optional[ContentModerationService] = None


def get_moderation_service() -> ContentModerationService:
    """Return the shared ContentModerationService, creating it on first use."""
    global _moderation_service
    if _moderation_service is not None:
        return _moderation_service
    _moderation_service = ContentModerationService()
    return _moderation_service