G3/src/deepseek_agent_optimized.py

"""优化版 DeepSeek API 驱动的智能情感分析 Agent"""

import asyncio
import json
import re
from typing import Optional, Dict, Any
from functools import lru_cache
import httpx
from pydantic import BaseModel, Field

from src.config import Config


class APIError(Exception):
    """API 错误异常类"""
    def __init__(self, message: str, status_code: Optional[int] = None):
        self.message = message
        self.status_code = status_code
        super().__init__(self.message)


class DeepSeekClient:
    """优化版 DeepSeek API 客户端"""
    
    def __init__(self):
        api_key = Config.get_api_key()
        if not api_key:
            raise ValueError("DeepSeek API Key 未配置")
        
        self.api_key = api_key
        self.base_url = Config.get_base_url()
        self.model = Config.DEEPSEEK_MODEL
    
    async def chat_completion_with_retry(
        self, 
        messages: list, 
        temperature: float = 0.3,
        max_tokens: int = 500
    ) -> str:
        """带重试机制的 API 调用"""
        
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        
        data = {
            "model": self.model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": False
        }
        
        last_error = None
        
        for attempt in range(Config.MAX_RETRY_ATTEMPTS):
            try:
                async with httpx.AsyncClient(timeout=Config.REQUEST_TIMEOUT) as client:
                    response = await client.post(
                        f"{self.base_url}/chat/completions",
                        headers=headers,
                        json=data
                    )
                
                if response.status_code == 200:
                    result = response.json()
                    return result["choices"][0]["message"]["content"]
                elif response.status_code == 401:
                    raise APIError("API 密钥无效", response.status_code)
                elif response.status_code == 429:
                    # 限流，等待后重试
                    wait_time = 2 ** attempt  # 指数退避
                    await asyncio.sleep(wait_time)
                    continue
                else:
                    raise APIError(f"API 调用失败: {response.status_code}", response.status_code)
                    
            except (httpx.ConnectError, httpx.TimeoutException) as e:
                last_error = e
                if attempt < Config.MAX_RETRY_ATTEMPTS - 1:
                    await asyncio.sleep(1)  # 等待1秒后重试
                    continue
                else:
                    raise APIError(f"网络连接失败: {str(e)}")
            except Exception as e:
                last_error = e
                if attempt < Config.MAX_RETRY_ATTEMPTS - 1:
                    await asyncio.sleep(1)
                    continue
                else:
                    raise APIError(f"API 调用异常: {str(e)}")
        
        raise last_error or APIError("未知错误")


class SentimentAnalysisResult(BaseModel):
    """情感分析结果"""
    sentiment: str = Field(description="情感类别: negative/neutral/positive")
    confidence: float = Field(description="置信度 (0-1)")
    reasoning: str = Field(description="情感判断的推理过程")
    key_factors: list[str] = Field(description="影响情感判断的关键因素")
    intensity: str = Field(description="情感强度: mild/moderate/strong")


class DisposalPlan(BaseModel):
    """处置方案"""
    priority: str = Field(description="处理优先级: high/medium/low")
    action_type: str = Field(description="行动类型: response/investigate/monitor/ignore")
    suggested_response: Optional[str] = Field(description="建议回复内容", default=None)
    follow_up_actions: list[str] = Field(description="后续行动建议")
    reasoning: str = Field(description="处置方案制定的理由")
    urgency_level: str = Field(description="紧急程度: immediate/soon/normal")


class TweetAnalysisResult(BaseModel):
    """推文分析完整结果"""
    tweet_text: str = Field(description="原始推文文本")
    airline: str = Field(description="航空公司")
    sentiment_analysis: SentimentAnalysisResult = Field(description="情感分析结果")
    disposal_plan: DisposalPlan = Field(description="处置方案")
    processing_time: float = Field(description="处理耗时（秒）")
    api_used: bool = Field(description="是否使用了 API")


class ResponseParser:
    """API 响应解析器"""
    
    @staticmethod
    def parse_sentiment_response(response: str) -> Dict[str, Any]:
        """解析情感分析响应"""
        
        # 使用正则表达式进行更精确的解析
        patterns = {
            "sentiment": r"情感类别[：:]\s*(negative|neutral|positive)",
            "confidence": r"置信度[：:]\s*([0-9]*\.?[0-9]+)",
            "intensity": r"情感强度[：:]\s*(mild|moderate|strong)",
        }
        
        result = {}
        
        for key, pattern in patterns.items():
            match = re.search(pattern, response, re.IGNORECASE)
            if match:
                result[key] = match.group(1).lower() if key != "confidence" else float(match.group(1))
        
        # 解析关键因素
        factors_match = re.search(r"关键因素[：:]([^\n]*)(?:\n|$)", response)
        if factors_match:
            factors_text = factors_match.group(1).strip()
            result["key_factors"] = [f.strip() for f in factors_text.split(",") if f.strip()]
        else:
            result["key_factors"] = []
        
        # 提取推理过程
        reasoning_match = re.search(r"推理过程[：:]([^\n]*)(?:\n|$)", response)
        if reasoning_match:
            result["reasoning"] = reasoning_match.group(1).strip()
        else:
            # 如果找不到，使用默认推理
            result["reasoning"] = "基于推文内容和航空行业特点进行综合分析"
        
        return result
    
    @staticmethod
    def parse_disposal_response(response: str) -> Dict[str, Any]:
        """解析处置方案响应"""
        
        patterns = {
            "priority": r"优先级[：:]\s*(high|medium|low)",
            "action_type": r"行动类型[：:]\s*(response|investigate|monitor|ignore)",
            "urgency_level": r"紧急程度[：:]\s*(immediate|soon|normal)",
        }
        
        result = {}
        
        for key, pattern in patterns.items():
            match = re.search(pattern, response, re.IGNORECASE)
            if match:
                result[key] = match.group(1).lower()
        
        # 解析建议回复
        response_match = re.search(r"建议回复[：:]([^\n]*)(?:\n|$)", response)
        if response_match:
            result["suggested_response"] = response_match.group(1).strip()
        
        # 解析后续行动
        actions_match = re.search(r"后续行动[：:]([^\n]*)(?:\n|$)", response)
        if actions_match:
            actions_text = actions_match.group(1).strip()
            result["follow_up_actions"] = [a.strip() for a in actions_text.split(",") if a.strip()]
        else:
            result["follow_up_actions"] = []
        
        # 解析制定理由
        reasoning_match = re.search(r"制定理由[：:]([^\n]*)(?:\n|$)", response)
        if reasoning_match:
            result["reasoning"] = reasoning_match.group(1).strip()
        else:
            result["reasoning"] = "基于情感分析结果制定"
        
        return result


class OptimizedDeepSeekTweetAgent:
    """优化版 DeepSeek 推文分析 Agent"""
    
    def __init__(self):
        self.client = DeepSeekClient()
        self.parser = ResponseParser()
    
    async def analyze_sentiment(self, text: str, airline: str) -> SentimentAnalysisResult:
        """优化版情感分析"""
        
        prompt = f"""
你是一位专业的航空行业情感分析专家。请分析以下推文的情感倾向：

推文内容："{text}"
航空公司：{airline}

请严格按照以下JSON格式输出分析结果：
{{
    "sentiment": "negative/neutral/positive",
    "confidence": 0.0-1.0之间的数值,
    "intensity": "mild/moderate/strong",
    "key_factors": ["因素1", "因素2", "因素3"],
    "reasoning": "详细的情感判断推理过程"
}}

分析要求：
1. 情感判断要准确反映推文的真实情感
2. 置信度要基于推文的明确程度和情感强度
3. 关键因素要具体、相关
4. 推理过程要详细、有逻辑

请只输出JSON格式的结果，不要有其他内容。
"""
        
        messages = [
            {
                "role": "system", 
                "content": "你是一位专业的航空行业情感分析专家，擅长准确识别推文中的情感倾向。"
            },
            {"role": "user", "content": prompt}
        ]
        
        try:
            response = await self.client.chat_completion_with_retry(messages, temperature=0.1)
            
            # 清理响应文本，移除可能的标记和空白
            cleaned_response = response.strip()
            
            # 尝试解析JSON响应
            try:
                # 尝试提取JSON部分（如果响应包含其他文本）
                json_match = re.search(r'\{[^}]+\}', cleaned_response)
                if json_match:
                    json_text = json_match.group(0)
                    result_data = json.loads(json_text)
                else:
                    result_data = json.loads(cleaned_response)
                
                # 验证必需字段
                required_fields = ["sentiment", "confidence", "intensity"]
                for field in required_fields:
                    if field not in result_data:
                        raise ValueError(f"缺少必需字段: {field}")
                
                return SentimentAnalysisResult(**result_data)
                
            except (json.JSONDecodeError, ValueError) as json_error:
                # JSON解析失败，使用正则解析
                print(f"JSON解析失败，使用正则解析: {json_error}")
                parsed_data = self.parser.parse_sentiment_response(response)
                
                # 确保必需字段有默认值
                default_values = {
                    "sentiment": "neutral",
                    "confidence": 0.5,
                    "intensity": "moderate"
                }
                
                for field, default_value in default_values.items():
                    if field not in parsed_data or not parsed_data[field]:
                        parsed_data[field] = default_value
                
                return SentimentAnalysisResult(**parsed_data)
                
        except APIError as e:
            # API调用失败，返回默认结果
            print(f"API调用失败: {e.message}")
            return SentimentAnalysisResult(
                sentiment="neutral",
                confidence=0.5,
                intensity="moderate",
                key_factors=["API调用失败，使用默认分析"],
                reasoning=f"API调用失败: {e.message}"
            )
    
    async def generate_disposal_plan(
        self, 
        text: str, 
        airline: str, 
        sentiment_result: SentimentAnalysisResult
    ) -> DisposalPlan:
        """生成优化版处置方案"""
        
        prompt = f"""
基于以下推文分析和情感判断结果，为航空公司制定一个合理的处置方案：

推文内容："{text}"
航空公司：{airline}
情感分析结果：
- 情感类别：{sentiment_result.sentiment}
- 置信度：{sentiment_result.confidence:.1%}
- 情感强度：{sentiment_result.intensity}
- 关键因素：{', '.join(sentiment_result.key_factors)}

请严格按照以下JSON格式输出处置方案：
{{
    "priority": "high/medium/low",
    "action_type": "response/investigate/monitor/ignore",
    "suggested_response": "具体的回复建议（如适用）",
    "follow_up_actions": ["行动1", "行动2"],
    "reasoning": "制定此方案的理由",
    "urgency_level": "immediate/soon/normal"
}}

要求：
1. 优先级要基于情感强度和置信度
2. 行动类型要符合航空行业最佳实践
3. 建议回复要专业、有同理心
4. 后续行动要具体、可执行

请只输出JSON格式的结果。
"""
        
        messages = [
            {
                "role": "system", 
                "content": "你是一位航空公司的客户服务专家，擅长制定合理的客户反馈处置方案。"
            },
            {"role": "user", "content": prompt}
        ]
        
        try:
            response = await self.client.chat_completion_with_retry(messages, temperature=0.3)
            
            # 清理响应文本
            cleaned_response = response.strip()
            
            try:
                # 尝试提取JSON部分
                json_match = re.search(r'\{[^}]+\}', cleaned_response)
                if json_match:
                    json_text = json_match.group(0)
                    result_data = json.loads(json_text)
                else:
                    result_data = json.loads(cleaned_response)
                
                # 验证必需字段
                required_fields = ["priority", "action_type", "reasoning"]
                for field in required_fields:
                    if field not in result_data:
                        raise ValueError(f"缺少必需字段: {field}")
                
                return DisposalPlan(**result_data)
                
            except (json.JSONDecodeError, ValueError) as json_error:
                # JSON解析失败，使用正则解析
                print(f"处置方案JSON解析失败，使用正则解析: {json_error}")
                parsed_data = self.parser.parse_disposal_response(response)
                
                # 确保必需字段有默认值
                default_values = {
                    "priority": "medium",
                    "action_type": "monitor",
                    "reasoning": "基于情感分析结果制定",
                    "follow_up_actions": [],
                    "urgency_level": "normal"
                }
                
                for field, default_value in default_values.items():
                    if field not in parsed_data or not parsed_data[field]:
                        parsed_data[field] = default_value
                
                return DisposalPlan(**parsed_data)
                
        except APIError as e:
            # API调用失败，返回默认处置方案
            print(f"处置方案API调用失败: {e.message}")
            return self._generate_default_disposal_plan(sentiment_result)
    
    def _generate_default_disposal_plan(self, sentiment_result: SentimentAnalysisResult) -> DisposalPlan:
        """生成默认处置方案"""
        
        if sentiment_result.sentiment == "negative":
            return DisposalPlan(
                priority="medium",
                action_type="investigate",
                suggested_response=None,
                follow_up_actions=["进一步核实情况", "根据核实结果决定行动"],
                reasoning="负面情感需要进一步调查",
                urgency_level="soon"
            )
        elif sentiment_result.sentiment == "positive":
            return DisposalPlan(
                priority="low",
                action_type="monitor",
                suggested_response=None,
                follow_up_actions=["持续关注用户动态"],
                reasoning="正面情感保持关注即可",
                urgency_level="normal"
            )
        else:
            return DisposalPlan(
                priority="low",
                action_type="monitor",
                suggested_response=None,
                follow_up_actions=["常规关注"],
                reasoning="中性情感常规处理",
                urgency_level="normal"
            )
    
    async def analyze_tweet(self, text: str, airline: str) -> TweetAnalysisResult:
        """完整的推文分析流程"""
        import time
        start_time = time.time()
        
        # 1. 情感分析
        sentiment_result = await self.analyze_sentiment(text, airline)
        
        # 2. 生成处置方案
        disposal_plan = await self.generate_disposal_plan(text, airline, sentiment_result)
        
        # 3. 计算处理时间
        processing_time = time.time() - start_time
        
        # 返回完整结果
        return TweetAnalysisResult(
            tweet_text=text,
            airline=airline,
            sentiment_analysis=sentiment_result,
            disposal_plan=disposal_plan,
            processing_time=processing_time,
            api_used=True
        )


# 同步版本的包装函数
async def analyze_tweet_async(text: str, airline: str) -> TweetAnalysisResult:
    """异步版本的推文分析"""
    agent = OptimizedDeepSeekTweetAgent()
    return await agent.analyze_tweet(text, airline)


def analyze_tweet_sync(text: str, airline: str) -> TweetAnalysisResult:
    """同步版本的推文分析函数"""
    return asyncio.run(analyze_tweet_async(text, airline))


# 终极版本 - 完全不需要航空公司参数
async def analyze_tweet_ultimate_async(text: str) -> TweetAnalysisResult:
    """终极版本异步推文分析 - 无需航空公司参数"""
    agent = OptimizedDeepSeekTweetAgent()
    
    # 自动检测航空公司或使用通用标识
    airline = "通用航空公司"
    
    # 简单的航空公司检测逻辑
    airline_keywords = {
        "united": "United Airlines",
        "delta": "Delta Air Lines", 
        "american": "American Airlines",
        "southwest": "Southwest Airlines",
        "jetblue": "JetBlue Airways",
        "air china": "中国国际航空",
        "china eastern": "中国东方航空",
        "china southern": "中国南方航空"
    }
    
    text_lower = text.lower()
    for keyword, airline_name in airline_keywords.items():
        if keyword in text_lower:
            airline = airline_name
            break
    
    return await agent.analyze_tweet(text, airline)


def analyze_tweet_sync_ultimate(text: str) -> TweetAnalysisResult:
    """终极版本同步推文分析 - 完全无需航空公司参数"""
    return asyncio.run(analyze_tweet_ultimate_async(text))