# Listing metadata carried over from the source viewer: 493 lines, 18 KiB, Python.
"""优化版 DeepSeek API 驱动的智能情感分析 Agent"""
import asyncio
import json
import re
import time
from functools import lru_cache
from typing import Optional, Dict, Any

import httpx
from pydantic import BaseModel, Field

from src.config import Config
class APIError(Exception):
    """Error raised when a DeepSeek API request fails.

    Attributes:
        message: Human-readable description of the failure.
        status_code: Status code supplied by the raiser, or ``None`` when
            none was given.
    """

    def __init__(self, message: str, status_code: Optional[int] = None):
        # Exception keeps the message in ``args`` so ``str(exc)`` returns it.
        super().__init__(message)
        self.status_code = status_code
        self.message = message
class DeepSeekClient:
    """Async client for the DeepSeek ``/chat/completions`` endpoint.

    The API key, base URL and model name are read from ``Config`` when the
    client is constructed.
    """

    # Statuses treated as transient: rate limiting and server-side failures.
    _RETRYABLE_STATUS = frozenset({429, 500, 502, 503, 504})

    def __init__(self):
        """Load credentials and endpoint settings from ``Config``.

        Raises:
            ValueError: If ``Config.get_api_key()`` returns a falsy value.
        """
        api_key = Config.get_api_key()
        if not api_key:
            raise ValueError("DeepSeek API Key 未配置")

        self.api_key = api_key
        self.base_url = Config.get_base_url()
        self.model = Config.DEEPSEEK_MODEL

    async def chat_completion_with_retry(
        self,
        messages: list,
        temperature: float = 0.3,
        max_tokens: int = 500
    ) -> str:
        """Send a chat-completion request, retrying transient failures.

        At most ``Config.MAX_RETRY_ATTEMPTS`` attempts are made. Responses
        whose status is in ``_RETRYABLE_STATUS`` are retried with exponential
        backoff (``2 ** attempt`` seconds); network errors and unexpected
        exceptions are retried after one second. Any other non-200 status
        (401 included) is raised immediately with its status code, since
        repeating the same request is not expected to help.

        Args:
            messages: Chat messages, sent as the ``messages`` field of the
                request body.
            temperature: Sampling temperature forwarded to the API.
            max_tokens: Completion length limit forwarded to the API.

        Returns:
            The ``content`` of the first choice's message.

        Raises:
            APIError: On a non-retryable status, or once every attempt has
                failed. ``status_code`` is set when the failure was an HTTP
                status; the underlying exception, if any, is chained as
                ``__cause__``.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        payload = {
            "model": self.model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": False
        }
        url = f"{self.base_url}/chat/completions"
        max_attempts = Config.MAX_RETRY_ATTEMPTS

        last_error: Optional[APIError] = None

        for attempt in range(max_attempts):
            try:
                async with httpx.AsyncClient(timeout=Config.REQUEST_TIMEOUT) as client:
                    response = await client.post(url, headers=headers, json=payload)
                    status = response.status_code

                    if status == 200:
                        return response.json()["choices"][0]["message"]["content"]
                    if status == 401:
                        raise APIError("API 密钥无效", status)

                    error = APIError(f"API 调用失败: {status}", status)
                    if status not in self._RETRYABLE_STATUS:
                        raise error

                    # Transient status: remember it and back off exponentially.
                    last_error = error
                    delay = 2 ** attempt

            except APIError:
                # Must precede ``except Exception``: deliberate API errors are
                # final and keep their status code instead of being re-wrapped.
                raise

            except (httpx.ConnectError, httpx.TimeoutException) as exc:
                last_error = APIError(f"网络连接失败: {str(exc)}")
                last_error.__cause__ = exc
                delay = 1

            except Exception as exc:
                # E.g. a 200 response whose body lacks the expected structure.
                last_error = APIError(f"API 调用异常: {str(exc)}")
                last_error.__cause__ = exc
                delay = 1

            # Do not sleep after the final attempt; nothing follows it.
            if attempt < max_attempts - 1:
                await asyncio.sleep(delay)

        # Always an APIError, so callers need a single except clause.
        raise last_error or APIError("未知错误")
class SentimentAnalysisResult(BaseModel):
    """情感分析结果"""

    # Structured result of the sentiment-analysis step. Every field is
    # required (``...`` means "no default"). The docstring and the field
    # descriptions are kept verbatim because pydantic exposes them through
    # the model's generated schema, which makes them runtime text.

    # One of negative / neutral / positive (not enforced by the type).
    sentiment: str = Field(..., description="情感类别: negative/neutral/positive")
    # Confidence, described as a value between 0 and 1 (not enforced).
    confidence: float = Field(..., description="置信度 (0-1)")
    # Free-text explanation of the sentiment judgement.
    reasoning: str = Field(..., description="情感判断的推理过程")
    # Key factors that influenced the judgement.
    key_factors: list[str] = Field(..., description="影响情感判断的关键因素")
    # One of mild / moderate / strong (not enforced by the type).
    intensity: str = Field(..., description="情感强度: mild/moderate/strong")
class DisposalPlan(BaseModel):
    """处置方案"""

    # Handling plan for a tweet. All fields are required except
    # ``suggested_response``. The docstring and the field descriptions are
    # kept verbatim because pydantic exposes them through the model's
    # generated schema, which makes them runtime text.

    # One of high / medium / low (not enforced by the type).
    priority: str = Field(..., description="处理优先级: high/medium/low")
    # One of response / investigate / monitor / ignore (not enforced).
    action_type: str = Field(..., description="行动类型: response/investigate/monitor/ignore")
    # Suggested reply text; None when no reply is proposed.
    suggested_response: Optional[str] = Field(None, description="建议回复内容")
    # Suggested follow-up actions.
    follow_up_actions: list[str] = Field(..., description="后续行动建议")
    # Why this plan was chosen.
    reasoning: str = Field(..., description="处置方案制定的理由")
    # One of immediate / soon / normal (not enforced by the type).
    urgency_level: str = Field(..., description="紧急程度: immediate/soon/normal")
class TweetAnalysisResult(BaseModel):
    """推文分析完整结果"""

    # Complete result for one tweet: the inputs, both analysis stages and
    # run metadata. Every field is required. The docstring and the field
    # descriptions are kept verbatim because pydantic exposes them through
    # the model's generated schema, which makes them runtime text.

    # Original tweet text.
    tweet_text: str = Field(..., description="原始推文文本")
    # Airline the tweet is about.
    airline: str = Field(..., description="航空公司")
    # Output of the sentiment-analysis stage.
    sentiment_analysis: SentimentAnalysisResult = Field(..., description="情感分析结果")
    # Output of the disposal-plan stage.
    disposal_plan: DisposalPlan = Field(..., description="处置方案")
    # Processing time in seconds.
    processing_time: float = Field(..., description="处理耗时(秒)")
    # Whether the API was used to produce the result.
    api_used: bool = Field(..., description="是否使用了 API")
class ResponseParser:
    """Regex parser for labelled, line-oriented model replies.

    Each field is expected as ``<label><colon><value>`` where the label is a
    fixed Chinese phrase. Fields that cannot be found are either omitted or
    given a default, as documented on each method.
    """

    # A label may be followed by an ASCII colon or a full-width colon (U+FF1A).
    _COLON = "[:\uff1a]"
    # List values may be separated by an ASCII comma, a full-width comma
    # (U+FF0C) or an ideographic comma (U+3001).
    _ITEM_SEPARATOR = re.compile("[,\uff0c\u3001]")

    @staticmethod
    def _line_after(label: str, text: str) -> Optional[str]:
        """Return the stripped rest of the line following ``label``, or None."""
        match = re.search(rf"{label}{ResponseParser._COLON}([^\n]*)", text)
        return match.group(1).strip() if match else None

    @staticmethod
    def _split_items(text: str) -> list[str]:
        """Split a separator-delimited value into non-empty, stripped items."""
        pieces = (piece.strip() for piece in ResponseParser._ITEM_SEPARATOR.split(text))
        return [piece for piece in pieces if piece]

    @staticmethod
    def _choice_fields(text: str, choices: Dict[str, tuple]) -> Dict[str, str]:
        """Extract fields whose value must be one of a fixed set of words.

        ``choices`` maps result key -> ``(label, "opt1|opt2|...")``. Matching
        is case-insensitive and hits are lower-cased; misses are omitted.
        """
        found: Dict[str, str] = {}
        for key, (label, options) in choices.items():
            match = re.search(
                rf"{label}{ResponseParser._COLON}\s*({options})", text, re.IGNORECASE
            )
            if match:
                found[key] = match.group(1).lower()
        return found

    @staticmethod
    def parse_sentiment_response(response: str) -> Dict[str, Any]:
        """Parse a labelled sentiment reply into a field dictionary.

        Args:
            response: Raw reply text.

        Returns:
            A dict that always contains ``key_factors`` (possibly empty) and
            ``reasoning`` (a default sentence when absent), and contains
            ``sentiment``, ``confidence`` and ``intensity`` only when the
            corresponding label was found. A confidence written as a
            percentage (``85%``) is converted to a fraction (``0.85``).
        """
        result: Dict[str, Any] = ResponseParser._choice_fields(response, {
            "sentiment": ("情感类别", "negative|neutral|positive"),
            "intensity": ("情感强度", "mild|moderate|strong"),
        })

        confidence = re.search(
            rf"置信度{ResponseParser._COLON}\s*([0-9]*\.?[0-9]+)\s*([%\uff05])?",
            response,
        )
        if confidence:
            value = float(confidence.group(1))
            if confidence.group(2):
                # The field is documented as 0-1, so normalise percentages.
                value /= 100
            result["confidence"] = value

        factors = ResponseParser._line_after("关键因素", response)
        result["key_factors"] = ResponseParser._split_items(factors) if factors else []

        reasoning = ResponseParser._line_after("推理过程", response)
        if reasoning is None:
            # No reasoning line in the reply: fall back to a generic sentence.
            reasoning = "基于推文内容和航空行业特点进行综合分析"
        result["reasoning"] = reasoning

        return result

    @staticmethod
    def parse_disposal_response(response: str) -> Dict[str, Any]:
        """Parse a labelled disposal-plan reply into a field dictionary.

        Args:
            response: Raw reply text.

        Returns:
            A dict that always contains ``follow_up_actions`` (possibly
            empty) and ``reasoning`` (a default sentence when absent), and
            contains ``priority``, ``action_type``, ``urgency_level`` and
            ``suggested_response`` only when the corresponding label was
            found.
        """
        result: Dict[str, Any] = ResponseParser._choice_fields(response, {
            "priority": ("优先级", "high|medium|low"),
            "action_type": ("行动类型", "response|investigate|monitor|ignore"),
            "urgency_level": ("紧急程度", "immediate|soon|normal"),
        })

        suggested = ResponseParser._line_after("建议回复", response)
        if suggested is not None:
            result["suggested_response"] = suggested

        actions = ResponseParser._line_after("后续行动", response)
        result["follow_up_actions"] = ResponseParser._split_items(actions) if actions else []

        reasoning = ResponseParser._line_after("制定理由", response)
        if reasoning is None:
            reasoning = "基于情感分析结果制定"
        result["reasoning"] = reasoning

        return result
class OptimizedDeepSeekTweetAgent:
    """Two-step tweet analysis agent backed by the DeepSeek chat API.

    Step one classifies the tweet's sentiment; step two asks the model for a
    disposal (handling) plan based on that classification. Model replies are
    parsed as JSON first and with ``ResponseParser`` regexes as a fallback.
    When the API call itself fails with ``APIError``, default results are
    returned instead of raising.
    """

    # Reasoning text used when a JSON reply carries no "reasoning" field.
    _DEFAULT_SENTIMENT_REASONING = "基于推文内容和航空行业特点进行综合分析"
    _DEFAULT_PLAN_REASONING = "基于情感分析结果制定"

    def __init__(self):
        """Create the API client and the fallback reply parser.

        Raises:
            ValueError: Propagated from ``DeepSeekClient()`` when no API key
                is configured.
        """
        self.client = DeepSeekClient()
        self.parser = ResponseParser()

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _extract_json_object(raw: str) -> Dict[str, Any]:
        """Decode the first JSON object embedded anywhere in ``raw``.

        Unlike a ``{...}`` regex, this copes with nested objects, with ``}``
        inside string values, and with text around the object (for example
        markdown code fences).

        Raises:
            ValueError: If no position holding ``{`` starts a valid object.
        """
        decoder = json.JSONDecoder()
        position = raw.find("{")
        while position != -1:
            try:
                # raw_decode tolerates trailing text after the document.
                parsed, _ = decoder.raw_decode(raw, position)
                return parsed
            except json.JSONDecodeError:
                position = raw.find("{", position + 1)
        raise ValueError("响应中未找到有效的 JSON 对象")

    @staticmethod
    def _require_fields(data: Dict[str, Any], fields: tuple) -> None:
        """Raise ``ValueError`` naming the first of ``fields`` missing from ``data``."""
        for field in fields:
            if field not in data:
                raise ValueError(f"缺少必需字段: {field}")

    # ------------------------------------------------------------------
    # Sentiment analysis
    # ------------------------------------------------------------------

    @staticmethod
    def _build_sentiment_prompt(text: str, airline: str) -> str:
        """Return the user prompt for the sentiment step.

        The template lines are flush-left on purpose: they are part of the
        text sent to the model.
        """
        return f"""
你是一位专业的航空行业情感分析专家。请分析以下推文的情感倾向:

推文内容:"{text}"
航空公司:{airline}

请严格按照以下JSON格式输出分析结果:
{{
"sentiment": "negative/neutral/positive",
"confidence": 0.0-1.0之间的数值,
"intensity": "mild/moderate/strong",
"key_factors": ["因素1", "因素2", "因素3"],
"reasoning": "详细的情感判断推理过程"
}}

分析要求:
1. 情感判断要准确反映推文的真实情感
2. 置信度要基于推文的明确程度和情感强度
3. 关键因素要具体、相关
4. 推理过程要详细、有逻辑

请只输出JSON格式的结果,不要有其他内容。
"""

    def _parse_sentiment_reply(self, response: str) -> SentimentAnalysisResult:
        """Turn a model reply into a result: JSON first, labelled-line regex second."""
        try:
            data = self._extract_json_object(response)
            self._require_fields(data, ("sentiment", "confidence", "intensity"))
            # Fill the non-essential fields so an otherwise valid reply is
            # not thrown away just because one of them is missing.
            data.setdefault("key_factors", [])
            data.setdefault("reasoning", self._DEFAULT_SENTIMENT_REASONING)
            return SentimentAnalysisResult(**data)
        except ValueError as json_error:
            # Covers json.JSONDecodeError and pydantic's ValidationError,
            # both of which derive from ValueError.
            print(f"JSON解析失败,使用正则解析: {json_error}")
            parsed = self.parser.parse_sentiment_response(response)

            defaults = {
                "sentiment": "neutral",
                "confidence": 0.5,
                "intensity": "moderate"
            }
            for field, default in defaults.items():
                # ``is None`` rather than truthiness: a parsed confidence of
                # 0.0 is a real value and must not be replaced.
                if parsed.get(field) is None:
                    parsed[field] = default

            return SentimentAnalysisResult(**parsed)

    async def _sentiment_with_source(
        self, text: str, airline: str
    ) -> tuple[SentimentAnalysisResult, bool]:
        """Run the sentiment step; the flag tells whether the API answered."""
        messages = [
            {
                "role": "system",
                "content": "你是一位专业的航空行业情感分析专家,擅长准确识别推文中的情感倾向。"
            },
            {"role": "user", "content": self._build_sentiment_prompt(text, airline)}
        ]

        try:
            response = await self.client.chat_completion_with_retry(messages, temperature=0.1)
        except APIError as e:
            print(f"API调用失败: {e.message}")
            fallback = SentimentAnalysisResult(
                sentiment="neutral",
                confidence=0.5,
                intensity="moderate",
                key_factors=["API调用失败,使用默认分析"],
                reasoning=f"API调用失败: {e.message}"
            )
            return fallback, False

        # A missing/None content is treated as an empty reply, which ends up
        # on the defaults path instead of crashing.
        return self._parse_sentiment_reply(response or ""), True

    async def analyze_sentiment(self, text: str, airline: str) -> SentimentAnalysisResult:
        """Classify the sentiment of a tweet.

        Args:
            text: Tweet text.
            airline: Airline name inserted into the prompt.

        Returns:
            The parsed result, or a neutral default result (confidence 0.5,
            moderate intensity) when the API call fails or the reply cannot
            be parsed at all.
        """
        result, _ = await self._sentiment_with_source(text, airline)
        return result

    # ------------------------------------------------------------------
    # Disposal plan
    # ------------------------------------------------------------------

    @staticmethod
    def _build_disposal_prompt(
        text: str, airline: str, sentiment_result: SentimentAnalysisResult
    ) -> str:
        """Return the user prompt for the disposal-plan step.

        The template lines are flush-left on purpose: they are part of the
        text sent to the model.
        """
        return f"""
基于以下推文分析和情感判断结果,为航空公司制定一个合理的处置方案:

推文内容:"{text}"
航空公司:{airline}
情感分析结果:
- 情感类别:{sentiment_result.sentiment}
- 置信度:{sentiment_result.confidence:.1%}
- 情感强度:{sentiment_result.intensity}
- 关键因素:{', '.join(sentiment_result.key_factors)}

请严格按照以下JSON格式输出处置方案:
{{
"priority": "high/medium/low",
"action_type": "response/investigate/monitor/ignore",
"suggested_response": "具体的回复建议(如适用)",
"follow_up_actions": ["行动1", "行动2"],
"reasoning": "制定此方案的理由",
"urgency_level": "immediate/soon/normal"
}}

要求:
1. 优先级要基于情感强度和置信度
2. 行动类型要符合航空行业最佳实践
3. 建议回复要专业、有同理心
4. 后续行动要具体、可执行

请只输出JSON格式的结果。
"""

    def _parse_disposal_reply(self, response: str) -> DisposalPlan:
        """Turn a model reply into a plan: JSON first, labelled-line regex second."""
        try:
            data = self._extract_json_object(response)
            self._require_fields(data, ("priority", "action_type", "reasoning"))
            # Same idea as for sentiment: tolerate missing secondary fields.
            data.setdefault("follow_up_actions", [])
            data.setdefault("urgency_level", "normal")
            return DisposalPlan(**data)
        except ValueError as json_error:
            print(f"处置方案JSON解析失败,使用正则解析: {json_error}")
            parsed = self.parser.parse_disposal_response(response)

            defaults = {
                "priority": "medium",
                "action_type": "monitor",
                "reasoning": self._DEFAULT_PLAN_REASONING,
                "follow_up_actions": [],
                "urgency_level": "normal"
            }
            for field, default in defaults.items():
                if not parsed.get(field):
                    parsed[field] = default

            return DisposalPlan(**parsed)

    async def _plan_with_source(
        self,
        text: str,
        airline: str,
        sentiment_result: SentimentAnalysisResult
    ) -> tuple[DisposalPlan, bool]:
        """Run the disposal step; the flag tells whether the API answered."""
        messages = [
            {
                "role": "system",
                "content": "你是一位航空公司的客户服务专家,擅长制定合理的客户反馈处置方案。"
            },
            {
                "role": "user",
                "content": self._build_disposal_prompt(text, airline, sentiment_result)
            }
        ]

        try:
            response = await self.client.chat_completion_with_retry(messages, temperature=0.3)
        except APIError as e:
            print(f"处置方案API调用失败: {e.message}")
            return self._generate_default_disposal_plan(sentiment_result), False

        return self._parse_disposal_reply(response or ""), True

    async def generate_disposal_plan(
        self,
        text: str,
        airline: str,
        sentiment_result: SentimentAnalysisResult
    ) -> DisposalPlan:
        """Ask the model for a handling plan for an already-classified tweet.

        Args:
            text: Tweet text.
            airline: Airline name inserted into the prompt.
            sentiment_result: Output of the sentiment step; its fields are
                embedded in the prompt.

        Returns:
            The parsed plan. When the API call fails, a rule-based plan from
            ``_generate_default_disposal_plan`` is returned; when the reply
            cannot be parsed, a plan built from defaults is returned.
        """
        plan, _ = await self._plan_with_source(text, airline, sentiment_result)
        return plan

    def _generate_default_disposal_plan(self, sentiment_result: SentimentAnalysisResult) -> DisposalPlan:
        """Build a rule-based plan from the sentiment label alone.

        Negative tweets get a medium-priority investigation; positive and
        all other labels get low-priority monitoring.
        """
        if sentiment_result.sentiment == "negative":
            return DisposalPlan(
                priority="medium",
                action_type="investigate",
                suggested_response=None,
                follow_up_actions=["进一步核实情况", "根据核实结果决定行动"],
                reasoning="负面情感需要进一步调查",
                urgency_level="soon"
            )

        if sentiment_result.sentiment == "positive":
            return DisposalPlan(
                priority="low",
                action_type="monitor",
                suggested_response=None,
                follow_up_actions=["持续关注用户动态"],
                reasoning="正面情感保持关注即可",
                urgency_level="normal"
            )

        return DisposalPlan(
            priority="low",
            action_type="monitor",
            suggested_response=None,
            follow_up_actions=["常规关注"],
            reasoning="中性情感常规处理",
            urgency_level="normal"
        )

    # ------------------------------------------------------------------
    # Full pipeline
    # ------------------------------------------------------------------

    async def analyze_tweet(self, text: str, airline: str) -> TweetAnalysisResult:
        """Run sentiment analysis and plan generation for one tweet.

        Args:
            text: Tweet text.
            airline: Airline name inserted into both prompts.

        Returns:
            A ``TweetAnalysisResult`` holding both stage outputs and the
            elapsed time in seconds. ``api_used`` is True when at least one
            of the two API calls returned a reply, and False when both fell
            back to defaults because the API call failed.
        """
        # perf_counter is monotonic, so the duration cannot go negative if
        # the wall clock is adjusted mid-run.
        started = time.perf_counter()

        sentiment_result, sentiment_from_api = await self._sentiment_with_source(text, airline)
        disposal_plan, plan_from_api = await self._plan_with_source(
            text, airline, sentiment_result
        )

        processing_time = time.perf_counter() - started

        return TweetAnalysisResult(
            tweet_text=text,
            airline=airline,
            sentiment_analysis=sentiment_result,
            disposal_plan=disposal_plan,
            processing_time=processing_time,
            api_used=sentiment_from_api or plan_from_api
        )
# Module-level convenience wrappers around the agent (async first, then sync).
async def analyze_tweet_async(text: str, airline: str) -> TweetAnalysisResult:
    """Analyze one tweet with a freshly constructed agent (coroutine).

    Args:
        text: Tweet text.
        airline: Airline name passed to the agent.

    Returns:
        The agent's complete analysis result.
    """
    return await OptimizedDeepSeekTweetAgent().analyze_tweet(text, airline)
def analyze_tweet_sync(text: str, airline: str) -> TweetAnalysisResult:
    """Blocking wrapper around ``analyze_tweet_async``.

    Runs the coroutine to completion with ``asyncio.run``, which cannot be
    called while another event loop is running in the same thread.
    """
    pending = analyze_tweet_async(text, airline)
    return asyncio.run(pending)
# "Ultimate" variants: the airline is inferred from the tweet text, so no airline argument is needed.
async def analyze_tweet_ultimate_async(text: str) -> TweetAnalysisResult:
    """Analyze a tweet without an explicit airline argument (coroutine).

    The airline is guessed by case-insensitive substring search for a fixed
    set of keywords; the first keyword found (in the order listed below)
    wins, and a generic placeholder name is used when none is found.

    Args:
        text: Tweet text.

    Returns:
        The agent's complete analysis result.
    """
    agent = OptimizedDeepSeekTweetAgent()

    # Lower-case keyword -> airline name handed to the agent.
    known_airlines = {
        "united": "United Airlines",
        "delta": "Delta Air Lines",
        "american": "American Airlines",
        "southwest": "Southwest Airlines",
        "jetblue": "JetBlue Airways",
        "air china": "中国国际航空",
        "china eastern": "中国东方航空",
        "china southern": "中国南方航空"
    }

    lowered = text.lower()
    # Substring match on purpose: it also catches handles and hashtags that
    # merely contain the keyword.
    airline = next(
        (name for keyword, name in known_airlines.items() if keyword in lowered),
        "通用航空公司",
    )

    return await agent.analyze_tweet(text, airline)
def analyze_tweet_sync_ultimate(text: str) -> TweetAnalysisResult:
    """Blocking wrapper around ``analyze_tweet_ultimate_async``.

    Runs the coroutine to completion with ``asyncio.run``, which cannot be
    called while another event loop is running in the same thread.
    """
    pending = analyze_tweet_ultimate_async(text)
    return asyncio.run(pending)