"""Tweet sentiment analysis agent.

Implements a "classify -> explain -> generate disposal plan" pipeline and
returns the outcome as structured (pydantic) results.
"""

import re
import zlib
from pathlib import Path
from typing import Optional

import numpy as np
import polars as pl

from pydantic import BaseModel, Field

from src.tweet_data import load_cleaned_tweets
from src.train_tweet_ultimate import load_model as load_ultimate_model


# Confidence at or above which a negative/positive prediction triggers a
# direct response instead of investigate/monitor.
_HIGH_CONFIDENCE = 0.8


def _compile_keywords(words: tuple[str, ...]) -> tuple[tuple[str, "re.Pattern[str]"], ...]:
    """Pair each keyword with a pattern matching it at the start of a word.

    A word-boundary prefix match keeps inflected forms ("thanks", "helped")
    while rejecting hits buried inside other words ("unhappy" -> "happy",
    "show" -> "how").
    """
    return tuple((word, re.compile(r"\b" + re.escape(word))) for word in words)


_NEGATIVE_KEYWORDS = _compile_keywords((
    "bad", "terrible", "awful", "worst", "hate", "angry", "disappointed",
    "frustrated", "cancelled", "delayed", "lost", "rude",
))
_POSITIVE_KEYWORDS = _compile_keywords((
    "good", "great", "excellent", "best", "love", "happy", "satisfied",
    "amazing", "wonderful", "thank", "helpful",
))
_NEUTRAL_KEYWORDS = _compile_keywords((
    "question", "how", "what", "when", "where", "why", "please", "help",
    "info", "information",
))

# (compiled keywords, key-factor label, reasoning sentence), evaluated in order.
_KEYWORD_RULES = (
    (_NEGATIVE_KEYWORDS, "包含负面词汇", "文本中包含多个负面情感词汇,表达不满情绪"),
    (_POSITIVE_KEYWORDS, "包含正面词汇", "文本中包含正面情感词汇,表达满意或感谢"),
    (_NEUTRAL_KEYWORDS, "包含中性词汇", "文本主要包含询问或请求,情绪相对中性"),
)

# (marker character, key-factor label, reasoning sentence), evaluated in order.
_PUNCTUATION_RULES = (
    ("!", "包含感叹号", "感叹号的使用表明情绪较为强烈"),
    ("?", "包含问号", "问号的使用表明存在疑问或询问"),
    ("@", "包含@提及", "直接@航空公司表明希望获得关注或回复"),
)

_NEGATIVE_RESPONSES = (
    "感谢您的反馈。我们非常重视您提到的问题,将立即进行调查并尽快给您答复。",
    "对于您的不愉快体验,我们深表歉意。请私信我们详细情况,我们将全力为您解决。",
    "收到您的反馈,我们对此感到抱歉。相关部门已介入,将尽快处理并给您满意的答复。",
)
_POSITIVE_RESPONSES = (
    "感谢您的认可和支持!我们会继续努力为您提供更好的服务。",
    "很高兴听到您的正面反馈!您的满意是我们前进的动力。",
    "感谢您的分享!我们会将您的反馈传达给团队,激励我们做得更好。",
)
_NEUTRAL_RESPONSES = (
    "感谢您的询问。请问您需要了解哪方面的信息?我们将竭诚为您解答。",
    "收到您的问题。请提供更多细节,以便我们更好地为您提供帮助。",
)


def _find_keywords(text_lower: str, keywords) -> list[str]:
    """Return the keywords (in table order) that start a word in *text_lower*."""
    return [word for word, pattern in keywords if pattern.search(text_lower)]


def _pick_response(text: str, responses: tuple[str, ...]) -> str:
    """Pick one template from *responses*, deterministically for a given *text*.

    Uses CRC32 rather than the built-in ``hash``: string hashing is salted per
    process, so ``hash(text)`` would select a different template on each run.
    """
    digest = zlib.crc32(text.encode("utf-8", errors="replace"))
    return responses[digest % len(responses)]


class SentimentClassification(BaseModel):
    """Sentiment classification result."""
    sentiment: str = Field(description="情感类别: negative/neutral/positive")
    confidence: float = Field(description="置信度 (0-1)")


class SentimentExplanation(BaseModel):
    """Explanation of a sentiment judgement."""
    key_factors: list[str] = Field(description="影响情感判断的关键因素")
    reasoning: str = Field(description="情感判断的推理过程")


class DisposalPlan(BaseModel):
    """Handling plan for a tweet."""
    priority: str = Field(description="处理优先级: high/medium/low")
    action_type: str = Field(description="行动类型: response/investigate/monitor/ignore")
    suggested_response: Optional[str] = Field(description="建议回复内容(如适用)", default=None)
    follow_up_actions: list[str] = Field(description="后续行动建议")


class TweetAnalysisResult(BaseModel):
    """Structured output of the full tweet analysis."""
    tweet_text: str = Field(description="原始推文文本")
    airline: str = Field(description="航空公司")
    classification: SentimentClassification = Field(description="情感分类结果")
    explanation: SentimentExplanation = Field(description="情感解释")
    disposal_plan: DisposalPlan = Field(description="处置方案")


class TweetSentimentAgent:
    """Tweet sentiment analysis agent.

    Runs the "classify -> explain -> generate disposal plan" pipeline.
    """

    def __init__(self, model_path: Optional[Path] = None):
        """Load the model and keep references to its encoders/vectorizer.

        Args:
            model_path: Optional model path.
                NOTE(review): this argument is currently not forwarded to
                ``load_ultimate_model``; the loader's default is always used.
                Confirm the loader's signature before wiring it through.
        """
        self.model = load_ultimate_model()
        self.label_encoder = self.model.label_encoder
        self.tfidf_vectorizer = self.model.tfidf_vectorizer
        self.airline_encoder = self.model.airline_encoder

    def classify(self, text: str, airline: str) -> SentimentClassification:
        """Classify the sentiment of a single tweet.

        Args:
            text: Tweet text.
            airline: Airline name.

        Returns:
            The predicted label and the model's probability for that label.
        """
        texts = np.array([text])
        airlines = np.array([airline])

        sentiment = self.model.predict(texts, airlines)[0]
        proba = self.model.predict_proba(texts, airlines)[0]

        # Confidence is the probability at the predicted label's encoded index.
        sentiment_idx = self.label_encoder.transform([sentiment])[0]

        return SentimentClassification(
            # Coerce a possible NumPy string scalar to a plain ``str``.
            sentiment=str(sentiment),
            confidence=float(proba[sentiment_idx]),
        )

    def explain(self, text: str, airline: str, classification: SentimentClassification) -> SentimentExplanation:
        """Build a rule-based explanation for a tweet.

        The explanation is derived from keyword and punctuation cues in
        *text*; *classification* is accepted for interface symmetry but is
        not consulted.

        Args:
            text: Tweet text.
            airline: Airline name.
            classification: Sentiment classification result.

        Returns:
            Key factors and a reasoning string.
        """
        key_factors = []
        reasoning_parts = []

        text_lower = text.lower()

        # Sentiment keywords; at most three hits per category are listed.
        for keywords, label, reason in _KEYWORD_RULES:
            found = _find_keywords(text_lower, keywords)
            if found:
                key_factors.append(f"{label}: {', '.join(found[:3])}")
                reasoning_parts.append(reason)

        # Surface features of the text.
        for marker, label, reason in _PUNCTUATION_RULES:
            if marker in text:
                key_factors.append(label)
                reasoning_parts.append(reason)

        key_factors.append(f"涉及航空公司: {airline}")

        # Fallback when no cue fired.
        if not reasoning_parts:
            reasoning_parts.append("根据文本整体语义和情感特征进行判断")

        reasoning = "。".join(reasoning_parts) + "。"

        return SentimentExplanation(
            key_factors=key_factors,
            reasoning=reasoning,
        )

    def generate_disposal_plan(
        self,
        text: str,
        airline: str,
        classification: SentimentClassification,
        explanation: SentimentExplanation,
    ) -> DisposalPlan:
        """Derive a handling plan from the classification.

        Args:
            text: Tweet text.
            airline: Airline name.
            classification: Sentiment classification result.
            explanation: Sentiment explanation (not consulted).

        Returns:
            Priority, action type, optional suggested reply and follow-ups.
        """
        sentiment = classification.sentiment
        confident = classification.confidence >= _HIGH_CONFIDENCE
        suggested_response = None

        if sentiment == "negative":
            if confident:
                priority = "high"
                action_type = "response"
                suggested_response = self._generate_negative_response(text, airline)
                follow_up_actions = [
                    "记录客户投诉详情",
                    "转交相关部门处理",
                    "跟进处理进度",
                    "在24小时内给予反馈",
                ]
            else:
                priority = "medium"
                action_type = "investigate"
                follow_up_actions = [
                    "进一步核实情况",
                    "根据核实结果决定是否需要回复",
                ]
        elif sentiment == "positive":
            priority = "low"
            if confident:
                action_type = "response"
                suggested_response = self._generate_positive_response(text, airline)
                follow_up_actions = [
                    "感谢客户反馈",
                    "分享正面评价至内部团队",
                    "考虑在官方渠道展示",
                ]
            else:
                action_type = "monitor"
                follow_up_actions = [
                    "持续关注该用户后续动态",
                ]
        else:
            # Neutral, and any label other than negative/positive.
            if "?" in text or "help" in text.lower():
                priority = "medium"
                action_type = "response"
                suggested_response = self._generate_neutral_response(text, airline)
                follow_up_actions = [
                    "提供准确信息",
                    "确保客户问题得到解答",
                ]
            else:
                priority = "low"
                action_type = "monitor"
                follow_up_actions = [
                    "持续关注",
                ]

        return DisposalPlan(
            priority=priority,
            action_type=action_type,
            suggested_response=suggested_response,
            follow_up_actions=follow_up_actions,
        )

    def _generate_negative_response(self, text: str, airline: str) -> str:
        """Return a reply template for a negative tweet (stable per *text*)."""
        return _pick_response(text, _NEGATIVE_RESPONSES)

    def _generate_positive_response(self, text: str, airline: str) -> str:
        """Return a reply template for a positive tweet (stable per *text*)."""
        return _pick_response(text, _POSITIVE_RESPONSES)

    def _generate_neutral_response(self, text: str, airline: str) -> str:
        """Return a reply template for a neutral tweet (stable per *text*)."""
        return _pick_response(text, _NEUTRAL_RESPONSES)

    def analyze(self, text: str, airline: str) -> TweetAnalysisResult:
        """Run the full pipeline: classify -> explain -> disposal plan.

        Args:
            text: Tweet text.
            airline: Airline name.

        Returns:
            The complete structured analysis result.
        """
        classification = self.classify(text, airline)
        explanation = self.explain(text, airline, classification)
        disposal_plan = self.generate_disposal_plan(text, airline, classification, explanation)

        return TweetAnalysisResult(
            tweet_text=text,
            airline=airline,
            classification=classification,
            explanation=explanation,
            disposal_plan=disposal_plan,
        )


def analyze_tweet(text: str, airline: str) -> TweetAnalysisResult:
    """Analyze a single tweet.

    A new ``TweetSentimentAgent`` (and therefore a model load) is created on
    every call; for many tweets use ``analyze_tweets_batch`` or reuse one
    agent instance.

    Args:
        text: Tweet text.
        airline: Airline name.

    Returns:
        The analysis result.
    """
    agent = TweetSentimentAgent()
    return agent.analyze(text, airline)


def analyze_tweets_batch(texts: list[str], airlines: list[str]) -> list[TweetAnalysisResult]:
    """Analyze several tweets with a single agent instance.

    Args:
        texts: Tweet texts.
        airlines: Airline names, one per entry in *texts*.

    Returns:
        One analysis result per input pair, in input order.

    Raises:
        ValueError: If *texts* and *airlines* differ in length (previously the
            longer list was silently truncated).
    """
    if len(texts) != len(airlines):
        raise ValueError(
            f"texts and airlines must have the same length "
            f"(got {len(texts)} and {len(airlines)})"
        )

    agent = TweetSentimentAgent()
    return [agent.analyze(text, airline) for text, airline in zip(texts, airlines)]


if __name__ == "__main__":
    # Demo: one agent is shared so the model is loaded only once.
    demo_agent = TweetSentimentAgent()

    print(">>> 示例 1: 负面情感")
    result = demo_agent.analyze(
        text="@United This is the worst airline ever! My flight was delayed for 5 hours and no one helped!",
        airline="United",
    )
    print(result.model_dump_json(indent=2))

    print("\n>>> 示例 2: 正面情感")
    result = demo_agent.analyze(
        text="@Southwest Thank you for the amazing flight! The crew was so helpful and friendly.",
        airline="Southwest",
    )
    print(result.model_dump_json(indent=2))

    print("\n>>> 示例 3: 中性情感")
    result = demo_agent.analyze(
        text="@American What is the baggage policy for international flights?",
        airline="American",
    )
    print(result.model_dump_json(indent=2))