上传文件至 src
This commit is contained in:
parent
91d5baecab
commit
d971bfa799
345
src/tweet_agent.py
Normal file
345
src/tweet_agent.py
Normal file
@ -0,0 +1,345 @@
|
|||||||
|
"""推文情感分析 Agent 模块
|
||||||
|
|
||||||
|
实现「分类 → 解释 → 生成处置方案」流程,输出结构化结果。
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
import zlib
from pathlib import Path
from typing import Optional

import numpy as np
import polars as pl
from pydantic import BaseModel, Field

from src.train_tweet_ultimate import load_model as load_ultimate_model
from src.tweet_data import load_cleaned_tweets
|
||||||
|
|
||||||
|
|
||||||
|
class SentimentClassification(BaseModel):
    """Outcome of the classification stage for a single tweet."""

    # Predicted label; the field description lists negative/neutral/positive.
    sentiment: str = Field(
        description="情感类别: negative/neutral/positive",
    )
    # Score attached to the predicted label (described as a 0-1 value).
    confidence: float = Field(
        description="置信度 (0-1)",
    )
|
||||||
|
|
||||||
|
|
||||||
|
class SentimentExplanation(BaseModel):
    """Human-readable justification for a sentiment verdict."""

    # Short bullet-style signals that influenced the verdict.
    key_factors: list[str] = Field(
        description="影响情感判断的关键因素",
    )
    # Free-text narrative of how the verdict was reached.
    reasoning: str = Field(
        description="情感判断的推理过程",
    )
|
||||||
|
|
||||||
|
|
||||||
|
class DisposalPlan(BaseModel):
    """Recommended handling for an analyzed tweet."""

    # Handling priority; the description lists high/medium/low.
    priority: str = Field(
        description="处理优先级: high/medium/low",
    )
    # Kind of action; the description lists response/investigate/monitor/ignore.
    action_type: str = Field(
        description="行动类型: response/investigate/monitor/ignore",
    )
    # Draft reply text; stays None when no reply is proposed.
    suggested_response: Optional[str] = Field(
        default=None,
        description="建议回复内容(如适用)",
    )
    # Ordered list of follow-up steps for the support team.
    follow_up_actions: list[str] = Field(
        description="后续行动建议",
    )
|
||||||
|
|
||||||
|
|
||||||
|
class TweetAnalysisResult(BaseModel):
    """Structured bundle holding every stage's output for one tweet."""

    # The tweet exactly as it was submitted for analysis.
    tweet_text: str = Field(
        description="原始推文文本",
    )
    # Airline the tweet is associated with.
    airline: str = Field(
        description="航空公司",
    )
    # Stage 1 output: predicted label and its confidence.
    classification: SentimentClassification = Field(
        description="情感分类结果",
    )
    # Stage 2 output: key factors and reasoning text.
    explanation: SentimentExplanation = Field(
        description="情感解释",
    )
    # Stage 3 output: priority, action type, reply draft and follow-ups.
    disposal_plan: DisposalPlan = Field(
        description="处置方案",
    )
|
||||||
|
|
||||||
|
|
||||||
|
class TweetSentimentAgent:
    """Tweet sentiment agent.

    Runs a three-stage pipeline — classify, explain, generate a disposal
    plan — and returns the outcome as structured pydantic models.
    """

    # Confidence at or above which a negative/positive verdict triggers a reply.
    _HIGH_CONFIDENCE = 0.8

    # Keyword lexicons consulted by explain(); order is preserved in the output.
    _NEGATIVE_WORDS = (
        "bad", "terrible", "awful", "worst", "hate", "angry", "disappointed",
        "frustrated", "cancelled", "delayed", "lost", "rude",
    )
    _POSITIVE_WORDS = (
        "good", "great", "excellent", "best", "love", "happy", "satisfied",
        "amazing", "wonderful", "thank", "helpful",
    )
    _NEUTRAL_WORDS = (
        "question", "how", "what", "when", "where", "why", "please", "help",
        "info", "information",
    )

    # Canned replies, one pool per sentiment.
    _NEGATIVE_RESPONSES = (
        "感谢您的反馈。我们非常重视您提到的问题,将立即进行调查并尽快给您答复。",
        "对于您的不愉快体验,我们深表歉意。请私信我们详细情况,我们将全力为您解决。",
        "收到您的反馈,我们对此感到抱歉。相关部门已介入,将尽快处理并给您满意的答复。",
    )
    _POSITIVE_RESPONSES = (
        "感谢您的认可和支持!我们会继续努力为您提供更好的服务。",
        "很高兴听到您的正面反馈!您的满意是我们前进的动力。",
        "感谢您的分享!我们会将您的反馈传达给团队,激励我们做得更好。",
    )
    _NEUTRAL_RESPONSES = (
        "感谢您的询问。请问您需要了解哪方面的信息?我们将竭诚为您解答。",
        "收到您的问题。请提供更多细节,以便我们更好地为您提供帮助。",
    )

    def __init__(self, model_path: Optional[Path] = None):
        """Load the classifier and expose its preprocessing components.

        Args:
            model_path: Optional model location.
                NOTE(review): this argument is accepted but never used — the
                model is always obtained from ``load_ultimate_model()`` with no
                arguments. Confirm whether the loader can take a path before
                wiring it through.
        """
        self.model = load_ultimate_model()
        self.label_encoder = self.model.label_encoder
        self.tfidf_vectorizer = self.model.tfidf_vectorizer
        self.airline_encoder = self.model.airline_encoder

    def classify(self, text: str, airline: str) -> SentimentClassification:
        """Stage 1: predict the sentiment label of a tweet.

        Args:
            text: Tweet text.
            airline: Airline the tweet is associated with.

        Returns:
            The predicted label together with the probability the model
            assigned to that label.
        """
        texts = np.array([text])
        airlines = np.array([airline])

        sentiment = self.model.predict(texts, airlines)[0]
        proba = self.model.predict_proba(texts, airlines)[0]

        # Look up the predicted label's position via the label encoder.
        # Assumes predict_proba columns follow the encoder's class order —
        # TODO confirm against the model implementation.
        sentiment_idx = self.label_encoder.transform([sentiment])[0]

        return SentimentClassification(
            sentiment=sentiment,
            confidence=float(proba[sentiment_idx]),
        )

    @staticmethod
    def _find_keywords(text_lower: str, keywords: tuple[str, ...]) -> list[str]:
        """Return the keywords that start a word in ``text_lower``.

        Matching is anchored at a word start so inflections still hit
        ("thanks" -> "thank", "helped" -> "help") while mid-word occurrences
        do not ("unhappy" is not "happy", "show" is not "how").
        """
        return [
            word
            for word in keywords
            if re.search(rf"\b{re.escape(word)}", text_lower)
        ]

    def explain(self, text: str, airline: str, classification: SentimentClassification) -> SentimentExplanation:
        """Stage 2: build a keyword/punctuation based explanation.

        Args:
            text: Tweet text.
            airline: Airline the tweet is associated with.
            classification: Stage 1 result (accepted for interface symmetry;
                the heuristics below do not read it).

        Returns:
            Key factors found in the text and a reasoning string assembled
            from them.
        """
        key_factors = []
        reasoning_parts = []

        text_lower = text.lower()

        # Lexicon hits; at most three keywords are listed per category.
        found_negative = self._find_keywords(text_lower, self._NEGATIVE_WORDS)
        found_positive = self._find_keywords(text_lower, self._POSITIVE_WORDS)
        found_neutral = self._find_keywords(text_lower, self._NEUTRAL_WORDS)

        if found_negative:
            key_factors.append(f"包含负面词汇: {', '.join(found_negative[:3])}")
            reasoning_parts.append("文本中包含多个负面情感词汇,表达不满情绪")

        if found_positive:
            key_factors.append(f"包含正面词汇: {', '.join(found_positive[:3])}")
            reasoning_parts.append("文本中包含正面情感词汇,表达满意或感谢")

        if found_neutral:
            key_factors.append(f"包含中性词汇: {', '.join(found_neutral[:3])}")
            reasoning_parts.append("文本主要包含询问或请求,情绪相对中性")

        # Surface-level punctuation / mention cues.
        if "!" in text:
            key_factors.append("包含感叹号")
            reasoning_parts.append("感叹号的使用表明情绪较为强烈")

        if "?" in text:
            key_factors.append("包含问号")
            reasoning_parts.append("问号的使用表明存在疑问或询问")

        if "@" in text:
            key_factors.append("包含@提及")
            reasoning_parts.append("直接@航空公司表明希望获得关注或回复")

        # The airline is always reported as a factor.
        key_factors.append(f"涉及航空公司: {airline}")

        # Fall back to a generic sentence when no cue fired.
        if not reasoning_parts:
            reasoning_parts.append("根据文本整体语义和情感特征进行判断")

        reasoning = "。".join(reasoning_parts) + "。"

        return SentimentExplanation(
            key_factors=key_factors,
            reasoning=reasoning,
        )

    def generate_disposal_plan(
        self,
        text: str,
        airline: str,
        classification: SentimentClassification,
        explanation: SentimentExplanation,
    ) -> DisposalPlan:
        """Stage 3: map sentiment and confidence to a handling plan.

        Args:
            text: Tweet text.
            airline: Airline the tweet is associated with.
            classification: Stage 1 result; drives the branch taken.
            explanation: Stage 2 result (accepted for interface symmetry;
                not read by the rules below).

        Returns:
            Priority, action type, optional reply draft and follow-up steps.
        """
        sentiment = classification.sentiment
        confident = classification.confidence >= self._HIGH_CONFIDENCE

        if sentiment == "negative":
            if confident:
                priority = "high"
                action_type = "response"
                suggested_response = self._generate_negative_response(text, airline)
                follow_up_actions = [
                    "记录客户投诉详情",
                    "转交相关部门处理",
                    "跟进处理进度",
                    "在24小时内给予反馈",
                ]
            else:
                priority = "medium"
                action_type = "investigate"
                suggested_response = None
                follow_up_actions = [
                    "进一步核实情况",
                    "根据核实结果决定是否需要回复",
                ]
        elif sentiment == "positive":
            if confident:
                priority = "low"
                action_type = "response"
                suggested_response = self._generate_positive_response(text, airline)
                follow_up_actions = [
                    "感谢客户反馈",
                    "分享正面评价至内部团队",
                    "考虑在官方渠道展示",
                ]
            else:
                priority = "low"
                action_type = "monitor"
                suggested_response = None
                follow_up_actions = [
                    "持续关注该用户后续动态",
                ]
        else:
            # Any other label is handled as neutral: reply only when the tweet
            # looks like a question or a request for help.
            if "?" in text or "help" in text.lower():
                priority = "medium"
                action_type = "response"
                suggested_response = self._generate_neutral_response(text, airline)
                follow_up_actions = [
                    "提供准确信息",
                    "确保客户问题得到解答",
                ]
            else:
                priority = "low"
                action_type = "monitor"
                suggested_response = None
                follow_up_actions = [
                    "持续关注",
                ]

        return DisposalPlan(
            priority=priority,
            action_type=action_type,
            suggested_response=suggested_response,
            follow_up_actions=follow_up_actions,
        )

    @staticmethod
    def _pick_response(text: str, responses: tuple[str, ...]) -> str:
        """Pick one canned reply, stably keyed on the tweet text.

        The builtin ``hash()`` of a ``str`` is salted per interpreter process,
        so it would select a different reply for the same tweet on every run.
        CRC-32 of the UTF-8 bytes gives the same index in every process.
        """
        # surrogatepass keeps malformed text (lone surrogates) from raising.
        digest = zlib.crc32(text.encode("utf-8", "surrogatepass"))
        return responses[digest % len(responses)]

    def _generate_negative_response(self, text: str, airline: str) -> str:
        """Return an apology-style reply for a negative tweet."""
        return self._pick_response(text, self._NEGATIVE_RESPONSES)

    def _generate_positive_response(self, text: str, airline: str) -> str:
        """Return a thank-you reply for a positive tweet."""
        return self._pick_response(text, self._POSITIVE_RESPONSES)

    def _generate_neutral_response(self, text: str, airline: str) -> str:
        """Return an information-request reply for a neutral tweet."""
        return self._pick_response(text, self._NEUTRAL_RESPONSES)

    def analyze(self, text: str, airline: str) -> TweetAnalysisResult:
        """Run the full pipeline: classify, explain, generate a disposal plan.

        Args:
            text: Tweet text.
            airline: Airline the tweet is associated with.

        Returns:
            The inputs plus the output of all three stages.
        """
        classification = self.classify(text, airline)
        explanation = self.explain(text, airline, classification)
        disposal_plan = self.generate_disposal_plan(text, airline, classification, explanation)

        return TweetAnalysisResult(
            tweet_text=text,
            airline=airline,
            classification=classification,
            explanation=explanation,
            disposal_plan=disposal_plan,
        )
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_tweet(text: str, airline: str) -> TweetAnalysisResult:
    """Analyze a single tweet with a newly constructed agent.

    Args:
        text: Tweet text.
        airline: Airline the tweet is associated with.

    Returns:
        The structured analysis produced by ``TweetSentimentAgent.analyze``.
    """
    # A fresh agent is built on every call.
    return TweetSentimentAgent().analyze(text, airline)
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_tweets_batch(texts: list[str], airlines: list[str]) -> list[TweetAnalysisResult]:
    """Analyze several tweets with one shared agent.

    Args:
        texts: Tweet texts.
        airlines: Airline for each tweet, aligned by position with ``texts``.

    Returns:
        One analysis result per tweet, in input order.

    Raises:
        ValueError: If ``texts`` and ``airlines`` differ in length. Pairing
            them with ``zip`` would otherwise silently drop the unmatched
            tail.
    """
    # Materialize first so arbitrary iterables keep working with len().
    texts = list(texts)
    airlines = list(airlines)

    if len(texts) != len(airlines):
        raise ValueError(
            f"texts and airlines must have the same length "
            f"(got {len(texts)} and {len(airlines)})"
        )

    # Nothing to analyze: skip constructing the agent altogether.
    if not texts:
        return []

    agent = TweetSentimentAgent()
    return [agent.analyze(text, airline) for text, airline in zip(texts, airlines)]
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Demo: run the pipeline on one negative, one positive and one neutral
    # sample tweet and print each result as indented JSON.
    examples = (
        (
            ">>> 示例 1: 负面情感",
            "@United This is the worst airline ever! My flight was delayed for 5 hours and no one helped!",
            "United",
        ),
        (
            "\n>>> 示例 2: 正面情感",
            "@Southwest Thank you for the amazing flight! The crew was so helpful and friendly.",
            "Southwest",
        ),
        (
            "\n>>> 示例 3: 中性情感",
            "@American What is the baggage policy for international flights?",
            "American",
        ),
    )

    # Build the agent once; going through analyze_tweet() for every sample
    # would construct a new agent (and reload the model) each time.
    agent = TweetSentimentAgent()

    for banner, tweet, carrier in examples:
        print(banner)
        print(agent.analyze(tweet, carrier).model_dump_json(indent=2))
|
||||||
Loading…
Reference in New Issue
Block a user