08_17-AirCARE/bigwork/src/competitive_intelligence.py
Your Name bd5d8d108c feat: 添加航空公司情感分析与智能客服系统初始代码
- 实现数据预处理模块(data.py)和模型训练模块(train.py)
- 添加智能客服Agent应用(agent_app.py)和DNA解码系统(dna_decoder.py)
- 包含补偿推荐系统(compensation_recommender.py)和可视化支持
- 添加项目配置文件(pyproject.toml)和README文档
- 提供多种启动脚本(start_app.*, fix_path_and_run.bat等)
2026-01-13 00:43:15 +08:00

372 lines
17 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""竞争情报与差异化定位系统"""
import pandas as pd
import numpy as np
from typing import List, Dict, Tuple, Optional
from pydantic import BaseModel, Field
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.preprocessing import StandardScaler
from scipy import stats
class AirlineComparison(BaseModel):
    """Comparison record for one problem type between two airlines.

    All fields are required; the Chinese field names are part of the data
    contract (they become DataFrame columns / chart axes downstream).
    """

    # Airline name (label of the compared airline or airline pair).
    航空公司: str = Field(..., description="航空公司名称")
    # Problem type the record refers to.
    问题类型: str = Field(..., description="问题类型")
    # Number of complaints.
    投诉数量: int = Field(..., description="投诉数量")
    # Mean sentiment intensity.
    平均情感强度: float = Field(..., description="平均情感强度")
    # Satisfaction score, documented as lying in 0-100.
    满意度得分: float = Field(..., description="满意度得分(0-100)")
    # Relative performance label (better / worse / on par).
    相对表现: str = Field(..., description="相对表现(优于/劣于/持平)")
class CompetitiveAdvantage(BaseModel):
    """A problem area in which the analysed airline has an advantage.

    All fields are required.
    """

    # Area (problem type) where the advantage exists.
    优势领域: str = Field(..., description="优势领域")
    # Size of the advantage relative to competitors.
    相对优势度: float = Field(..., description="相对优势度")
    # Human-readable key metrics backing the advantage.
    关键指标: List[str] = Field(..., description="关键指标")
    # Suggested follow-up action.
    改进建议: str = Field(..., description="改进建议")
class OpportunitySpace(BaseModel):
    """A discovered market opportunity derived from a competitor weakness.

    All fields are required.
    """

    # Area (problem type) in which the opportunity exists.
    机会领域: str = Field(..., description="机会领域")
    # Description of the unmet customer need.
    未满足需求: str = Field(..., description="未满足需求")
    # Potential market size, documented as a 0-1 share.
    潜在市场规模: float = Field(..., description="潜在市场规模(0-1)")
    # Competitor weaknesses that motivate the opportunity.
    竞争对手弱点: List[str] = Field(..., description="竞争对手弱点")
    # Suggested differentiation strategy.
    差异化建议: str = Field(..., description="差异化建议")
class CompetitiveIntelligence:
    """Competitive-intelligence and differentiation analysis for airline tweets.

    The analyses read a tweet table with the columns ``airline``,
    ``negativereason``, ``airline_sentiment_confidence`` and (for trend
    monitoring) ``tweet_id``.

    Every public analysis method accepts an optional, already-loaded ``df``
    so that several analyses can share a single read of the CSV file; when
    ``df`` is omitted the table is loaded from ``data_path``.
    """

    def __init__(self, data_path: str = "data/Tweets.csv"):
        """Store the CSV location and the known airlines / problem types."""
        self.data_path = data_path
        self.airlines = ["united", "american", "delta", "southwest", "us airways"]
        self.problem_types = [
            "Bad Flight", "Can't Tell", "Late Flight", "Customer Service Issue",
            "Flight Booking Problems", "Lost Luggage", "Flight Attendant Complaints",
            "Cancelled Flight", "Damaged Luggage", "longlines"
        ]

    def load_data(self) -> pd.DataFrame:
        """Read the tweet CSV at ``self.data_path`` and return it as a DataFrame."""
        return pd.read_csv(self.data_path)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    def _score_rows(self, rows: pd.DataFrame) -> float:
        """Return the satisfaction score for an already-filtered set of rows."""
        return self._calculate_satisfaction_score(
            rows['airline_sentiment_confidence'].mean(),
            len(rows)
        )

    def _problem_stats(self, df: pd.DataFrame, airline: str,
                       problem: str) -> Optional[Tuple[int, float, float]]:
        """Summarise one airline's complaints about one problem type.

        Returns:
            ``(complaint_count, mean_confidence, satisfaction_score)``, or
            ``None`` when the airline has no rows for that problem.
        """
        rows = df[(df['airline'] == airline) & (df['negativereason'] == problem)]
        if len(rows) == 0:
            return None
        count = len(rows)
        mean_confidence = rows['airline_sentiment_confidence'].mean()
        score = self._calculate_satisfaction_score(mean_confidence, count)
        return count, mean_confidence, score

    # ------------------------------------------------------------------
    # Public analyses
    # ------------------------------------------------------------------
    def analyze_airline_comparison(self, target_airline: str, competitor_airlines: List[str],
                                   df: Optional[pd.DataFrame] = None) -> "List[AirlineComparison]":
        """Compare the target airline with each competitor, per problem type.

        Args:
            target_airline: Airline under analysis (matched exactly against
                the ``airline`` column).
            competitor_airlines: Airlines to compare against; an entry equal
                to ``target_airline`` is skipped.
            df: Optional pre-loaded tweet table; loaded via ``load_data()``
                when omitted.

        Returns:
            One record per (problem, competitor) pair for which both airlines
            have at least one complaint.  The count, mean confidence and
            score stored in the record are the target airline's values.
        """
        if df is None:
            df = self.load_data()

        comparisons = []
        for problem in self.problem_types:
            target = self._problem_stats(df, target_airline, problem)
            if target is None:
                continue
            target_complaints, target_sentiment, target_score = target

            for competitor in competitor_airlines:
                if competitor == target_airline:
                    continue
                rival = self._problem_stats(df, competitor, problem)
                if rival is None:
                    continue
                comp_score = rival[2]
                comparisons.append(AirlineComparison(
                    航空公司=f"{target_airline} vs {competitor}",
                    问题类型=problem,
                    投诉数量=target_complaints,
                    平均情感强度=target_sentiment,
                    满意度得分=target_score,
                    相对表现=self._determine_relative_performance(target_score, comp_score)
                ))
        return comparisons

    def identify_competitive_advantages(self, target_airline: str, competitor_airlines: List[str],
                                        df: Optional[pd.DataFrame] = None) -> "List[CompetitiveAdvantage]":
        """Find problem types where the target beats the competitor average.

        Args:
            target_airline: Airline under analysis.
            competitor_airlines: Airlines forming the comparison baseline.
            df: Optional pre-loaded tweet table; loaded via ``load_data()``
                when omitted.

        Returns:
            One record per problem type where the target's score exceeds the
            mean competitor score by more than 5 points.
        """
        if df is None:
            df = self.load_data()

        advantages = []
        for problem in self.problem_types:
            target = self._problem_stats(df, target_airline, problem)
            if target is None:
                continue
            target_score = target[2]

            # Scores of every competitor that has data for this problem.
            competitor_scores = []
            for competitor in competitor_airlines:
                if competitor == target_airline:
                    continue
                rival = self._problem_stats(df, competitor, problem)
                if rival is not None:
                    competitor_scores.append(rival[2])
            if not competitor_scores:
                continue

            avg_competitor_score = np.mean(competitor_scores)
            advantage_degree = target_score - avg_competitor_score
            if advantage_degree > 5:  # advantage threshold
                advantages.append(CompetitiveAdvantage(
                    优势领域=problem,
                    相对优势度=advantage_degree,
                    关键指标=[f"满意度得分: {target_score:.1f}", f"行业平均: {avg_competitor_score:.1f}"],
                    改进建议=self._generate_improvement_suggestion(problem, advantage_degree)
                ))
        return advantages

    def discover_opportunity_spaces(self, target_airline: str, competitor_airlines: List[str],
                                    df: Optional[pd.DataFrame] = None) -> "List[OpportunitySpace]":
        """Find opportunities in each competitor's weakest problem area.

        For every competitor the lowest-scoring problem type is located; an
        opportunity is reported when the target airline has data for the same
        problem and scores strictly higher.

        Args:
            target_airline: Airline under analysis.
            competitor_airlines: Competitors whose weaknesses are examined.
            df: Optional pre-loaded tweet table; loaded via ``load_data()``
                when omitted.

        Returns:
            At most one record per competitor.
        """
        if df is None:
            df = self.load_data()

        opportunities = []
        for competitor in competitor_airlines:
            if competitor == target_airline:
                continue

            # (problem, score) for each problem the competitor has data on.
            competitor_problems = []
            for problem in self.problem_types:
                rival = self._problem_stats(df, competitor, problem)
                if rival is not None:
                    competitor_problems.append((problem, rival[2]))
            if not competitor_problems:
                continue

            # Weakest area = lowest score (first one wins on ties).
            weakest_problem, weakest_score = min(competitor_problems, key=lambda x: x[1])

            target = self._problem_stats(df, target_airline, weakest_problem)
            if target is None:
                continue
            target_score = target[2]

            if target_score > weakest_score:
                market_size = self._estimate_market_size(weakest_problem, df)
                opportunities.append(OpportunitySpace(
                    机会领域=weakest_problem,
                    未满足需求=f"{competitor}{weakest_problem}问题上表现不佳",
                    潜在市场规模=market_size,
                    竞争对手弱点=[f"{competitor}满意度得分: {weakest_score:.1f}"],
                    差异化建议=self._generate_differentiation_suggestion(weakest_problem, competitor)
                ))
        return opportunities

    def monitor_competitor_improvements(self, competitor_airlines: List[str],
                                        df: Optional[pd.DataFrame] = None) -> Dict[str, List[Dict]]:
        """Detect problem areas where competitors improved over time.

        Rows are ordered by ``tweet_id`` (used here as a stand-in for time
        order) and split into an earlier and a later half whose scores are
        compared.  Only (competitor, problem) pairs with more than 10 rows
        are considered.

        Args:
            competitor_airlines: Competitors to monitor.
            df: Optional pre-loaded tweet table; loaded via ``load_data()``
                when omitted.

        Returns:
            Mapping of competitor name to a list of improvement dicts (keys
            ``问题类型``, ``改进幅度``, ``前期表现``, ``后期表现``, ``改进措施``).
            Every competitor gets an entry, possibly an empty list.
        """
        if df is None:
            df = self.load_data()

        improvements = {}
        for competitor in competitor_airlines:
            competitor_improvements = []
            for problem in self.problem_types:
                problem_data = df[(df['airline'] == competitor) & (df['negativereason'] == problem)]
                if len(problem_data) <= 10:  # not enough data for a trend
                    continue

                ordered = problem_data.sort_values('tweet_id')
                # With more than 10 rows both halves are guaranteed non-empty.
                split_point = len(ordered) // 2
                early_score = self._score_rows(ordered.iloc[:split_point])
                late_score = self._score_rows(ordered.iloc[split_point:])

                improvement = late_score - early_score
                if improvement > 2:  # significant improvement
                    competitor_improvements.append({
                        '问题类型': problem,
                        '改进幅度': improvement,
                        '前期表现': early_score,
                        '后期表现': late_score,
                        '改进措施': self._infer_improvement_measures(problem, improvement)
                    })
            improvements[competitor] = competitor_improvements
        return improvements

    # ------------------------------------------------------------------
    # Scoring and text helpers
    # ------------------------------------------------------------------
    def _calculate_satisfaction_score(self, sentiment_confidence: float, complaint_count: int) -> float:
        """Combine mean confidence and complaint volume into a single score.

        The confidence is scaled by 100, then a penalty of 0.1 per complaint
        (capped at 20) is subtracted; the result is floored at 0.  A missing
        (NaN) confidence is treated as 0.5.
        """
        if pd.isna(sentiment_confidence):
            sentiment_confidence = 0.5
        base_score = sentiment_confidence * 100
        # More complaints lower the score, by at most 20 points.
        complaint_penalty = min(complaint_count * 0.1, 20)
        return max(0, base_score - complaint_penalty)

    def _determine_relative_performance(self, target_score: float, competitor_score: float) -> str:
        """Label the score gap: better (> 5), worse (< -5), otherwise on par."""
        difference = target_score - competitor_score
        if difference > 5:
            return "优于"
        if difference < -5:
            return "劣于"
        return "持平"

    def _generate_improvement_suggestion(self, problem: str, advantage_degree: float) -> str:
        """Build the suggestion text for an advantage in ``problem``.

        Unknown problem types get a generic suggestion; an advantage above
        10 points adds a recommendation to promote it as a differentiator.
        """
        suggestions = {
            "Bad Flight": "继续保持航班质量监控,加强机组人员培训",
            "Late Flight": "优化航班调度,提高准点率",
            "Customer Service Issue": "加强客服培训,提升服务响应速度",
            "Lost Luggage": "改进行李追踪系统,加强行李处理流程"
        }
        base_suggestion = suggestions.get(problem, "持续优化相关服务流程")
        if advantage_degree > 10:
            return f"{base_suggestion},考虑将这一优势作为品牌差异化点进行宣传"
        return f"{base_suggestion},保持现有优势"

    def _estimate_market_size(self, problem: str, df: pd.DataFrame) -> float:
        """Return the share of all labelled complaints that concern ``problem``.

        Falls back to 0.1 when the table has no labelled complaints at all.
        """
        total_complaints = len(df[df['negativereason'] == problem])
        total_all_complaints = len(df[df['negativereason'].notna()])
        if total_all_complaints > 0:
            return total_complaints / total_all_complaints
        return 0.1  # default when there is nothing to measure against

    def _generate_differentiation_suggestion(self, problem: str, competitor: str) -> str:
        """Build the differentiation advice aimed at ``competitor``'s weakness."""
        suggestions = {
            "Bad Flight": f"针对{competitor}在航班体验上的弱点,推出'舒适飞行保证'计划",
            "Late Flight": f"利用{competitor}准点率问题,强调自身的准点承诺",
            "Customer Service Issue": f"针对{competitor}的服务问题,推出'24小时客服响应'服务",
            "Lost Luggage": f"针对{competitor}行李问题,提供'行李实时追踪'功能"
        }
        return suggestions.get(problem, f"针对{competitor}的弱点,推出差异化服务方案")

    def _infer_improvement_measures(self, problem: str, improvement: float) -> str:
        """Describe the likely cause of an improvement, graded by its size.

        Improvements above 5 points are prefixed as significant, smaller
        ones as slight.
        """
        measures = {
            "Bad Flight": "可能改进了航班服务流程或机组培训",
            "Late Flight": "可能优化了航班调度或地面服务",
            "Customer Service Issue": "可能加强了客服培训或投诉处理流程",
            "Lost Luggage": "可能升级了行李处理系统或追踪技术"
        }
        base_measure = measures.get(problem, "实施了相关服务改进措施")
        if improvement > 5:
            return f"显著{base_measure}"
        return f"轻微{base_measure}"

    def generate_competitive_insights_report(self, target_airline: str, competitor_airlines: List[str]) -> Dict:
        """Run all four analyses and bundle their results.

        The tweet table is loaded once and shared by every analysis instead
        of being re-read from disk for each of them.

        Returns:
            Dict with the keys ``comparisons``, ``advantages``,
            ``opportunities`` and ``improvements``.
        """
        df = self.load_data()
        comparisons = self.analyze_airline_comparison(target_airline, competitor_airlines, df=df)
        advantages = self.identify_competitive_advantages(target_airline, competitor_airlines, df=df)
        opportunities = self.discover_opportunity_spaces(target_airline, competitor_airlines, df=df)
        improvements = self.monitor_competitor_improvements(competitor_airlines, df=df)
        return {
            'comparisons': comparisons,
            'advantages': advantages,
            'opportunities': opportunities,
            'improvements': improvements
        }
# Visualization helpers


def _model_to_record(item):
    """Convert one report entry into a plain dict for DataFrame construction.

    Plain dicts pass through unchanged.  Pydantic models are dumped with
    ``model_dump()`` when available (Pydantic v2) and with the legacy
    ``dict()`` method otherwise (Pydantic v1).
    """
    if isinstance(item, dict):
        return item
    dump = getattr(item, "model_dump", None)
    if dump is None:
        dump = item.dict
    return dump()


def create_competitive_analysis_charts(insights_report: Dict) -> Dict:
    """Build Plotly figures from a competitive-insights report.

    Args:
        insights_report: Mapping that may contain the keys ``comparisons``,
            ``advantages`` and ``opportunities``, each holding a list of
            records.  Missing or empty sections are skipped.

    Returns:
        Dict with any of ``comparison_chart``, ``advantages_chart`` and
        ``opportunities_chart``, one per non-empty section.
    """
    charts = {}

    # Grouped bar chart: satisfaction score per problem type and airline pair.
    comparisons = insights_report.get('comparisons')
    if comparisons:
        comparisons_df = pd.DataFrame([_model_to_record(c) for c in comparisons])
        charts['comparison_chart'] = px.bar(
            comparisons_df,
            x='问题类型',
            y='满意度得分',
            color='航空公司',
            title='航空公司满意度对比',
            barmode='group'
        )

    # Bar chart: relative advantage per advantage area.
    advantages = insights_report.get('advantages')
    if advantages:
        advantages_df = pd.DataFrame([_model_to_record(a) for a in advantages])
        charts['advantages_chart'] = px.bar(
            advantages_df,
            x='优势领域',
            y='相对优势度',
            title='竞争优势分析',
            color='相对优势度',
            color_continuous_scale='Viridis'
        )

    # Scatter chart: potential market size per opportunity area.
    opportunities = insights_report.get('opportunities')
    if opportunities:
        opportunities_df = pd.DataFrame([_model_to_record(o) for o in opportunities])
        charts['opportunities_chart'] = px.scatter(
            opportunities_df,
            x='机会领域',
            y='潜在市场规模',
            size='潜在市场规模',
            title='机会空间发现',
            hover_data=['未满足需求']
        )

    return charts