- 实现数据预处理模块(data.py)和模型训练模块(train.py) - 添加智能客服Agent应用(agent_app.py)和DNA解码系统(dna_decoder.py) - 包含补偿推荐系统(compensation_recommender.py)和可视化支持 - 添加项目配置文件(pyproject.toml)和README文档 - 提供多种启动脚本(start_app.*, fix_path_and_run.bat等)
372 lines
17 KiB
Python
372 lines
17 KiB
Python
"""竞争情报与差异化定位系统"""
|
||
|
||
import pandas as pd
|
||
import numpy as np
|
||
from typing import List, Dict, Tuple, Optional
|
||
from pydantic import BaseModel, Field
|
||
import plotly.express as px
|
||
import plotly.graph_objects as go
|
||
from plotly.subplots import make_subplots
|
||
from sklearn.preprocessing import StandardScaler
|
||
from scipy import stats
|
||
|
||
|
||
class AirlineComparison(BaseModel):
    """Result of comparing a target airline with one competitor on one problem type."""

    # Label of the compared pair (the analysis code fills it as "<target> vs <competitor>").
    航空公司: str = Field(..., description="航空公司名称")
    # Complaint category the comparison refers to.
    问题类型: str = Field(..., description="问题类型")
    # Number of complaints recorded for the target airline in this category.
    投诉数量: int = Field(..., description="投诉数量")
    # Mean sentiment strength of the target airline's complaints.
    平均情感强度: float = Field(..., description="平均情感强度")
    # Satisfaction score on a 0-100 scale.
    满意度得分: float = Field(..., description="满意度得分(0-100)")
    # Relative standing versus the competitor: better / worse / on par.
    相对表现: str = Field(..., description="相对表现(优于/劣于/持平)")
|
||
|
||
|
||
class CompetitiveAdvantage(BaseModel):
    """A problem area in which the target airline outperforms its competitors."""

    # Problem area where the advantage was found.
    优势领域: str = Field(..., description="优势领域")
    # Size of the advantage (target score minus the competitors' average score).
    相对优势度: float = Field(..., description="相对优势度")
    # Human-readable key figures backing the advantage.
    关键指标: List[str] = Field(..., description="关键指标")
    # Suggested follow-up action.
    改进建议: str = Field(..., description="改进建议")
|
||
|
||
|
||
class OpportunitySpace(BaseModel):
    """A market opportunity derived from a competitor's weakest problem area."""

    # Problem area in which the opportunity exists.
    机会领域: str = Field(..., description="机会领域")
    # Description of the need the competitor leaves unmet.
    未满足需求: str = Field(..., description="未满足需求")
    # Estimated market size expressed as a share between 0 and 1.
    潜在市场规模: float = Field(..., description="潜在市场规模(0-1)")
    # Evidence of the competitor's weakness.
    竞争对手弱点: List[str] = Field(..., description="竞争对手弱点")
    # Suggested differentiation strategy.
    差异化建议: str = Field(..., description="差异化建议")
|
||
|
||
|
||
class CompetitiveIntelligence:
    """Competitive intelligence and differentiation positioning system.

    Reads an airline-tweet CSV and derives, for each airline and complaint
    category, a heuristic satisfaction score.  The scores are used to compare
    a target airline with competitors, to find advantages and opportunity
    spaces, and to detect competitor improvements over time.

    The data must provide the columns ``airline``, ``negativereason``,
    ``airline_sentiment_confidence`` and ``tweet_id``.

    Airline names are matched case-insensitively so that the lowercase names
    in ``self.airlines`` also match data that stores them in another casing.
    NOTE(review): the public Tweets.csv appears to use title-case airline
    names - confirm against the actual data file.

    Every analysis method accepts an optional ``df``; when omitted the CSV at
    ``data_path`` is read.  Return annotations that name the result models are
    quoted forward references, so defining this class does not require them.
    """

    # A score gap above this value counts as a competitive advantage.
    ADVANTAGE_THRESHOLD = 5
    # Score gaps within +/- this band are reported as "on par".
    PARITY_BAND = 5
    # A later-minus-earlier score gain above this value counts as an improvement.
    IMPROVEMENT_THRESHOLD = 2
    # Trend analysis needs strictly more rows than this.
    MIN_TREND_SAMPLES = 10

    def __init__(self, data_path: str = "data/Tweets.csv"):
        """Store the CSV location and the known airlines / problem categories."""
        self.data_path = data_path
        self.airlines = ["united", "american", "delta", "southwest", "us airways"]
        self.problem_types = [
            "Bad Flight", "Can't Tell", "Late Flight", "Customer Service Issue",
            "Flight Booking Problems", "Lost Luggage", "Flight Attendant Complaints",
            "Cancelled Flight", "Damaged Luggage", "longlines",
        ]

    def load_data(self) -> pd.DataFrame:
        """Read and return the CSV at ``self.data_path``."""
        return pd.read_csv(self.data_path)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _airline_keys(df: pd.DataFrame) -> pd.Series:
        """Return the ``airline`` column case-folded for case-insensitive matching."""
        return df['airline'].astype(str).str.casefold()

    @staticmethod
    def _rivals(target_airline: str, competitor_airlines: List[str]) -> List[str]:
        """Return the competitors, dropping any entry that names the target itself."""
        target_key = str(target_airline).casefold()
        return [name for name in competitor_airlines if str(name).casefold() != target_key]

    @staticmethod
    def _select_rows(df: pd.DataFrame, airline_keys: pd.Series, airline: str, problem: str) -> pd.DataFrame:
        """Return the rows of ``df`` for one airline and one complaint category."""
        mask = (airline_keys == str(airline).casefold()) & (df['negativereason'] == problem)
        return df[mask]

    def _score_rows(self, rows: pd.DataFrame) -> float:
        """Return the satisfaction score of an already filtered set of rows."""
        return self._calculate_satisfaction_score(
            rows['airline_sentiment_confidence'].mean(),
            len(rows),
        )

    # ------------------------------------------------------------------
    # Analyses
    # ------------------------------------------------------------------

    def analyze_airline_comparison(
        self,
        target_airline: str,
        competitor_airlines: List[str],
        df: Optional[pd.DataFrame] = None,
    ) -> "List[AirlineComparison]":
        """Compare the target airline with each competitor per problem type.

        One ``AirlineComparison`` is produced for every (problem, competitor)
        pair in which both airlines have at least one complaint.  The figures
        stored in the result are those of the target airline.
        """
        df = self.load_data() if df is None else df
        airline_keys = self._airline_keys(df)
        rivals = self._rivals(target_airline, competitor_airlines)

        comparisons = []
        for problem in self.problem_types:
            target_rows = self._select_rows(df, airline_keys, target_airline, problem)
            if target_rows.empty:
                continue

            target_complaints = len(target_rows)
            target_sentiment = float(target_rows['airline_sentiment_confidence'].mean())
            target_score = self._calculate_satisfaction_score(target_sentiment, target_complaints)

            for competitor in rivals:
                competitor_rows = self._select_rows(df, airline_keys, competitor, problem)
                if competitor_rows.empty:
                    continue

                comparisons.append(AirlineComparison(
                    航空公司=f"{target_airline} vs {competitor}",
                    问题类型=problem,
                    投诉数量=target_complaints,
                    平均情感强度=target_sentiment,
                    满意度得分=target_score,
                    相对表现=self._determine_relative_performance(
                        target_score, self._score_rows(competitor_rows)
                    ),
                ))

        return comparisons

    def identify_competitive_advantages(
        self,
        target_airline: str,
        competitor_airlines: List[str],
        df: Optional[pd.DataFrame] = None,
    ) -> "List[CompetitiveAdvantage]":
        """Return problem areas where the target beats the competitors' average.

        An area qualifies when the target's score exceeds the mean score of
        the competitors that have data by more than ``ADVANTAGE_THRESHOLD``.
        """
        df = self.load_data() if df is None else df
        airline_keys = self._airline_keys(df)
        rivals = self._rivals(target_airline, competitor_airlines)

        advantages = []
        for problem in self.problem_types:
            target_rows = self._select_rows(df, airline_keys, target_airline, problem)
            if target_rows.empty:
                continue
            target_score = self._score_rows(target_rows)

            competitor_scores = []
            for competitor in rivals:
                competitor_rows = self._select_rows(df, airline_keys, competitor, problem)
                if not competitor_rows.empty:
                    competitor_scores.append(self._score_rows(competitor_rows))
            if not competitor_scores:
                continue

            avg_competitor_score = float(np.mean(competitor_scores))
            advantage_degree = target_score - avg_competitor_score
            if advantage_degree > self.ADVANTAGE_THRESHOLD:
                advantages.append(CompetitiveAdvantage(
                    优势领域=problem,
                    相对优势度=advantage_degree,
                    关键指标=[f"满意度得分: {target_score:.1f}", f"行业平均: {avg_competitor_score:.1f}"],
                    改进建议=self._generate_improvement_suggestion(problem, advantage_degree),
                ))

        return advantages

    def discover_opportunity_spaces(
        self,
        target_airline: str,
        competitor_airlines: List[str],
        df: Optional[pd.DataFrame] = None,
    ) -> "List[OpportunitySpace]":
        """Find each competitor's weakest problem area where the target scores higher."""
        df = self.load_data() if df is None else df
        airline_keys = self._airline_keys(df)

        opportunities = []
        for competitor in self._rivals(target_airline, competitor_airlines):
            scored_problems = []
            for problem in self.problem_types:
                competitor_rows = self._select_rows(df, airline_keys, competitor, problem)
                if not competitor_rows.empty:
                    scored_problems.append((problem, self._score_rows(competitor_rows)))
            if not scored_problems:
                continue

            # Lowest score = weakest area; ties resolve to the first problem type listed.
            weakest_problem, weakest_score = min(scored_problems, key=lambda item: item[1])

            target_rows = self._select_rows(df, airline_keys, target_airline, weakest_problem)
            if target_rows.empty:
                continue

            # An opportunity exists only where the target does better than the competitor.
            if self._score_rows(target_rows) > weakest_score:
                opportunities.append(OpportunitySpace(
                    机会领域=weakest_problem,
                    未满足需求=f"{competitor}在{weakest_problem}问题上表现不佳",
                    潜在市场规模=self._estimate_market_size(weakest_problem, df),
                    竞争对手弱点=[f"{competitor}满意度得分: {weakest_score:.1f}"],
                    差异化建议=self._generate_differentiation_suggestion(weakest_problem, competitor),
                ))

        return opportunities

    def monitor_competitor_improvements(
        self,
        competitor_airlines: List[str],
        df: Optional[pd.DataFrame] = None,
    ) -> Dict[str, List[Dict]]:
        """Detect problem areas in which competitors improved over time.

        For each competitor and problem type with more than
        ``MIN_TREND_SAMPLES`` rows, the rows are ordered by ``tweet_id``
        (assumed to reflect chronological order) and split into an earlier and
        a later half.  A later-minus-earlier score gain above
        ``IMPROVEMENT_THRESHOLD`` is reported.

        Returns a dict keyed by the competitor names exactly as passed in;
        competitors without detected improvements map to an empty list.
        """
        df = self.load_data() if df is None else df
        airline_keys = self._airline_keys(df)

        improvements = {}
        for competitor in competitor_airlines:
            detected = []
            for problem in self.problem_types:
                rows = self._select_rows(df, airline_keys, competitor, problem)
                if len(rows) <= self.MIN_TREND_SAMPLES:
                    continue

                ordered = rows.sort_values('tweet_id')
                half = len(ordered) // 2
                early_score = self._score_rows(ordered.iloc[:half])
                late_score = self._score_rows(ordered.iloc[half:])

                improvement = late_score - early_score
                if improvement > self.IMPROVEMENT_THRESHOLD:
                    detected.append({
                        '问题类型': problem,
                        '改进幅度': improvement,
                        '前期表现': early_score,
                        '后期表现': late_score,
                        '改进措施': self._infer_improvement_measures(problem, improvement),
                    })
            improvements[competitor] = detected

        return improvements

    # ------------------------------------------------------------------
    # Scoring and text generation
    # ------------------------------------------------------------------

    def _calculate_satisfaction_score(self, sentiment_confidence: float, complaint_count: int) -> float:
        """Return a satisfaction score that is never negative.

        The confidence is scaled by 100 and reduced by 0.1 per complaint,
        with the deduction capped at 20 points.  A missing confidence is
        treated as 0.5.
        """
        if pd.isna(sentiment_confidence):
            sentiment_confidence = 0.5

        base_score = sentiment_confidence * 100
        complaint_penalty = min(complaint_count * 0.1, 20)
        return float(max(0.0, base_score - complaint_penalty))

    def _determine_relative_performance(self, target_score: float, competitor_score: float) -> str:
        """Classify the score gap as better, worse or on par."""
        difference = target_score - competitor_score
        if difference > self.PARITY_BAND:
            return "优于"
        if difference < -self.PARITY_BAND:
            return "劣于"
        return "持平"

    def _generate_improvement_suggestion(self, problem: str, advantage_degree: float) -> str:
        """Return a suggestion for a problem area, worded by the size of the advantage."""
        suggestions = {
            "Bad Flight": "继续保持航班质量监控,加强机组人员培训",
            "Late Flight": "优化航班调度,提高准点率",
            "Customer Service Issue": "加强客服培训,提升服务响应速度",
            "Lost Luggage": "改进行李追踪系统,加强行李处理流程",
        }
        base_suggestion = suggestions.get(problem, "持续优化相关服务流程")

        if advantage_degree > 10:
            return f"{base_suggestion},考虑将这一优势作为品牌差异化点进行宣传"
        return f"{base_suggestion},保持现有优势"

    def _estimate_market_size(self, problem: str, df: pd.DataFrame) -> float:
        """Return the share of all complaints that fall into ``problem``.

        Falls back to 0.1 when the data contains no complaints at all.
        """
        total_all_complaints = int(df['negativereason'].notna().sum())
        if total_all_complaints == 0:
            return 0.1
        problem_complaints = int((df['negativereason'] == problem).sum())
        return problem_complaints / total_all_complaints

    def _generate_differentiation_suggestion(self, problem: str, competitor: str) -> str:
        """Return a differentiation idea aimed at the competitor's weak problem area."""
        suggestions = {
            "Bad Flight": f"针对{competitor}在航班体验上的弱点,推出'舒适飞行保证'计划",
            "Late Flight": f"利用{competitor}准点率问题,强调自身的准点承诺",
            "Customer Service Issue": f"针对{competitor}的服务问题,推出'24小时客服响应'服务",
            "Lost Luggage": f"针对{competitor}行李问题,提供'行李实时追踪'功能",
        }
        return suggestions.get(problem, f"针对{competitor}的弱点,推出差异化服务方案")

    def _infer_improvement_measures(self, problem: str, improvement: float) -> str:
        """Return a guessed explanation for a competitor's improvement."""
        measures = {
            "Bad Flight": "可能改进了航班服务流程或机组培训",
            "Late Flight": "可能优化了航班调度或地面服务",
            "Customer Service Issue": "可能加强了客服培训或投诉处理流程",
            "Lost Luggage": "可能升级了行李处理系统或追踪技术",
        }
        base_measure = measures.get(problem, "实施了相关服务改进措施")

        if improvement > 5:
            return f"显著{base_measure}"
        return f"轻微{base_measure}"

    def generate_competitive_insights_report(self, target_airline: str, competitor_airlines: List[str]) -> Dict:
        """Run all four analyses and bundle their results.

        The CSV is read once and shared by every analysis.  The returned dict
        has the keys ``comparisons``, ``advantages``, ``opportunities`` and
        ``improvements``.
        """
        df = self.load_data()
        return {
            'comparisons': self.analyze_airline_comparison(target_airline, competitor_airlines, df=df),
            'advantages': self.identify_competitive_advantages(target_airline, competitor_airlines, df=df),
            'opportunities': self.discover_opportunity_spaces(target_airline, competitor_airlines, df=df),
            'improvements': self.monitor_competitor_improvements(competitor_airlines, df=df),
        }
|
||
|
||
|
||
# Visualization helpers


def _models_to_frame(models) -> pd.DataFrame:
    """Convert a sequence of Pydantic models into a DataFrame, one row per model."""
    rows = []
    for model in models:
        # Pydantic v2 renamed ``.dict()`` to ``.model_dump()`` and deprecated the
        # old name; prefer the new method when present, fall back for v1 models.
        dump = getattr(model, "model_dump", None) or model.dict
        rows.append(dump())
    return pd.DataFrame(rows)


def create_competitive_analysis_charts(insights_report: Dict) -> Dict:
    """Build Plotly figures from a competitive insights report.

    Args:
        insights_report: Mapping that may contain the keys ``comparisons``,
            ``advantages`` and ``opportunities``, each holding a list of
            Pydantic result models.  Missing or empty sections are skipped.

    Returns:
        Dict with up to three figures under ``comparison_chart``,
        ``advantages_chart`` and ``opportunities_chart``; a key is present only
        when its section had data.
    """
    charts = {}

    # Grouped bars: satisfaction score per problem type, one colour per airline pair.
    comparisons = insights_report.get('comparisons')
    if comparisons:
        charts['comparison_chart'] = px.bar(
            _models_to_frame(comparisons),
            x='问题类型',
            y='满意度得分',
            color='航空公司',
            title='航空公司满意度对比',
            barmode='group',
        )

    # Bars coloured by the size of the advantage.
    advantages = insights_report.get('advantages')
    if advantages:
        charts['advantages_chart'] = px.bar(
            _models_to_frame(advantages),
            x='优势领域',
            y='相对优势度',
            title='竞争优势分析',
            color='相对优势度',
            color_continuous_scale='Viridis',
        )

    # Scatter whose marker size follows the estimated market size.
    opportunities = insights_report.get('opportunities')
    if opportunities:
        charts['opportunities_chart'] = px.scatter(
            _models_to_frame(opportunities),
            x='机会领域',
            y='潜在市场规模',
            size='潜在市场规模',
            title='机会空间发现',
            hover_data=['未满足需求'],
        )

    return charts