2026-01-14 20:43:02 +08:00
|
|
|
import os
|
|
|
|
|
import joblib
|
|
|
|
|
import numpy as np
|
|
|
|
|
import pandas as pd
|
2026-01-15 20:12:33 +08:00
|
|
|
import requests
|
2026-01-14 20:43:02 +08:00
|
|
|
from typing import Literal, Annotated
|
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
|
|
|
|
# Process-wide caches for the persisted artifacts. All three start out empty
# and are filled in on first use by _ensure_loaded():
#   _model_lgb - pipeline read from artifacts/lgb_pipeline.joblib
#   _model_lr  - pipeline read from artifacts/lr_pipeline.joblib
#   _le        - label encoder read from artifacts/label_encoder.joblib
_model_lgb = _model_lr = _le = None
|
|
|
|
|
|
|
|
|
|
class CustomerFeatures(BaseModel):
    # Validated input record describing one customer interaction.
    # _to_dataframe() turns model_dump() of this model into a one-row
    # DataFrame, so the field names below double as the column names handed
    # to the persisted pipelines.
    # (Comments are used instead of a docstring so the generated JSON schema
    # stays exactly as it was.)

    # --- customer profile -------------------------------------------------
    gender: Literal["male", "female", "other"]
    age_group: Literal["18-25", "26-35", "36-45", "46-60", "60+"]
    region: Literal["north", "south", "east", "west", "central"]

    # --- purchase context -------------------------------------------------
    product_category: str  # free-form; no fixed vocabulary is enforced here
    purchase_channel: Literal["online", "offline"]
    platform: str  # free-form; no fixed vocabulary is enforced here

    # --- support interaction ----------------------------------------------
    response_time_hours: float = Field(ge=0)  # required, must be >= 0
    issue_resolved: bool
    complaint_registered: bool

    # --- review -------------------------------------------------------------
    review_text: str = Field(min_length=3)  # required, at least 3 characters
|
|
|
|
|
|
|
|
|
|
class RiskOutput(BaseModel):
    # Response wrapper for predict_risk_model(): carries the single float
    # returned by predict_risk().
    risk: float = Field(...)  # required; no default value
|
|
|
|
|
|
|
|
|
|
class ExplanationOutput(BaseModel):
    # Response wrapper for explain_features_model(): carries the list of
    # explanation strings produced by explain_features().
    factors: list[str] = Field(...)  # required; no default value
|
|
|
|
|
|
|
|
|
|
def _ensure_loaded():
    """Populate the module-level artifact caches on first use.

    Each of ``_model_lgb``, ``_model_lr`` and ``_le`` is loaded only while it
    is still ``None``, so repeated calls do not re-read files that were
    already loaded.

    The files are looked up under the relative directory ``artifacts``, i.e.
    relative to the current working directory of the process.

    NOTE(review): ``joblib.load`` unpickles the file contents, so these
    artifacts must come from a trusted source.
    """
    global _model_lgb, _model_lr, _le

    def _read_artifact(filename):
        # Single place that knows where the artifacts live.
        return joblib.load(os.path.join("artifacts", filename))

    if _model_lgb is None:
        _model_lgb = _read_artifact("lgb_pipeline.joblib")

    if _model_lr is None:
        _model_lr = _read_artifact("lr_pipeline.joblib")

    if _le is None:
        _le = _read_artifact("label_encoder.joblib")
|
|
|
|
|
|
|
|
|
|
def _to_dataframe(features) -> pd.DataFrame:
    """Wrap a single feature record in a one-row DataFrame.

    Args:
        features: Either a ``CustomerFeatures`` instance (converted with
            ``model_dump()``) or a plain ``dict`` (used as-is, without
            validation).

    Returns:
        A DataFrame with exactly one row built from the record.

    Raises:
        TypeError: If ``features`` is neither a ``CustomerFeatures`` nor a
            ``dict``.
    """
    if isinstance(features, CustomerFeatures):
        return pd.DataFrame([features.model_dump()])

    if isinstance(features, dict):
        return pd.DataFrame([features])

    raise TypeError("features must be CustomerFeatures or dict")
|
|
|
|
|
|
|
|
|
|
def predict_risk(features: CustomerFeatures | dict) -> float:
    """Return the predicted probability of the "negative" class.

    Args:
        features: A ``CustomerFeatures`` instance or an equivalent ``dict``.

    Returns:
        The entry of the cached ``_model_lgb`` pipeline's ``predict_proba``
        output selected by the label encoder's index for ``"negative"``.

    Raises:
        TypeError: Propagated from ``_to_dataframe`` for unsupported input
            types.
    """
    _ensure_loaded()
    frame = _to_dataframe(features)

    # First (and only) row of the probability matrix.
    class_probabilities = _model_lgb.predict_proba(frame)[0]

    # The label encoder tells us which position holds the "negative" class.
    # NOTE(review): assumes predict_proba columns follow the encoder's
    # ordering - confirm against the training code.
    negative_position = int(_le.transform(["negative"])[0])

    return float(class_probabilities[negative_position])
|
|
|
|
|
|
|
|
|
|
def predict_risk_model(features: CustomerFeatures | dict) -> RiskOutput:
    """Run ``predict_risk`` and wrap the result in a ``RiskOutput`` model."""
    risk_value = predict_risk(features)
    return RiskOutput(risk=risk_value)
|
|
|
|
|
|
|
|
|
|
def explain_features(features: CustomerFeatures | dict) -> list[str]:
    """List the transformed features that most affect the "negative" score.

    The record is passed through the ``"preprocessor"`` step of the cached
    ``_model_lr`` pipeline; each transformed value is multiplied by the
    ``"classifier"`` step's coefficient for the "negative" class, and the
    (at most) eight largest contributions by absolute value are reported.

    Args:
        features: A ``CustomerFeatures`` instance or an equivalent ``dict``.

    Returns:
        Strings of the form
        ``"<feature> increase|decrease negative risk (weight=<x.xxx>)"``,
        ordered by decreasing absolute contribution. Features whose
        contribution is exactly zero are omitted, so fewer than eight
        entries (possibly none) may be returned.

    Raises:
        TypeError: Propagated from ``_to_dataframe`` for unsupported input
            types.
    """
    _ensure_loaded()
    df = _to_dataframe(features)

    pre = _model_lr.named_steps["preprocessor"]
    clf = _model_lr.named_steps["classifier"]

    # The preprocessor may hand back either a scipy sparse matrix or a dense
    # ndarray (scikit-learn picks based on output density), so only densify
    # when the object actually supports it.
    transformed = pre.transform(df)
    if hasattr(transformed, "toarray"):
        transformed = transformed.toarray()
    vec = np.asarray(transformed).ravel()

    idx_neg = int(_le.transform(["negative"])[0])

    coef_matrix = np.asarray(clf.coef_)
    if coef_matrix.ndim == 2 and coef_matrix.shape[0] == 1:
        # Binary linear models in scikit-learn store a single coefficient
        # row that describes the class at position 1; the class at position
        # 0 has the mirrored (negated) effect.
        # NOTE(review): assumes the classifier's class order matches the
        # label encoder's indices - confirm against the training code.
        coefs = coef_matrix[0] if idx_neg == 1 else -coef_matrix[0]
    else:
        coefs = coef_matrix[idx_neg]

    contrib = vec * coefs
    names = pre.get_feature_names_out()

    # Largest absolute contribution first, capped at eight entries.
    order = np.argsort(-np.abs(contrib))[:8]

    out = []
    for i in order:
        if contrib[i] == 0:
            # A zero contribution neither raises nor lowers the score;
            # reporting it as a "decrease" would be misleading.
            continue
        direction = "increase" if contrib[i] > 0 else "decrease"
        out.append(f"{names[i]} {direction} negative risk (weight={contrib[i]:.3f})")
    return out
|
|
|
|
|
|
|
|
|
|
def explain_features_model(features: CustomerFeatures | dict) -> ExplanationOutput:
    """Run ``explain_features`` and wrap the result in ``ExplanationOutput``."""
    factor_lines = explain_features(features)
    return ExplanationOutput(factors=factor_lines)
|
2026-01-15 20:12:33 +08:00
|
|
|
|
|
|
|
|
# Chinese display labels for transformed feature names that may appear in the
# strings returned by explain_features(). Iteration order matters: the first
# key found inside an explanation is the one that gets substituted.
_FEATURE_DESCRIPTIONS = {
    'cat__gender_male': '男性',
    'cat__gender_female': '女性',
    'cat__gender_other': '其他性别',
    'cat__age_group_18-25': '18-25岁年龄段',
    'cat__age_group_26-35': '26-35岁年龄段',
    'cat__age_group_36-45': '36-45岁年龄段',
    'cat__age_group_46-60': '46-60岁年龄段',
    'cat__age_group_60+': '60岁以上年龄段',
    'cat__region_north': '北部地区',
    'cat__region_south': '南部地区',
    'cat__region_east': '东部地区',
    'cat__region_west': '西部地区',
    'cat__region_central': '中部地区',
    'cat__purchase_channel_online': '线上购买渠道',
    'cat__purchase_channel_offline': '线下购买渠道',
    'cat__issue_resolved_True': '问题已解决',
    'cat__issue_resolved_False': '问题未解决',
    'cat__complaint_registered_True': '已注册投诉',
    'cat__complaint_registered_False': '未注册投诉',
    'num__response_time_hours': '响应时间(小时)',
}

# English fragments emitted by explain_features() and their Chinese wording.
_PHRASE_TRANSLATIONS = (
    ('increase negative risk', '增加了负面情绪风险'),
    ('decrease negative risk', '降低了负面情绪风险'),
    ('weight=', '权重为'),
)


def _humanize_explanation(explanation: str) -> str:
    """Translate one explain_features() line into Chinese display text."""
    for feature, desc in _FEATURE_DESCRIPTIONS.items():
        if feature in explanation:
            explanation = explanation.replace(feature, desc)
            break
    # Applied to every line, including those whose feature has no entry in
    # the mapping, so that no factor is dropped from the summary.
    for phrase, translated in _PHRASE_TRANSLATIONS:
        explanation = explanation.replace(phrase, translated)
    return explanation


def _request_llm_summary(prompt: str, api_key: str, timeout: float) -> str:
    """POST the prompt to the DeepSeek chat endpoint and return the reply text."""
    response = requests.post(
        "https://api.deepseek.com/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
        json={
            "model": "deepseek-chat",
            "messages": [
                {"role": "system", "content": "你是一位专业的客户分析专家,擅长将复杂的数据分析结果转化为通俗易懂的解释。"},
                {"role": "user", "content": prompt},
            ],
            "max_tokens": 200,
            "temperature": 0.7,
        },
        # Without a timeout requests waits indefinitely on a stalled server.
        timeout=timeout,
    )
    response.raise_for_status()
    content = response.json()["choices"][0]["message"]["content"]
    if not isinstance(content, str) or not content.strip():
        raise ValueError("LLM response contained no text content")
    return content


def explain_features_with_llm(
    features: CustomerFeatures | dict,
    api_key: str,
    *,
    timeout: float = 30.0,
) -> str:
    """Produce a natural-language (Chinese) explanation of the risk factors.

    The factor lines from ``explain_features`` are translated into Chinese
    display text and sent to the DeepSeek chat-completions API, which is
    asked to rewrite them as a short paragraph for customer-service staff.
    This is best-effort: if the request fails for any reason, the translated
    factor lines are returned directly instead of raising.

    Args:
        features: A ``CustomerFeatures`` instance or an equivalent ``dict``.
        api_key: Bearer token sent in the ``Authorization`` header.
        timeout: Seconds to wait for the HTTP request before giving up and
            using the fallback text.

    Returns:
        The model's reply text, or the fallback text (a fixed Chinese heading
        followed by the newline-joined factor lines) when the API call fails
        or returns no usable content.

    Raises:
        TypeError: Propagated from ``explain_features`` for unsupported
            input types (raised before any network access).
    """
    import logging  # local import: keeps this block self-contained

    # explain_features() loads the model artifacts itself, so no separate
    # loading step is needed here.
    human_explanations = [
        _humanize_explanation(line) for line in explain_features(features)
    ]
    factors_text = "\n".join(human_explanations)

    prompt = (
        "请将以下客户负面情绪风险因素分析结果转化为一段自然、流畅的中文解释,用于向客服人员展示:\n\n"
        f"{factors_text}\n\n"
        "要求:\n"
        "1. 用简洁的语言说明主要风险因素\n"
        "2. 突出影响最大的几个因素\n"
        "3. 保持专业但易于理解\n"
        "4. 不要使用技术术语\n"
        "5. 总长度控制在100-200字之间"
    )

    try:
        return _request_llm_summary(prompt, api_key, timeout)
    except Exception as exc:
        # Deliberately broad: any failure (network, HTTP status, malformed
        # payload) degrades to the plain listing, but is no longer silent.
        logging.getLogger(__name__).warning(
            "LLM explanation request failed; using fallback text: %s", exc
        )
        return f"客户负面情绪风险分析:{factors_text}"
|