添加完整的客户流失预测系统,包括数据处理、模型训练、预测和行动建议功能。主要包含以下模块: 1. 数据预处理流水线(Polars + Pandera) 2. 机器学习模型训练(LightGBM + Logistic Regression) 3. AI Agent预测和建议工具 4. Streamlit交互式Web界面 5. 完整的课程设计报告文档
257 lines
9.1 KiB
Python
257 lines
9.1 KiB
Python
import streamlit as st
import pandas as pd
import numpy as np

from agent import ChurnPredictionAgent, CustomerData

# Page title and layout
st.set_page_config(
    page_title="客户流失预测系统",
    page_icon="📊",
    layout="wide",
)

# Page heading
st.title("📊 客户流失预测与行动建议系统")


@st.cache_resource
def _load_agent() -> ChurnPredictionAgent:
    """Create the prediction agent once and reuse it on later script reruns.

    Streamlit re-executes this script from the top on every widget
    interaction; caching avoids constructing a new agent each time.

    NOTE(review): the cached instance is shared by all sessions, so this
    assumes ChurnPredictionAgent keeps no per-session mutable state --
    confirm against agent.py.
    """
    return ChurnPredictionAgent()


# Agent instance used by the prediction and suggestion steps below
agent = _load_agent()

# Sidebar: customer information input
st.sidebar.header("客户信息输入")

# Option lists reused by several select boxes in the form.
YES_NO_OPTIONS = ["Yes", "No"]
INTERNET_ADDON_OPTIONS = ["Yes", "No", "No internet service"]

# Customer information form
with st.sidebar.form("customer_form"):
    # First row: basic information (left) and core services (right)
    basics_col, core_services_col = st.columns(2)

    with basics_col:
        gender = st.selectbox("性别", ["Male", "Female"])
        SeniorCitizen = st.selectbox("是否为老年人", [0, 1])
        Partner = st.selectbox("是否有伴侣", YES_NO_OPTIONS)
        Dependents = st.selectbox("是否有家属", YES_NO_OPTIONS)
        tenure = st.number_input(
            "在网时长(月)",
            min_value=0,
            max_value=100,
            value=12,
        )

    with core_services_col:
        PhoneService = st.selectbox("是否开通电话服务", YES_NO_OPTIONS)
        MultipleLines = st.selectbox(
            "是否开通多条线路",
            ["Yes", "No", "No phone service"],
        )
        InternetService = st.selectbox(
            "网络服务类型",
            ["DSL", "Fiber optic", "No"],
        )
        OnlineSecurity = st.selectbox(
            "是否开通在线安全服务", INTERNET_ADDON_OPTIONS
        )
        OnlineBackup = st.selectbox(
            "是否开通在线备份服务", INTERNET_ADDON_OPTIONS
        )

    # Second row: add-on services (left) and contract / billing (right)
    addons_col, billing_col = st.columns(2)

    with addons_col:
        DeviceProtection = st.selectbox(
            "是否开通设备保护服务", INTERNET_ADDON_OPTIONS
        )
        TechSupport = st.selectbox(
            "是否开通技术支持服务", INTERNET_ADDON_OPTIONS
        )
        StreamingTV = st.selectbox(
            "是否开通流媒体电视服务", INTERNET_ADDON_OPTIONS
        )
        StreamingMovies = st.selectbox(
            "是否开通流媒体电影服务", INTERNET_ADDON_OPTIONS
        )

    with billing_col:
        Contract = st.selectbox(
            "合同类型",
            ["Month-to-month", "One year", "Two year"],
        )
        PaperlessBilling = st.selectbox("是否使用无纸化账单", YES_NO_OPTIONS)
        PaymentMethod = st.selectbox(
            "支付方式",
            [
                "Electronic check",
                "Mailed check",
                "Bank transfer (automatic)",
                "Credit card (automatic)",
            ],
        )
        MonthlyCharges = st.number_input(
            "月费用",
            min_value=0.0,
            max_value=200.0,
            value=50.0,
            step=0.01,
        )
        TotalCharges = st.number_input(
            "总费用",
            min_value=0.0,
            max_value=10000.0,
            value=600.0,
            step=0.01,
        )

    # Submit button; its return value gates the main content area
    submit_button = st.form_submit_button("🚀 预测流失风险")

# Main content area: results after a submission, otherwise the landing page
if submit_button:
    # Bundle the submitted form values into the agent's input model
    customer_data = CustomerData(
        gender=gender,
        SeniorCitizen=SeniorCitizen,
        Partner=Partner,
        Dependents=Dependents,
        tenure=tenure,
        PhoneService=PhoneService,
        MultipleLines=MultipleLines,
        InternetService=InternetService,
        OnlineSecurity=OnlineSecurity,
        OnlineBackup=OnlineBackup,
        DeviceProtection=DeviceProtection,
        TechSupport=TechSupport,
        StreamingTV=StreamingTV,
        StreamingMovies=StreamingMovies,
        Contract=Contract,
        PaperlessBilling=PaperlessBilling,
        PaymentMethod=PaymentMethod,
        MonthlyCharges=MonthlyCharges,
        TotalCharges=TotalCharges,
    )

    # Run the ML prediction tool
    with st.spinner("🔄 正在预测流失风险..."):
        prediction_result = agent.predict_churn(customer_data)

    will_churn = prediction_result.prediction == 1
    # Plain Python float for display, so the widgets below do not depend on
    # whatever numeric type the agent returns.
    churn_probability = float(prediction_result.probability)
    # st.progress takes a float within [0.0, 1.0]; clamp so a value that is
    # marginally out of range cannot abort the page.
    progress_value = min(max(churn_probability, 0.0), 1.0)

    # Show the prediction result
    st.header("📋 预测结果")

    info_col, service_col = st.columns(2)

    with info_col:
        st.subheader("客户基本信息")
        # The values mix str and int; cast to str so the column is
        # homogeneous when rendered as a table.
        info_values = [gender, SeniorCitizen, Partner, Dependents, tenure]
        info_df = pd.DataFrame({
            "属性": ["性别", "是否为老年人", "是否有伴侣", "是否有家属", "在网时长(月)"],
            "值": [str(value) for value in info_values],
        })
        st.dataframe(info_df, use_container_width=True, hide_index=True)

    with service_col:
        st.subheader("服务信息")
        # Same str cast as above: these values mix str and float.
        service_values = [
            Contract,
            InternetService,
            PaymentMethod,
            MonthlyCharges,
            TotalCharges,
        ]
        service_df = pd.DataFrame({
            "属性": ["合同类型", "网络服务类型", "支付方式", "月费用", "总费用"],
            "值": [str(value) for value in service_values],
        })
        st.dataframe(service_df, use_container_width=True, hide_index=True)

    # Prediction result cards
    st.subheader("🎯 流失预测")

    outcome_col, probability_col, model_col = st.columns(3)

    with outcome_col:
        st.metric(
            label="预测结果",
            value="会流失" if will_churn else "不会流失",
            delta="高风险" if will_churn else "低风险",
            # The delta text carries no minus sign, so Streamlit treats it as
            # an increase: "inverse" paints it red, "normal" green. Pick the
            # colour from the prediction so low risk is not shown in red.
            delta_color="inverse" if will_churn else "normal",
        )

    with probability_col:
        st.metric(
            label="流失概率",
            value=f"{churn_probability:.2%}",
            delta=f"{churn_probability:.2%}",
            delta_color="inverse",
        )

    with model_col:
        st.metric(
            label="使用模型",
            value=prediction_result.model_used.upper(),
            delta="LightGBM",
            delta_color="off",
        )

    # Action suggestions
    st.header("💡 行动建议")

    with st.spinner("🔄 正在生成行动建议..."):
        suggestions = agent.get_action_suggestions(
            # Random placeholder id in CUST-0 .. CUST-999; the form does not
            # collect a real customer id.
            customer_id=f"CUST-{np.random.choice(1000)}",
            prediction=prediction_result.prediction,
            probability=prediction_result.probability,
            customer_data=customer_data,
        )

    # Show the action suggestions, one expander per suggestion
    st.subheader("📋 个性化行动建议")

    for i, suggestion in enumerate(suggestions.suggestions, 1):
        with st.expander(f"建议 {i}"):
            st.write(suggestion)

    # Data visualisation
    st.header("📊 数据可视化")

    # Churn probability dashboard
    st.subheader("流失概率仪表盘")

    gauge_col, importance_col = st.columns(2)

    with gauge_col:
        # Churn probability shown with Streamlit's built-in progress bar
        st.subheader(f"流失概率: {churn_probability:.2%}")
        st.progress(progress_value, text=f"流失概率: {churn_probability:.2%}")

        # Risk tier: below 0.3 low, below 0.7 medium, otherwise high
        if churn_probability < 0.3:
            risk_level = "低风险"
            risk_color = "green"
        elif churn_probability < 0.7:
            risk_level = "中风险"
            risk_color = "yellow"
        else:
            risk_level = "高风险"
            risk_color = "red"

        # Both interpolated values are the constants chosen just above, so
        # enabling raw HTML here does not expose user input.
        st.markdown(
            f"**风险等级**: <span style='color:{risk_color}; font-size:20px;'>{risk_level}</span>",
            unsafe_allow_html=True,
        )

    with importance_col:
        # Customer feature importance
        st.subheader("客户特征分析")

        # Illustrative feature-importance values (a real deployment should
        # read them from the model)
        feature_importance = {
            "合同类型": 0.25,
            "网络服务类型": 0.20,
            "在网时长": 0.15,
            "月费用": 0.12,
            "是否开通技术支持": 0.10,
            "支付方式": 0.08,
            "是否开通在线安全服务": 0.05,
            "是否有伴侣": 0.03,
            "是否有家属": 0.02,
        }

        feature_df = pd.DataFrame({
            "特征": list(feature_importance.keys()),
            "重要性": list(feature_importance.values()),
        }).sort_values(by="重要性", ascending=False)

        st.bar_chart(
            feature_df.set_index("特征"),
            use_container_width=True,
            color="#1f77b4",
        )

    # System information
    st.header("ℹ️ 系统信息")

    performance_col, features_col = st.columns(2)

    with performance_col:
        st.subheader("模型性能")
        st.markdown("- **模型类型**: LightGBM")
        st.markdown("- **ROC-AUC**: 0.8352")
        st.markdown("- **F1分数**: 0.5731")
        st.markdown("- **训练样本数**: 7043")

    with features_col:
        st.subheader("系统功能")
        st.markdown("✅ 客户流失预测")
        st.markdown("✅ 个性化行动建议")
        st.markdown("✅ 数据可视化分析")
        st.markdown("✅ 交互式用户界面")
else:
    # Landing page shown until the form is submitted
    st.info("请在左侧填写客户信息,点击'🚀 预测流失风险'按钮开始预测")

    # System introduction
    st.header("ℹ️ 系统介绍")

    # The markdown text is kept at column 0 on purpose: lines indented by
    # four or more spaces would be rendered as a code block.
    st.markdown("""
本系统基于机器学习和AI Agent技术,实现了客户流失预测与行动建议的闭环。

### 系统功能
- **客户流失预测**: 使用LightGBM模型预测客户流失概率
- **个性化行动建议**: 根据客户特征生成可执行的行动建议
- **数据可视化分析**: 直观展示预测结果和客户特征重要性

### 技术栈
- **机器学习**: LightGBM、Logistic Regression
- **数据处理**: Polars、Pandas
- **AI Agent**: Pydantic
- **Web框架**: Streamlit

### 如何使用
1. 在左侧填写客户信息
2. 点击'🚀 预测流失风险'按钮
3. 查看预测结果和行动建议
4. 分析客户特征重要性
""")