# Code-viewer listing metadata (not part of the program): 213 lines, 8.2 KiB, Python.
import json
import random
import re
import warnings

import joblib
import numpy as np
import pandas as pd


# ==========================================
# 1. Mock LLM interface (Mock Agent)
# ==========================================
def mock_llm_generate(prompt):
    """Simulate an LLM call and return a marketing recommendation as JSON text.

    This is a stand-in for a real provider API call (the original notes name
    OpenAI / Anthropic / DeepSeek). No model is invoked: the subscription
    probability is parsed out of ``prompt`` and mapped to a canned strategy
    with simple threshold rules.

    Tiers (percent), matching the business rules stated in the agent prompt:
        * ``> 70``      -> "VIP 专属服务"
        * ``40`` - ``70`` (inclusive) -> "标准营销"
        * ``< 40``      -> "静默观察"

    Args:
        prompt: Prompt text. It is searched for a line of the form
            ``预测模型判定该客户订阅定存的概率为: 78.5%``. If no well-formed
            number is found there, the probability defaults to 0.

    Returns:
        A pretty-printed JSON string (non-ASCII characters preserved) with the
        keys ``customer_id``, ``analysis`` (``score``, ``segment``),
        ``action_plan`` (``primary_action``, ``backup_action``,
        ``suggested_script``) and ``reasoning``.
    """
    # Capture only a well-formed decimal ("78", "78.", "78.5", ".5"); inputs
    # such as "." or "1.2.3" fall through to the default instead of making
    # float() raise ValueError.
    prob_match = re.search(
        r"预测模型判定该客户订阅定存的概率为:\s*(\d+\.?\d*|\.\d+)%",
        prompt,
    )
    prob = float(prob_match.group(1)) if prob_match else 0

    # Rule-based "reasoning" standing in for the model's output.
    if prob > 70:
        strategy = "VIP 专属服务"
        action = "资深理财经理致电"
        script = "您好,鉴于您良好的信用记录,我们为您预留了一款高收益理财产品..."
        reason = "客户属于高意向群体,且过往活动反馈良好。"
    elif prob >= 40:
        # Inclusive lower bound: the rule base defines the middle tier as
        # "40%-70%" and the low tier as strictly "< 40%".
        strategy = "标准营销"
        action = "普通客服致电或短信触达"
        script = "您好,我行近期推出了几款稳健型存款产品,占用您一分钟..."
        reason = "客户意向中等,建议通过低成本渠道试探。"
    else:
        strategy = "静默观察"
        action = "发送月度邮件"
        script = "(邮件内容) 本月财经摘要..."
        reason = "客户意向较低,频繁打扰可能导致反感。"

    response = {
        # Placeholder: the mock has no customer context to read an ID from.
        "customer_id": "Unknown",
        "analysis": {
            "score": prob,
            "segment": strategy,
        },
        "action_plan": {
            "primary_action": action,
            "backup_action": "记录反馈并更新标签",
            "suggested_script": script,
        },
        "reasoning": reason,
    }

    return json.dumps(response, ensure_ascii=False, indent=2)


# ==========================================
# 2. Agent core class
# ==========================================
class MarketingAgent:
    """Marketing decision agent: a trained classifier plus a (mock) LLM planner.

    For each customer the agent preprocesses the raw record, asks the
    classifier for the positive-class probability, embeds that probability in
    a prompt, and returns the parsed JSON recommendation produced by
    ``mock_llm_generate``.

    Instance attributes (all set in ``__init__`` from the loaded bundle):
        artifacts: Object returned by ``joblib.load``; indexed with the keys
            ``'model'``, ``'encoders'`` and ``'feature_meta'``.
        model: Classifier on which ``predict_proba`` is called.
        encoders: Mapping of column name -> fitted encoder with ``transform``.
        feature_meta: Mapping whose ``'all_cols'`` entry lists the feature
            columns; assumed (per the original author's note) to be the
            training-time column order.
    """

    # System message intended for a real chat-completion API. The mock LLM
    # only receives the user prompt, so this is not consumed in the demo.
    SYSTEM_PROMPT = (
        "你是一个专业的银行营销决策 Agent。请根据客户数据和预测模型的结果,给出具体的执行建议。\n"
        "要求输出必须是 JSON 格式。"
    )

    # User prompt with two placeholders: ``feature_desc`` and ``prob_percent``.
    # Built from explicit lines so the text does not depend on source
    # indentation; the mock parser only searches for the probability line.
    USER_PROMPT_TEMPLATE = (
        "\n"
        "【输入数据】\n"
        "客户特征: {feature_desc}\n"
        "\n"
        "【模型分析】\n"
        "预测模型判定该客户订阅定存的概率为: {prob_percent}%\n"
        "\n"
        "【业务规则库】\n"
        "- 概率 > 70%: 高价值,高优先级,人工介入。\n"
        "- 概率 40%-70%: 潜在价值,自动化营销 + 人工辅助。\n"
        "- 概率 < 40%: 低价值,仅自动化触达。\n"
        "\n"
        "【任务】\n"
        "请基于以上信息,生成该客户的营销建议 JSON,包含:\n"
        "- 客户分群 (segment)\n"
        "- 推荐行动 (primary_action)\n"
        "- 话术建议 (suggested_script)\n"
        "- 决策依据 (reasoning)\n"
    )

    def __init__(self, artifact_path='model_artifacts.pkl'):
        """Load the model bundle from ``artifact_path``.

        Args:
            artifact_path: Path to a joblib file containing a mapping with
                the keys ``'model'``, ``'encoders'`` and ``'feature_meta'``.

        Raises:
            KeyError: If the loaded bundle lacks one of the expected keys.
        """
        print(f"Agent 正在加载模型资产: {artifact_path} ...")
        # SECURITY NOTE: joblib.load unpickles the file and can execute
        # arbitrary code; only load artifacts from a trusted source.
        self.artifacts = joblib.load(artifact_path)
        self.model = self.artifacts['model']
        self.encoders = self.artifacts['encoders']
        self.feature_meta = self.artifacts['feature_meta']

    def preprocess(self, customer_data):
        """Convert one raw customer record into a single-row model input.

        Steps: build a one-row DataFrame, drop ``duration`` when present,
        encode every column that has an entry in ``self.encoders``, then
        select ``self.feature_meta['all_cols']`` in that order.

        Args:
            customer_data: Mapping of feature name -> raw value. It is not
                modified.

        Returns:
            A one-row ``pandas.DataFrame`` containing exactly the columns in
            ``feature_meta['all_cols']``, in that order.

        Raises:
            KeyError: If a column listed in ``feature_meta['all_cols']`` is
                missing from ``customer_data``.
        """
        df = pd.DataFrame([customer_data])

        # NOTE(review): 'duration' is presumably excluded because it is not
        # known before the customer is contacted - confirm against training.
        if 'duration' in df.columns:
            df = df.drop('duration', axis=1)

        for col, le in self.encoders.items():
            if col not in df.columns:
                continue
            try:
                df[col] = le.transform(df[col])
            except ValueError:
                # Demo-grade fallback kept from the original: an unseen
                # category is encoded as 0. Surface it instead of hiding it,
                # because code 0 is a real category to the model.
                warnings.warn(
                    f"Unseen category {customer_data.get(col)!r} in column "
                    f"{col!r}; falling back to encoded value 0.",
                    stacklevel=2,
                )
                df[col] = 0

        # Re-index to the stored column list; the original author notes that
        # XGBoost may fail when the column order differs from training.
        return df[self.feature_meta['all_cols']]

    def _build_user_prompt(self, customer_data, prob_percent):
        """Render the user prompt from the raw record and the score (percent)."""
        # Simple approach kept from the original: list every raw feature
        # except 'duration'. The author's note suggests SHAP-based top-3
        # features as a future refinement.
        feature_desc = ", ".join(
            f"{k}={v}" for k, v in customer_data.items() if k != 'duration'
        )
        return self.USER_PROMPT_TEMPLATE.format(
            feature_desc=feature_desc,
            prob_percent=prob_percent,
        )

    def analyze_customer(self, customer_data):
        """Run the full agent workflow for one customer.

        1. Perception: preprocess the raw record.
        2. Cognition: get the positive-class probability from the model.
        3. Planning: build the prompt and call the (mock) LLM.
        4. Action: return the structured recommendation.

        Args:
            customer_data: Mapping of feature name -> raw value.

        Returns:
            The LLM's JSON response parsed into a ``dict``.
        """
        X_input = self.preprocess(customer_data)

        # Column 1 of predict_proba is read as the probability of class 1
        # ("yes"). Cast to a plain float so formatting does not depend on the
        # NumPy scalar type the model returns.
        prob = float(self.model.predict_proba(X_input)[0][1])
        prob_percent = round(prob * 100, 2)

        user_prompt = self._build_user_prompt(customer_data, prob_percent)

        print(f"\n--- Agent 正在思考 (构建 Prompt) ---\n[Prompt 摘要] 预测概率: {prob_percent}%")

        # Mocked call; a real deployment would send SYSTEM_PROMPT and
        # user_prompt to a chat-completion API here.
        llm_response = mock_llm_generate(user_prompt)

        return json.loads(llm_response)


# ==========================================
# 3. Demo run
# ==========================================
def main():
    """Run the demo: score a few rows of ``bank.csv`` plus one hand-built VIP.

    Reads ``bank.csv`` from the working directory, loads the default model
    artifacts through ``MarketingAgent()``, and prints the agent's
    recommendation for each test customer.
    """
    df_raw = pd.read_csv('bank.csv')

    agent = MarketingAgent()

    print("\n" + "="*50)
    print("开始模拟业务流程...")
    print("="*50)

    # Fixed row positions (not random), so the demo output is reproducible.
    sample_indices = [1, 20, 100]

    # Hand-built high-intent customer. Values follow the original author's
    # note on feature importance: poutcome=success (most important),
    # contact=cellular, housing=no, high balance.
    vip_customer = {
        'age': 35,
        'job': 'management',
        'marital': 'married',
        'education': 'tertiary',
        'default': 'no',
        'balance': 5000,
        'housing': 'no',
        'loan': 'no',
        'contact': 'cellular',
        'day': 15,
        'month': 'oct',
        'duration': 0,  # dropped during preprocessing
        'campaign': 1,
        'pdays': 90,
        'previous': 2,
        'poutcome': 'success',  # strong feature
        'deposit': 'yes',  # display only; removed before scoring
    }

    test_cases = []
    for i in sample_indices:
        # Guard against a CSV shorter than the hard-coded positions, which
        # would otherwise raise IndexError from iloc.
        if i >= len(df_raw):
            print(f"跳过样本索引 {i}: 数据集仅有 {len(df_raw)} 行")
            continue
        test_cases.append((i, df_raw.iloc[i].to_dict()))
    # The VIP customer is tagged with the sentinel index -1.
    test_cases.append((-1, vip_customer))

    for idx, customer_dict in test_cases:
        # Remove the label so the record looks like a new, unlabeled customer.
        real_result = customer_dict.pop('deposit', "Unknown")

        if idx == -1:
            print("\n>>> 处理客户 ID: VIP-Demo (人工构造的高意向客户)")
        else:
            print(f"\n>>> 处理客户 ID: {idx}")

        print(f"真实结果 (仅供参考): {real_result}")

        decision = agent.analyze_customer(customer_dict)

        print("\n[Agent 最终建议]")
        print(json.dumps(decision, ensure_ascii=False, indent=2))
        print("-" * 30)


if __name__ == "__main__":
    main()