G09-BankMarketing/agent_app.py
2026-01-16 19:28:30 +08:00

185 lines
6.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from pydantic import BaseModel, Field
import joblib
import json
import pandas as pd
import numpy as np
import logging
import os
import random
# Set up root logging (INFO level, timestamped format) and the module-level logger.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
# ==========================================
# 1. 定义 Pydantic 数据模型
# ==========================================
class CustomerFeatures(BaseModel):
    """Input model holding the features of a single customer.

    Field order is kept stable because the model is dumped to a dict and
    turned into a one-row DataFrame by the agent.
    """

    # --- Customer profile ---
    age: int = Field(
        ge=18,
        le=120,
        description="客户年龄",
    )
    job: str = Field(description="职业")
    marital: str = Field(description="婚姻状况")
    education: str = Field(description="教育程度")

    # --- Financial status (yes/no flags are validated by regex) ---
    default: str = Field(
        pattern="^(yes|no)$",
        description="是否有违约记录",
    )
    balance: int = Field(description="账户余额")
    housing: str = Field(
        pattern="^(yes|no)$",
        description="是否有住房贷款",
    )
    loan: str = Field(
        pattern="^(yes|no)$",
        description="是否有个人贷款",
    )

    # --- Last contact ---
    contact: str = Field(description="联系方式")
    day: int = Field(
        ge=1,
        le=31,
        description="最后联系日",
    )
    month: str = Field(description="最后联系月份")

    # --- Campaign history ---
    campaign: int = Field(
        ge=1,
        description="本次活动联系次数",
    )
    # -1 means the customer was not contacted before (see the description).
    pdays: int = Field(description="距离上次联系天数 (-1代表未联系)")
    previous: int = Field(
        ge=0,
        description="活动前联系次数",
    )
    poutcome: str = Field(description="上次活动结果")

    # NOTE: `duration` is intentionally not part of this model because it is
    # an after-the-fact variable.
class Decision(BaseModel):
    """Structured decision produced by the agent."""

    # Predicted purchase probability, constrained to the closed range [0, 1].
    risk_score: float = Field(
        ge=0,
        le=1,
        description="预测购买概率 (0-1)",
    )
    # Segment label chosen for the customer.
    customer_segment: str = Field(
        description="客户分群 (如: 高价值/潜在/沉睡)",
    )
    # Recommended strategy.
    decision: str = Field(
        description="建议策略 (如: 立即致电/邮件触达/放弃)",
    )
    # Concrete, executable follow-up actions.
    actions: list[str] = Field(
        description="可执行动作清单",
    )
    # Explanation that ties the decision to the model output and the rules.
    rationale: str = Field(
        description="决策依据 (结合模型预测与业务规则)",
    )
# ==========================================
# 2. 定义 Agent 类
# ==========================================
class MarketingAgent:
    """Marketing agent combining an ML probability model with rule-based strategies.

    The agent loads a bundle of model artifacts (encoders, feature order and
    a classifier) from disk, scores a customer, maps the score onto a
    strategy, and packs everything into a structured ``Decision``.

    Annotations that refer to the pydantic models are quoted so that defining
    this class does not require those names to be bound yet.
    """

    def __init__(self, model_path="models/model_artifacts.pkl"):
        """Store the artifact path and try to load the artifacts immediately.

        Args:
            model_path: Location of the joblib file with the model artifacts.
        """
        self.model_path = model_path
        self.artifacts = None
        self._load_model()

    def _load_model(self):
        """Load the artifacts from ``self.model_path`` when the file exists.

        If the file is missing, ``self.artifacts`` stays ``None`` and a
        warning is logged; ``predict_risk`` then returns a zero score.
        """
        if not os.path.exists(self.model_path):
            # A separator is placed between the path and the rest of the
            # message so the path stays readable in the log.
            logger.warning(
                "模型文件不存在: %s, Agent 将无法进行预测", self.model_path
            )
            return

        # SECURITY: joblib.load unpickles the file and can execute arbitrary
        # code; only point model_path at artifacts from a trusted source.
        self.artifacts = joblib.load(self.model_path)
        logger.info("Agent 已加载模型: %s", self.model_path)

    def predict_risk(self, features: "CustomerFeatures") -> dict:
        """Tool 1: call the ML model to predict the purchase probability.

        The input is dumped to a one-row DataFrame, categorical columns are
        transformed with the encoders stored in the artifacts, columns are
        aligned to the stored feature order, and the positive-class
        probability of the first row is returned.

        Args:
            features: Customer features; must provide ``model_dump()``.

        Returns:
            ``{"score": float, "top_features": list}`` on success, or
            ``{"score": 0.0, "reason": "Model not loaded"}`` when no
            artifacts are available.
        """
        if not self.artifacts:
            return {"score": 0.0, "reason": "Model not loaded"}

        # Convert the validated input into a single-row DataFrame.
        df = pd.DataFrame([features.model_dump()])

        # Re-apply the encoders saved at training time (label encoding
        # according to the original notes).
        for col, le in self.artifacts['encoders'].items():
            if col not in df.columns:
                continue
            try:
                df[col] = le.transform(df[col].astype(str))
            except (ValueError, KeyError):
                # Only "unknown category" style failures are tolerated;
                # interrupts and programming errors must propagate.
                # Presumably the encoders are sklearn LabelEncoders, which
                # raise ValueError on unseen labels -- TODO confirm.
                logger.warning("Unknown category in %s", col)
                df[col] = 0

        # Align columns with the order stored alongside the model.
        feature_names = self.artifacts['features']
        missing = [col for col in feature_names if col not in df.columns]
        if missing:
            # Zero-filling changes the model input, so make it visible.
            logger.warning(
                "Features missing from input, filled with 0: %s", missing
            )
            for col in missing:
                df[col] = 0
        X_input = df[feature_names]

        # Score with the LightGBM model stored in the artifacts.
        model = self.artifacts['lgb_model']
        prob = model.predict_proba(X_input)[0][1]
        return {
            "score": float(prob),
            # Simplified placeholder; a real implementation could use SHAP.
            "top_features": ["balance", "poutcome"],
        }

    def get_strategy(self, score: float) -> dict:
        """Tool 2: rule engine mapping a score onto a marketing strategy.

        Args:
            score: Predicted purchase probability.

        Returns:
            A dict with ``segment``, ``action_type`` and ``templates``.
            Scores above 0.6 are high intent, scores above 0.3 are potential
            customers, everything else is low intent.
        """
        if score > 0.6:
            return {
                "segment": "高意向 VIP",
                "action_type": "人工介入",
                "templates": ["尊贵的客户,鉴于您...", "专属理财经理一对一服务"],
            }
        if score > 0.3:
            return {
                "segment": "潜在客户",
                "action_type": "自动化营销",
                "templates": ["你好,近期理财活动...", "点击领取加息券"],
            }
        return {
            "segment": "低意向群体",
            "action_type": "静默/邮件",
            "templates": ["月度财经摘要"],
        }

    def run(self, features: "CustomerFeatures") -> "Decision":
        """Run the full agent pipeline for one customer.

        Args:
            features: Validated customer features.

        Returns:
            A ``Decision`` built from the model score and the chosen strategy.
        """
        logger.info("Agent 正在处理客户: %s, %s", features.job, features.age)

        # 1. Perceive: score the customer with the ML tool.
        pred_result = self.predict_risk(features)
        score = pred_result["score"]

        # 2. Plan: look up the strategy for that score.
        strategy = self.get_strategy(score)

        # 3. Decide: plain Python stands in for the structured output an LLM
        #    would produce in a real deployment.
        decision = Decision(
            risk_score=round(score, 4),
            customer_segment=strategy["segment"],
            decision=f"建议采取 {strategy['action_type']}",
            actions=[f"使用话术: {t}" for t in strategy["templates"]],
            rationale=f"模型预测概率为 {score:.1%},属于{strategy['segment']}。该群体对{strategy['action_type']}转化率较高。"
        )
        return decision
if __name__ == "__main__":
    # Manual smoke test: score one hand-written customer and print the result.
    agent = MarketingAgent()

    # Sample customer used as the test case.
    sample_fields = {
        "age": 35,
        "job": "management",
        "marital": "married",
        "education": "tertiary",
        "default": "no",
        "balance": 2000,
        "housing": "yes",
        "loan": "no",
        "contact": "cellular",
        "day": 15,
        "month": "may",
        "campaign": 1,
        "pdays": -1,
        "previous": 0,
        "poutcome": "unknown",
    }
    test_customer = CustomerFeatures(**sample_fields)

    result = agent.run(test_customer)
    print("\n=== Agent Decision ===")
    print(result.model_dump_json(indent=2))