G05-Customer_Sentiment/src/agent.py

78 lines
2.6 KiB
Python
Raw Normal View History

import os
import joblib
import numpy as np
import pandas as pd
from typing import Literal, Annotated
from pydantic import BaseModel, Field
# Module-level caches for the joblib artifacts. Each one stays None until
# _ensure_loaded() populates it on first use.
_model_lgb = None  # loaded from artifacts/lgb_pipeline.joblib; predict_risk calls predict_proba on it
_model_lr = None  # loaded from artifacts/lr_pipeline.joblib; explain_features reads its named_steps
_le = None  # loaded from artifacts/label_encoder.joblib; used to look up the index of "negative"
class CustomerFeatures(BaseModel):
    """Validated input record passed to the risk and explanation helpers.

    Field names become DataFrame column names when the record is converted
    with ``model_dump()``, so they presumably have to match the columns the
    saved pipelines were trained on (verify against the training code).
    """

    # Closed-vocabulary categorical fields.
    gender: Literal["male", "female", "other"]
    age_group: Literal["18-25", "26-35", "36-45", "46-60", "60+"]
    region: Literal["north", "south", "east", "west", "central"]
    purchase_channel: Literal["online", "offline"]

    # Free-form categorical fields (no fixed vocabulary is enforced here).
    product_category: str
    platform: str

    # Numeric field; must be greater than or equal to zero.
    response_time_hours: Annotated[float, Field(ge=0)]

    # Boolean flags.
    issue_resolved: bool
    complaint_registered: bool

    # Review text; must contain at least 3 characters.
    review_text: Annotated[str, Field(min_length=3)]
class RiskOutput(BaseModel):
    """Structured wrapper around a single risk score."""

    # Score as produced by predict_risk (probability of the "negative" class).
    risk: float
class ExplanationOutput(BaseModel):
    """Structured wrapper around a list of human-readable factor strings."""

    # One description per contributing feature, as produced by explain_features.
    factors: list[str]
def _ensure_loaded():
    """Populate the module-level model caches on first use.

    Each artifact is read with ``joblib.load`` only while its cache variable
    is still ``None``; later calls are no-ops for anything already loaded.

    NOTE(review): the ``artifacts`` directory is a relative path, so it is
    resolved against the process's current working directory.
    """
    global _model_lgb, _model_lr, _le

    def _load_artifact(filename):
        # All artifacts live side by side in the "artifacts" directory.
        return joblib.load(os.path.join("artifacts", filename))

    if _model_lgb is None:
        _model_lgb = _load_artifact("lgb_pipeline.joblib")
    if _model_lr is None:
        _model_lr = _load_artifact("lr_pipeline.joblib")
    if _le is None:
        _le = _load_artifact("label_encoder.joblib")
def _to_dataframe(features) -> pd.DataFrame:
    """Convert one feature record into a single-row DataFrame.

    Args:
        features: A ``CustomerFeatures`` instance (dumped via ``model_dump()``)
            or a plain ``dict`` (used as-is, without validation).

    Returns:
        A DataFrame with exactly one row whose columns are the record's keys.

    Raises:
        TypeError: If ``features`` is neither a ``CustomerFeatures`` nor a dict.
    """
    if isinstance(features, CustomerFeatures):
        return pd.DataFrame([features.model_dump()])
    if isinstance(features, dict):
        return pd.DataFrame([features])
    raise TypeError("features must be CustomerFeatures or dict")
def predict_risk(features: CustomerFeatures | dict) -> float:
    """Return the predicted probability of the "negative" class.

    Args:
        features: A ``CustomerFeatures`` instance or an equivalent dict.

    Returns:
        The ``predict_proba`` entry for the single input row at the position
        the label encoder assigns to ``"negative"``.
    """
    _ensure_loaded()
    frame = _to_dataframe(features)

    # predict_proba returns one row per input row; there is exactly one here.
    class_probabilities = _model_lgb.predict_proba(frame)
    first_row = class_probabilities[0]

    # The label encoder maps the string label to its integer class index.
    encoded_negative = _le.transform(["negative"])
    negative_position = int(encoded_negative[0])

    return float(first_row[negative_position])
def predict_risk_model(features: CustomerFeatures | dict) -> RiskOutput:
    """Compute the risk score and wrap it in a ``RiskOutput`` model."""
    score = predict_risk(features)
    return RiskOutput(risk=score)
def explain_features(features: CustomerFeatures | dict, top_k: int = 8) -> list[str]:
    """Describe the features contributing most to the "negative" class score.

    The record is run through the linear pipeline's ``preprocessor`` step and
    each transformed feature value is multiplied by the ``classifier`` step's
    coefficient for the "negative" class. The features with the largest
    absolute products are reported.

    Args:
        features: A ``CustomerFeatures`` instance or an equivalent dict.
        top_k: Maximum number of factors to return (default 8, matching the
            previously hard-coded limit). Values <= 0 yield an empty list.

    Returns:
        Up to ``top_k`` strings of the form
        ``"<feature> increase|decrease negative risk (weight=<w>)"``, ordered
        by decreasing absolute contribution. Features whose contribution is
        exactly zero are omitted, since they neither raise nor lower the
        score, so fewer than ``top_k`` entries may come back.
    """
    _ensure_loaded()
    frame = _to_dataframe(features)

    preprocessor = _model_lr.named_steps["preprocessor"]
    classifier = _model_lr.named_steps["classifier"]

    # The preprocessor may return a scipy sparse matrix or a dense array
    # depending on its configuration; densify only when needed.
    transformed = preprocessor.transform(frame)
    if hasattr(transformed, "toarray"):
        transformed = transformed.toarray()
    row = np.asarray(transformed).ravel()

    idx_neg = int(_le.transform(["negative"])[0])
    coef_matrix = np.asarray(classifier.coef_)
    if coef_matrix.ndim == 2 and coef_matrix.shape[0] == 1:
        # Binary linear models in scikit-learn expose a single coefficient
        # row that describes the class encoded as 1; the class encoded as 0
        # is scored by the negated row. (Assumes the classifier follows that
        # convention, e.g. LogisticRegression.)
        coefs = coef_matrix[0] if idx_neg == 1 else -coef_matrix[0]
    else:
        coefs = coef_matrix[idx_neg]

    contrib = row * coefs
    names = preprocessor.get_feature_names_out()

    limit = max(int(top_k), 0)
    order = np.argsort(-np.abs(contrib))[:limit]

    out = []
    for i in order:
        weight = contrib[i]
        if weight == 0:
            # Inactive feature: it has no effect, so do not label it.
            continue
        direction = "increase" if weight > 0 else "decrease"
        out.append(f"{names[i]} {direction} negative risk (weight={weight:.3f})")
    return out
def explain_features_model(features: CustomerFeatures | dict) -> ExplanationOutput:
    """Compute the factor descriptions and wrap them in an ``ExplanationOutput``."""
    factor_descriptions = explain_features(features)
    return ExplanationOutput(factors=factor_descriptions)