78 lines
2.6 KiB
Python
78 lines
2.6 KiB
Python
|
|
import os
|
||
|
|
import joblib
|
||
|
|
import numpy as np
|
||
|
|
import pandas as pd
|
||
|
|
from typing import Literal, Annotated
|
||
|
|
from pydantic import BaseModel, Field
|
||
|
|
|
||
|
|
# Module-level caches for the joblib artifacts; each one stays None until
# _ensure_loaded() fills it in on first use.
_model_lgb = _model_lr = _le = None
|
||
|
|
|
||
|
|
class CustomerFeatures(BaseModel):
    """Feature payload accepted by ``predict_risk`` and ``explain_features``.

    The ``Literal`` and ``Field`` constraints below are enforced by pydantic
    when an instance is constructed.
    """

    # Closed sets: only the listed strings are accepted.
    gender: Literal["male", "female", "other"]
    age_group: Literal["18-25", "26-35", "36-45", "46-60", "60+"]
    region: Literal["north", "south", "east", "west", "central"]
    product_category: str  # any string; no value restriction declared here
    purchase_channel: Literal["online", "offline"]
    platform: str  # any string; no value restriction declared here
    response_time_hours: Annotated[float, Field(ge=0)]  # must be >= 0
    issue_resolved: bool
    complaint_registered: bool
    review_text: Annotated[str, Field(min_length=3)]  # at least 3 characters
|
||
|
|
|
||
|
|
class RiskOutput(BaseModel):
    """Result model returned by ``predict_risk_model``."""

    risk: float  # the value computed by predict_risk()
|
||
|
|
|
||
|
|
class ExplanationOutput(BaseModel):
    """Result model returned by ``explain_features_model``."""

    factors: list[str]  # the strings produced by explain_features()
|
||
|
|
|
||
|
|
def _ensure_loaded():
    """Populate the module-level model caches on first use.

    Each of ``_model_lgb``, ``_model_lr`` and ``_le`` is read with
    ``joblib.load`` from the ``artifacts`` directory only while it is still
    ``None``; objects that are already loaded are left untouched.
    """
    global _model_lgb, _model_lr, _le

    def _load(filename):
        # The path is relative, so it resolves against the current working
        # directory of the process.
        return joblib.load(os.path.join("artifacts", filename))

    if _model_lgb is None:
        _model_lgb = _load("lgb_pipeline.joblib")
    if _model_lr is None:
        _model_lr = _load("lr_pipeline.joblib")
    if _le is None:
        _le = _load("label_encoder.joblib")
|
||
|
|
|
||
|
|
def _to_dataframe(features) -> pd.DataFrame:
    """Build a one-row DataFrame from a feature payload.

    Args:
        features: A ``CustomerFeatures`` instance (converted with
            ``model_dump()``) or a plain dict, which is used as-is.

    Returns:
        A DataFrame containing exactly one row with one column per key.

    Raises:
        TypeError: If ``features`` is neither a ``CustomerFeatures`` nor a dict.
    """
    if isinstance(features, CustomerFeatures):
        return pd.DataFrame([features.model_dump()])
    if isinstance(features, dict):
        return pd.DataFrame([features])
    raise TypeError("features must be CustomerFeatures or dict")
|
||
|
|
|
||
|
|
def predict_risk(features: CustomerFeatures | dict) -> float:
    """Return the predicted probability of the "negative" class.

    The payload is turned into a one-row DataFrame and scored with the
    pipeline cached in ``_model_lgb``; the label encoder in ``_le`` supplies
    the position of "negative" within the probability vector.

    Args:
        features: A ``CustomerFeatures`` instance or a dict of feature values.

    Returns:
        The probability assigned to "negative", as a built-in ``float``.
    """
    _ensure_loaded()
    frame = _to_dataframe(features)
    # Only one row is scored, so take the first (and only) probability vector.
    class_probs = _model_lgb.predict_proba(frame)[0]
    negative_idx = int(_le.transform(["negative"])[0])
    return float(class_probs[negative_idx])
|
||
|
|
|
||
|
|
def predict_risk_model(features: CustomerFeatures | dict) -> RiskOutput:
    """Run ``predict_risk`` and wrap the score in a ``RiskOutput``."""
    score = predict_risk(features)
    return RiskOutput(risk=score)
|
||
|
|
|
||
|
|
def explain_features(
    features: CustomerFeatures | dict, top_k: int = 8
) -> list[str]:
    """Describe the features that contribute most to the "negative" class.

    The single input row is passed through the ``preprocessor`` step of the
    pipeline cached in ``_model_lr``. Each transformed value is multiplied by
    the matching ``classifier`` coefficient for the "negative" class, and the
    products are ranked by absolute size.

    Args:
        features: A ``CustomerFeatures`` instance or a dict of feature values.
        top_k: Maximum number of factors to report. Defaults to 8.

    Returns:
        Up to ``top_k`` human-readable strings, largest absolute contribution
        first. Features whose contribution is exactly zero are omitted, so
        the list may be shorter than ``top_k``.

    Raises:
        TypeError: If ``features`` is neither a ``CustomerFeatures`` nor a
            dict (raised by ``_to_dataframe``).
        ValueError: If ``top_k`` is negative.
    """
    if top_k < 0:
        raise ValueError("top_k must be non-negative")

    _ensure_loaded()
    df = _to_dataframe(features)
    pre = _model_lr.named_steps["preprocessor"]
    transformed = pre.transform(df)
    clf = _model_lr.named_steps["classifier"]

    idx_neg = int(_le.transform(["negative"])[0])
    # NOTE(review): indexing coef_ by the encoder index assumes the classifier
    # exposes one coefficient row per encoded class (multiclass); a binary
    # scikit-learn linear model has a single row -- confirm against training.
    coefs = clf.coef_[idx_neg]

    # The preprocessor may hand back a scipy sparse matrix or a dense array
    # depending on its configuration; accept both instead of assuming sparse.
    if hasattr(transformed, "toarray"):
        transformed = transformed.toarray()
    vec = np.asarray(transformed).ravel()

    contrib = vec * coefs
    names = pre.get_feature_names_out()
    ranked = np.argsort(-np.abs(contrib))[:top_k]

    out = []
    for i in ranked:
        weight = contrib[i]
        if weight == 0:
            # Sorted by descending magnitude, so everything after this is
            # zero as well; a zero weight neither raises nor lowers the risk.
            break
        direction = "increase" if weight > 0 else "decrease"
        out.append(f"{names[i]} {direction} negative risk (weight={weight:.3f})")
    return out
|
||
|
|
|
||
|
|
def explain_features_model(features: CustomerFeatures | dict) -> ExplanationOutput:
    """Run ``explain_features`` and wrap the result in an ``ExplanationOutput``."""
    reasons = explain_features(features)
    return ExplanationOutput(factors=reasons)
|