"""Customer risk scoring and explanation helpers backed by persisted pipelines.

Three joblib artifacts are loaded lazily from the ``artifacts`` directory
(resolved relative to the current working directory) the first time a
prediction or explanation is requested.
"""

import os
from typing import Annotated, Literal

import joblib
import numpy as np
import pandas as pd
from pydantic import BaseModel, Field

# Populated on first use by _ensure_loaded(); None means "not loaded yet".
_model_lgb = None
_model_lr = None
_le = None

# Maximum number of factors returned by explain_features().
_MAX_FACTORS = 8


class CustomerFeatures(BaseModel):
    """Validated input record for the risk and explanation functions."""

    gender: Literal["male", "female", "other"]
    age_group: Literal["18-25", "26-35", "36-45", "46-60", "60+"]
    region: Literal["north", "south", "east", "west", "central"]
    product_category: str
    purchase_channel: Literal["online", "offline"]
    platform: str
    # Must be non-negative.
    response_time_hours: Annotated[float, Field(ge=0)]
    issue_resolved: bool
    complaint_registered: bool
    # Must contain at least three characters.
    review_text: Annotated[str, Field(min_length=3)]


class RiskOutput(BaseModel):
    """Wrapper model carrying the value returned by ``predict_risk``."""

    risk: float


class ExplanationOutput(BaseModel):
    """Wrapper model carrying the strings returned by ``explain_features``."""

    factors: list[str]


def _ensure_loaded():
    """Load any artifact that has not been loaded yet into the module globals.

    Each artifact is checked independently, so a partially initialised module
    only loads what is still missing.
    """
    global _model_lgb, _model_lr, _le
    if _model_lgb is None:
        _model_lgb = joblib.load(os.path.join("artifacts", "lgb_pipeline.joblib"))
    if _model_lr is None:
        _model_lr = joblib.load(os.path.join("artifacts", "lr_pipeline.joblib"))
    if _le is None:
        _le = joblib.load(os.path.join("artifacts", "label_encoder.joblib"))


def _to_dataframe(features) -> pd.DataFrame:
    """Convert a ``CustomerFeatures`` instance or a plain dict to a one-row frame.

    Raises:
        TypeError: if *features* is neither a ``CustomerFeatures`` nor a dict.
    """
    if isinstance(features, CustomerFeatures):
        payload = features.model_dump()
    elif isinstance(features, dict):
        # Dicts are passed through as-is; they are not validated here.
        payload = features
    else:
        raise TypeError("features must be CustomerFeatures or dict")
    return pd.DataFrame([payload])


def _negative_class_index() -> int:
    """Return the label encoder's integer code for the ``"negative"`` label."""
    return int(_le.transform(["negative"])[0])


def predict_risk(features: CustomerFeatures | dict) -> float:
    """Return the ``lgb_pipeline`` probability assigned to the "negative" class.

    The probability column is selected with the label encoder's code for
    ``"negative"``.
    NOTE(review): this assumes the pipeline's class order matches the label
    encoder's codes -- confirm against the training script.
    """
    _ensure_loaded()
    df = _to_dataframe(features)
    probs = _model_lgb.predict_proba(df)[0]
    return float(probs[_negative_class_index()])


def predict_risk_model(features: CustomerFeatures | dict) -> RiskOutput:
    """Return ``predict_risk(features)`` wrapped in a ``RiskOutput``."""
    return RiskOutput(risk=predict_risk(features))


def explain_features(features: CustomerFeatures | dict) -> list[str]:
    """Describe the transformed features with the largest linear contributions.

    The input is run through the ``preprocessor`` step of ``lr_pipeline`` and
    multiplied element-wise by the ``classifier`` step's coefficients for the
    "negative" class. Up to eight entries with the largest absolute
    contribution are returned, largest first, each formatted as
    ``"<feature name> <increase|decrease> negative risk (weight=<value>)"``.
    """
    _ensure_loaded()
    df = _to_dataframe(features)

    pre = _model_lr.named_steps["preprocessor"]
    clf = _model_lr.named_steps["classifier"]

    # The preprocessor may return either a sparse matrix or a dense array;
    # only densify when the result actually supports it.
    transformed = pre.transform(df)
    if hasattr(transformed, "toarray"):
        transformed = transformed.toarray()
    vec = np.asarray(transformed).ravel()

    idx_neg = _negative_class_index()
    coef_matrix = np.asarray(clf.coef_)
    if coef_matrix.ndim == 2 and coef_matrix.shape[0] == 1:
        # Binary scikit-learn linear classifiers expose a single coefficient
        # row that refers to class 1; class 0 is its negation. Indexing that
        # row by idx_neg would be out of range (idx 1) or sign-inverted (idx 0).
        coefs = coef_matrix[0] if idx_neg == 1 else -coef_matrix[0]
    else:
        coefs = coef_matrix[idx_neg]

    contrib = vec * coefs
    names = pre.get_feature_names_out()
    order = np.argsort(-np.abs(contrib))[:_MAX_FACTORS]

    return [
        f"{names[i]} {'increase' if contrib[i] > 0 else 'decrease'} "
        f"negative risk (weight={contrib[i]:.3f})"
        for i in order
    ]


def explain_features_model(features: CustomerFeatures | dict) -> ExplanationOutput:
    """Return ``explain_features(features)`` wrapped in an ``ExplanationOutput``."""
    return ExplanationOutput(factors=explain_features(features))