"""Load and clean the Telco customer churn dataset."""
import os
from typing import TYPE_CHECKING, Optional, Union

if TYPE_CHECKING:  # needed for annotations only; the runtime import is lazy
    import polars as pl

# Machine-specific location used by the original script. It is kept as the
# default so that existing no-argument calls keep working; pass ``data_path``
# to load the CSV from anywhere else.
DEFAULT_DATA_PATH = (
    r"C:\Users\s1313\Desktop\telco_churn_analysis\data"
    r"\WA_Fn-UseC_-Telco-Customer-Churn.csv"
)


def load_telco_data(
    data_path: Optional[Union[str, os.PathLike]] = None,
) -> Optional["pl.DataFrame"]:
    """Read the Telco churn CSV and normalise its ``TotalCharges`` column.

    ``TotalCharges`` is converted to ``Float64``. When the column was read as
    text, surrounding whitespace is stripped first; values that still cannot
    be parsed as numbers (including blank cells) become ``0.0``, as do nulls.

    Args:
        data_path: Path of the CSV file. ``None`` (the default) selects
            ``DEFAULT_DATA_PATH``.

    Returns:
        The cleaned DataFrame, or ``None`` when the file is missing or when
        reading/cleaning fails. In both failure cases a diagnostic message is
        printed instead of raising.

    Raises:
        ImportError: If the file exists but ``polars`` is not installed.
    """
    path = os.fspath(DEFAULT_DATA_PATH if data_path is None else data_path)

    # isfile (rather than exists) so that a directory is rejected up front.
    if not os.path.isfile(path):
        print("\n❌ 错误：文件不存在！")
        print(f"👉 请检查路径是否正确：{path}")
        print("👉 确认文件是否在这个位置，且文件名没有写错")
        return None

    # Imported here so that the missing-file path above does not need polars.
    import polars as pl

    try:
        df = pl.read_csv(path)

        total_charges = pl.col("TotalCharges")
        if df["TotalCharges"].dtype == pl.Utf8:
            # Strip whitespace instead of substituting a digit for a space:
            # blank cells become "" and fail the non-strict cast (-> null),
            # while a malformed value such as "1 234" is no longer turned
            # into the bogus number 10234.
            total_charges = total_charges.str.strip_chars()

        # Cast in every case so the returned dtype does not depend on what
        # CSV type inference happened to pick for this column.
        df = df.with_columns(
            total_charges
            .cast(pl.Float64, strict=False)
            .fill_null(0.0)
            .alias("TotalCharges")
        )

        print("\n✅ 数据集加载并清洗完成！")
        print(f"📊 数据规模：{df.shape[0]}行 × {df.shape[1]}列")
        print(f"📈 TotalCharges字段类型：{df['TotalCharges'].dtype}")
        print("🔍 前2行预览：")
        print(df.head(2))
        return df

    except Exception as e:
        # Deliberate best-effort boundary: report the problem and signal
        # failure with None rather than crashing the caller.
        print(f"\n❌ 数据处理出错：{type(e).__name__} → {e}")
        return None


# Manual smoke test.
if __name__ == "__main__":
    load_telco_data()