From b46c65ac73e0e19d04d2dc3662a8d14270a8ac90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=BD=A0=E7=9A=84=E7=9C=9F=E5=AE=9E=E5=A7=93=E5=90=8D?=
 <你的邮箱地址>
Date: Mon, 12 Jan 2026 16:05:47 +0800
Subject: [PATCH] =?UTF-8?q?init:=20=E6=8F=90=E4=BA=A4=E7=94=B5=E4=BF=A1?=
 =?UTF-8?q?=E6=B5=81=E5=A4=B1=E9=A2=84=E6=B5=8B=E4=BB=A3=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md        |  0
 requirements.txt |  0
 src/__init__.py  |  0
 src/data.py      | 54 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/features.py  |  0
 src/infer.py     |  0
 src/train.py     |  0
 7 files changed, 54 insertions(+)
 create mode 100644 README.md
 create mode 100644 requirements.txt
 create mode 100644 src/__init__.py
 create mode 100644 src/data.py
 create mode 100644 src/features.py
 create mode 100644 src/infer.py
 create mode 100644 src/train.py

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e69de29
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/data.py b/src/data.py
new file mode 100644
index 0000000..def428e
--- /dev/null
+++ b/src/data.py
@@ -0,0 +1,54 @@
+"""电信客户流失数据集加载与清洗（最终可运行版）"""
+import polars as pl
+import os  # 用于检查文件是否存在
+
+def load_telco_data(data_path=None):
+    """Load the Telco customer-churn CSV and make TotalCharges numeric.
+
+    Args:
+        data_path: Path to the CSV file. ``None`` (the default) keeps the
+            original behaviour of reading the hard-coded local path.
+
+    Returns:
+        The cleaned ``polars.DataFrame``, or ``None`` after printing a
+        diagnostic when the file is missing or loading/cleaning fails.
+    """
+    if data_path is None:
+        # Machine-specific fallback kept so existing no-argument calls work.
+        data_path = r"C:\Users\s1313\Desktop\telco_churn_analysis\data\WA_Fn-UseC_-Telco-Customer-Churn.csv"
+
+    # isfile rather than exists: a directory at this path is reported here
+    # instead of surfacing later as a read error.
+    if not os.path.isfile(data_path):
+        print("\n❌ 错误：文件不存在！")
+        print(f"👉 请检查路径是否正确：{data_path}")
+        print("👉 确认文件是否在这个位置，且文件名没有写错")
+        return None  # report and bail out instead of raising
+
+    try:
+        df = pl.read_csv(data_path)
+        total_charges = pl.col("TotalCharges")
+        if df["TotalCharges"].dtype == pl.Utf8:
+            # Text column: swap a space for "0" (presumably blank cells are
+            # a single space -- confirm against the data), then cast
+            # non-strictly so values that still fail to parse become null.
+            total_charges = total_charges.str.replace(" ", "0").cast(
+                pl.Float64, strict=False
+            )
+        # Nulls (original or produced by the cast) are filled with 0.0.
+        df = df.with_columns(total_charges.fill_null(0.0).alias("TotalCharges"))
+
+        print("\n✅ 数据集加载并清洗完成！")
+        print(f"📊 数据规模：{df.shape[0]}行 × {df.shape[1]}列")
+        print(f"📈 TotalCharges字段类型：{df['TotalCharges'].dtype}")
+        print("🔍 前2行预览：")
+        print(df.head(2))
+        return df
+    except Exception as e:
+        # Deliberate catch-all: callers get None plus a printed message.
+        print(f"\n❌ 数据处理出错：{type(e).__name__} → {e}")
+        return None
+
+# Test entry point: running this file directly calls the loader once.
+if __name__ == "__main__":
+    load_telco_data()
\ No newline at end of file
diff --git a/src/features.py b/src/features.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/infer.py b/src/infer.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/train.py b/src/train.py
new file mode 100644
index 0000000..e69de29