chore: 初始化项目结构与基线模型

This commit is contained in:
邢可易 2026-01-12 15:43:32 +08:00
commit b077bae62d
17 changed files with 287552 additions and 0 deletions

1
.env.example Normal file
View File

@ -0,0 +1 @@
DEEPSEEK_API_KEY=your-key-here

5
.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
.env
.venv/
__pycache__/
.ipynb_checkpoints/
.DS_Store

1
.python-version Normal file
View File

@ -0,0 +1 @@
3.12

BIN
.~完成记录.docx Normal file

Binary file not shown.

BIN
.~课设要求.docx Normal file

Binary file not shown.

0
README.md Normal file
View File

284808
creditcard.csv Normal file

File diff suppressed because it is too large Load Diff

6
main.py Normal file
View File

@ -0,0 +1,6 @@
def main():
    """Print the project greeting to standard output."""
    greeting = "Hello from ml-course-project!"
    print(greeting)


# Only run the greeting when this file is executed as a script.
if __name__ == "__main__":
    main()

17
pyproject.toml Normal file
View File

@ -0,0 +1,17 @@
[project]
name = "ml-course-project"
version = "0.1.0"
description = "Machine-learning course project: baseline classifier on creditcard.csv and a DeepSeek chat demo"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"jupyter>=1.1.1",
"matplotlib>=3.10.8",
"numpy>=2.4.1",
"openai>=2.15.0",
"pandas>=2.3.3",
"python-dotenv>=1.2.1",
"scikit-learn>=1.8.0",
"seaborn>=0.13.2",
"streamlit>=1.52.2",
]

1
src/__init__.py Normal file
View File

@ -0,0 +1 @@
# The package exports no public names through `from src import *`.
__all__ = []

31
src/data_pipeline.py Normal file
View File

@ -0,0 +1,31 @@
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
class CleanTransformer(BaseEstimator, TransformerMixin):
    """Stateless cleaning step that removes duplicated rows.

    The transformer has no hyperparameters and learns nothing during
    ``fit``; all of the work happens in ``transform``.

    NOTE(review): ``transform`` can return fewer rows than it received, so a
    separately held label vector would no longer line up with the output.
    Confirm that callers apply it before features and labels are split.
    """

    def __init__(self):
        """Create the transformer; there is nothing to configure."""

    def fit(self, X, y=None):
        """Return the transformer unchanged; ``X`` and ``y`` are ignored."""
        return self

    def transform(self, X):
        """Return the result of ``X.drop_duplicates()``.

        ``X`` must provide a ``drop_duplicates`` method (for example a
        pandas DataFrame).
        """
        return X.drop_duplicates()
def build_preprocess(columns, target):
    """Build the preprocessing transformer for every non-target column.

    Args:
        columns: Iterable of column names, possibly including ``target``.
        target: Name of the label column to leave out of the features.

    Returns:
        A ``(transformer, feature_names)`` tuple. ``transformer`` is a
        ``ColumnTransformer`` that median-imputes and then standard-scales
        the feature columns and drops every other column;
        ``feature_names`` is the list of columns it operates on.
    """
    feature_names = [name for name in columns if name != target]
    numeric_steps = [
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
    numeric_pipeline = Pipeline(steps=numeric_steps)
    transformer = ColumnTransformer(
        [("num", numeric_pipeline, feature_names)],
        remainder="drop",
    )
    return transformer, feature_names
def load_data(path):
    """Read the CSV at ``path`` and return it as a pandas DataFrame.

    ``path`` is passed straight to ``pandas.read_csv`` with default options.
    """
    return pd.read_csv(path)
def split_Xy(df, target):
    """Separate a DataFrame into a feature frame and an integer label series.

    Args:
        df: DataFrame that contains the ``target`` column.
        target: Name of the label column.

    Returns:
        A ``(features, labels)`` tuple: ``features`` is ``df`` without the
        ``target`` column and ``labels`` is that column cast to ``int``.
        ``df`` itself is not modified.
    """
    features = df.drop(columns=[target])
    labels = df[target].astype(int)
    return features, labels

53
src/streamlit_app.py Normal file
View File

@ -0,0 +1,53 @@
import streamlit as st
import os
from dotenv import load_dotenv
from openai import OpenAI
# Load environment variables.
load_dotenv()

# Read the API key from the environment.
api_key = os.getenv("DEEPSEEK_API_KEY")

st.title("DeepSeek Chat Demo")

# Stop when the key is missing, empty, or still the placeholder value.
if api_key in (None, "", "your-key-here"):
    st.error("请在 .env 文件中配置 DEEPSEEK_API_KEY")
    st.stop()

# Initialise the DeepSeek client via the OpenAI SDK and DeepSeek's base URL.
client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com")

# Initialise the chat history on the first run of the session.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Render the chat history.
for message in st.session_state.messages:
    role, content = message["role"], message["content"]
    with st.chat_message(role):
        st.markdown(content)

# Accept user input.
prompt = st.chat_input("What is up?")
if prompt:
    # Record the user message in the history and show it.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Request the reply and render it as it streams in.
    with st.chat_message("assistant"):
        payload = [
            {"role": m["role"], "content": m["content"]}
            for m in st.session_state.messages
        ]
        stream = client.chat.completions.create(
            model="deepseek-chat",
            messages=payload,
            stream=True,
        )
        response = st.write_stream(stream)

    # Record the assistant reply in the history.
    st.session_state.messages.append({"role": "assistant", "content": response})

28
src/train_baseline.py Normal file
View File

@ -0,0 +1,28 @@
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from src.data_pipeline import load_data, CleanTransformer, build_preprocess, split_Xy
def main(data_path="creditcard.csv", target="Class"):
    """Train the logistic-regression baseline and print its test metrics.

    Args:
        data_path: CSV file to load. Defaults to ``"creditcard.csv"``, which
            is resolved relative to the current working directory.
        target: Name of the label column. Defaults to ``"Class"``.

    Prints a classification report and the ROC-AUC computed on a held-out
    20% test split.
    """
    df = load_data(data_path)

    # Drop duplicate rows before splitting, while features and labels still
    # live in the same frame and therefore stay aligned.
    df = CleanTransformer().transform(df)

    preprocess, _ = build_preprocess(df.columns.tolist(), target)
    X, y = split_Xy(df, target)

    # Stratify on the label and fix the seed so the split is reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    clf = LogisticRegression(max_iter=1000, class_weight="balanced", n_jobs=1)
    pipe = Pipeline(steps=[("preprocess", preprocess), ("clf", clf)])
    pipe.fit(X_train, y_train)

    y_pred = pipe.predict(X_test)
    # Column 1 of predict_proba is the score for the second entry of
    # pipe.classes_ (the positive class for 0/1 labels).
    y_proba = pipe.predict_proba(X_test)[:, 1]

    report = classification_report(y_test, y_pred, digits=4)
    auc = roc_auc_score(y_test, y_proba)

    print("Classification Report")
    print(report)
    print("ROC-AUC", round(auc, 4))


if __name__ == "__main__":
    main()

2598
uv.lock generated Normal file

File diff suppressed because it is too large Load Diff

3
uv.toml Normal file
View File

@ -0,0 +1,3 @@
[[index]]
url = "https://mirrors.aliyun.com/pypi/simple/"
default = true

BIN
完成记录.docx Normal file

Binary file not shown.

BIN
课设要求.docx Normal file

Binary file not shown.