chore: 初始化项目结构与基线模型
This commit is contained in:
commit
b077bae62d
1
.env.example
Normal file
1
.env.example
Normal file
@ -0,0 +1 @@
|
|||||||
|
DEEPSEEK_API_KEY=your-key-here
|
||||||
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
.env
|
||||||
|
.venv/
|
||||||
|
__pycache__/
|
||||||
|
.ipynb_checkpoints/
|
||||||
|
.DS_Store
|
||||||
1
.python-version
Normal file
1
.python-version
Normal file
@ -0,0 +1 @@
|
|||||||
|
3.12
|
||||||
BIN
.~完成记录.docx
Normal file
BIN
.~完成记录.docx
Normal file
Binary file not shown.
BIN
.~课设要求.docx
Normal file
BIN
.~课设要求.docx
Normal file
Binary file not shown.
284808
creditcard.csv
Normal file
284808
creditcard.csv
Normal file
File diff suppressed because it is too large
Load Diff
6
main.py
Normal file
6
main.py
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
def main():
    """Print the project's greeting line to standard output."""
    print("Hello from ml-course-project!")


# Only run when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||||
17
pyproject.toml
Normal file
17
pyproject.toml
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
[project]
|
||||||
|
name = "ml-course-project"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Machine-learning course project: baseline classifier on creditcard.csv plus a DeepSeek chat demo"
|
||||||
|
readme = "README.md"
|
||||||
|
requires-python = ">=3.12"
|
||||||
|
dependencies = [
|
||||||
|
"jupyter>=1.1.1",
|
||||||
|
"matplotlib>=3.10.8",
|
||||||
|
"numpy>=2.4.1",
|
||||||
|
"openai>=2.15.0",
|
||||||
|
"pandas>=2.3.3",
|
||||||
|
"python-dotenv>=1.2.1",
|
||||||
|
"scikit-learn>=1.8.0",
|
||||||
|
"seaborn>=0.13.2",
|
||||||
|
"streamlit>=1.52.2",
|
||||||
|
]
|
||||||
1
src/__init__.py
Normal file
1
src/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
__all__ = []
|
||||||
31
src/data_pipeline.py
Normal file
31
src/data_pipeline.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.base import BaseEstimator, TransformerMixin
|
||||||
|
from sklearn.impute import SimpleImputer
|
||||||
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
from sklearn.compose import ColumnTransformer
|
||||||
|
from sklearn.pipeline import Pipeline
|
||||||
|
|
||||||
|
class CleanTransformer(BaseEstimator, TransformerMixin):
    """Stateless cleaning step that removes duplicate rows.

    The transformer has no hyper-parameters and learns nothing in ``fit``,
    so ``transform`` may be called on a fresh instance.

    NOTE(review): ``transform`` can return fewer rows than it received and
    only ``X`` is filtered here; a separately held target vector is not
    touched by this class. Apply it before splitting features and labels.
    """

    def fit(self, X, y=None):
        """Return ``self`` unchanged; nothing is estimated from the data."""
        return self

    def transform(self, X):
        """Return the result of ``X.drop_duplicates()``.

        Assumes ``X`` is a pandas DataFrame (or another object exposing
        ``drop_duplicates``).
        """
        return X.drop_duplicates()
|
||||||
|
|
||||||
|
def build_preprocess(columns, target):
    """Build the preprocessing transformer for every non-target column.

    Args:
        columns: Iterable of column names; may include ``target``.
        target: Name of the label column, excluded from the features.

    Returns:
        A ``(ct, num_cols)`` tuple: the unfitted ``ColumnTransformer`` and
        the list of column names it is configured to process.
    """
    # Every column other than the target is routed through the numeric
    # pipeline; columns not listed are dropped by the transformer.
    num_cols = [name for name in columns if name != target]

    numeric = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="median")),
            ("scaler", StandardScaler()),
        ]
    )
    ct = ColumnTransformer([("num", numeric, num_cols)], remainder="drop")
    return ct, num_cols
|
||||||
|
|
||||||
|
def load_data(path):
    """Read the CSV file at ``path`` and return it as a pandas DataFrame.

    ``path`` is passed straight to ``pandas.read_csv`` with default options.
    """
    return pd.read_csv(path)
|
||||||
|
|
||||||
|
def split_Xy(df, target):
    """Split a DataFrame into a feature frame and an integer label series.

    Args:
        df: DataFrame containing the ``target`` column.
        target: Name of the label column.

    Returns:
        ``(X, y)`` where ``X`` is ``df`` without the ``target`` column and
        ``y`` is that column cast to ``int``.
    """
    features = df.drop(columns=[target])
    labels = df[target].astype(int)
    return features, labels
|
||||||
53
src/streamlit_app.py
Normal file
53
src/streamlit_app.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
import streamlit as st
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
# Load environment variables.
load_dotenv()

# Read the API key from the environment.
api_key = os.getenv("DEEPSEEK_API_KEY")

st.title("DeepSeek Chat Demo")

# Stop with a visible error when the key is missing or is still the
# "your-key-here" placeholder value.
if not api_key or api_key == "your-key-here":
    st.error("请在 .env 文件中配置 DEEPSEEK_API_KEY")
    st.stop()

# Initialise the client: the OpenAI SDK pointed at the DeepSeek base URL.
client = OpenAI(
    api_key=api_key,
    base_url="https://api.deepseek.com",
)

# Initialise the chat history in the session state on first use.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Render the chat history accumulated so far.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Handle new user input.
if prompt := st.chat_input("What is up?"):
    # Record the user message in the history and show it.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Request a streamed reply, sending the whole history as context.
    with st.chat_message("assistant"):
        stream = client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": m["role"], "content": m["content"]}
                for m in st.session_state.messages
            ],
            stream=True,
        )
        response = st.write_stream(stream)

    # Record the assistant reply in the history.
    st.session_state.messages.append({"role": "assistant", "content": response})
|
||||||
28
src/train_baseline.py
Normal file
28
src/train_baseline.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
import pandas as pd
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.metrics import classification_report, roc_auc_score
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
from sklearn.pipeline import Pipeline
|
||||||
|
from src.data_pipeline import load_data, CleanTransformer, build_preprocess, split_Xy
|
||||||
|
|
||||||
|
def main(data_path="creditcard.csv", target="Class"):
    """Train and evaluate the logistic-regression baseline.

    Loads the CSV, drops duplicate rows, holds out a stratified 20% test
    split, fits a preprocessing + logistic-regression pipeline, and prints
    the classification report and ROC-AUC for the held-out split.

    Args:
        data_path: Path of the CSV file to load. Defaults to the
            previously hard-coded ``"creditcard.csv"``.
        target: Name of the label column. Defaults to the previously
            hard-coded ``"Class"``.
    """
    df = load_data(data_path)

    # De-duplicate the full frame before separating features and labels,
    # so both stay row-aligned.
    df = CleanTransformer().transform(df)

    ct, _ = build_preprocess(df.columns.tolist(), target)
    X, y = split_Xy(df, target)
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=42,
        stratify=y,
    )

    clf = LogisticRegression(max_iter=1000, class_weight="balanced", n_jobs=1)
    pipe = Pipeline(steps=[("preprocess", ct), ("clf", clf)])
    pipe.fit(X_train, y_train)

    y_pred = pipe.predict(X_test)
    # Column 1 of predict_proba is used as the score for ROC-AUC.
    y_proba = pipe.predict_proba(X_test)[:, 1]

    report = classification_report(y_test, y_pred, digits=4)
    auc = roc_auc_score(y_test, y_proba)

    print("Classification Report")
    print(report)
    print("ROC-AUC", round(auc, 4))


if __name__ == "__main__":
    main()
|
||||||
3
uv.toml
Normal file
3
uv.toml
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
[[index]]
|
||||||
|
url = "https://mirrors.aliyun.com/pypi/simple/"
|
||||||
|
default = true
|
||||||
Loading…
Reference in New Issue
Block a user