chore: 初始化项目结构与基线模型
This commit is contained in:
commit
b077bae62d
1
.env.example
Normal file
1
.env.example
Normal file
@ -0,0 +1 @@
|
||||
DEEPSEEK_API_KEY=your-key-here
|
||||
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
.env
|
||||
.venv/
|
||||
__pycache__/
|
||||
.ipynb_checkpoints/
|
||||
.DS_Store
|
||||
1
.python-version
Normal file
1
.python-version
Normal file
@ -0,0 +1 @@
|
||||
3.12
|
||||
BIN
.~完成记录.docx
Normal file
BIN
.~完成记录.docx
Normal file
Binary file not shown.
BIN
.~课设要求.docx
Normal file
BIN
.~课设要求.docx
Normal file
Binary file not shown.
284808
creditcard.csv
Normal file
284808
creditcard.csv
Normal file
File diff suppressed because it is too large
Load Diff
6
main.py
Normal file
6
main.py
Normal file
@ -0,0 +1,6 @@
|
||||
def main() -> None:
    """Print the project's placeholder greeting to standard output."""
    print("Hello from ml-course-project!")


if __name__ == "__main__":
    main()
|
||||
17
pyproject.toml
Normal file
17
pyproject.toml
Normal file
@ -0,0 +1,17 @@
|
||||
[project]
|
||||
name = "ml-course-project"
|
||||
version = "0.1.0"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
"jupyter>=1.1.1",
|
||||
"matplotlib>=3.10.8",
|
||||
"numpy>=2.4.1",
|
||||
"openai>=2.15.0",
|
||||
"pandas>=2.3.3",
|
||||
"python-dotenv>=1.2.1",
|
||||
"scikit-learn>=1.8.0",
|
||||
"seaborn>=0.13.2",
|
||||
"streamlit>=1.52.2",
|
||||
]
|
||||
1
src/__init__.py
Normal file
1
src/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# The package root re-exports nothing; `from src import *` binds no names.
__all__ = []
|
||||
31
src/data_pipeline.py
Normal file
31
src/data_pipeline.py
Normal file
@ -0,0 +1,31 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.base import BaseEstimator, TransformerMixin
|
||||
from sklearn.impute import SimpleImputer
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.compose import ColumnTransformer
|
||||
from sklearn.pipeline import Pipeline
|
||||
|
||||
class CleanTransformer(BaseEstimator, TransformerMixin):
    """Stateless cleaning step that removes duplicated rows.

    The transformer has no hyper-parameters and learns nothing in ``fit``,
    so no custom ``__init__`` is needed.

    NOTE: ``transform`` may return fewer rows than it receives. Any label
    column that must stay aligned with the features has to be part of the
    frame being cleaned (or be re-derived afterwards).
    """

    def fit(self, X: pd.DataFrame, y=None) -> "CleanTransformer":
        """Return ``self`` unchanged; ``X`` and ``y`` are not inspected."""
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Return the result of ``X.drop_duplicates()``.

        The input frame itself is not modified; the surviving rows keep
        their original index labels.
        """
        return X.drop_duplicates()
|
||||
|
||||
def build_preprocess(columns: list[str], target: str) -> tuple[ColumnTransformer, list[str]]:
    """Build the preprocessing transformer for every non-target column.

    Args:
        columns: Names of all columns in the dataset, including the target.
        target: Name of the label column; it is excluded from preprocessing.

    Returns:
        A ``(transformer, feature_columns)`` pair: an unfitted
        ``ColumnTransformer`` that median-imputes and then standard-scales
        the selected columns (dropping any others), and the list of column
        names it selects.
    """
    # Every non-target column is routed through the numeric pipeline.
    # NOTE(review): assumes all features are numeric — confirm against the data.
    num_cols = [c for c in columns if c != target]
    numeric = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="median")),
            ("scaler", StandardScaler()),
        ]
    )
    ct = ColumnTransformer([("num", numeric, num_cols)], remainder="drop")
    return ct, num_cols
|
||||
|
||||
def load_data(path) -> pd.DataFrame:
    """Read the CSV at *path* into a DataFrame.

    Args:
        path: Anything ``pandas.read_csv`` accepts as its first argument
            (the argument is forwarded unchanged).

    Returns:
        The parsed DataFrame, using ``read_csv``'s default options.
    """
    return pd.read_csv(path)
|
||||
|
||||
def split_Xy(df: pd.DataFrame, target: str) -> tuple[pd.DataFrame, pd.Series]:
    """Split *df* into a feature frame and an integer label series.

    Args:
        df: Frame containing the feature columns and the *target* column.
        target: Name of the label column.

    Returns:
        ``(X, y)`` where ``X`` is *df* without the *target* column and ``y``
        is the *target* column cast with ``astype(int)``. *df* itself is not
        modified.
    """
    features = df.drop(columns=[target])
    labels = df[target].astype(int)
    return features, labels
|
||||
53
src/streamlit_app.py
Normal file
53
src/streamlit_app.py
Normal file
@ -0,0 +1,53 @@
|
||||
"""Minimal Streamlit chat page that forwards the conversation to DeepSeek."""

import os

import streamlit as st
from dotenv import load_dotenv
from openai import OpenAI, OpenAIError

# Load environment variables (python-dotenv reads a local .env file if present).
load_dotenv()

# Fetch the API key from the environment.
api_key = os.getenv("DEEPSEEK_API_KEY")

st.title("DeepSeek Chat Demo")

# Halt the script early when the key is missing or still the
# "your-key-here" placeholder value.
if not api_key or api_key == "your-key-here":
    st.error("请在 .env 文件中配置 DEEPSEEK_API_KEY")
    st.stop()

# Initialise the DeepSeek client: the OpenAI SDK pointed at DeepSeek's base URL.
client = OpenAI(
    api_key=api_key,
    base_url="https://api.deepseek.com",
)

# Initialise the chat history on the first run of the session.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Re-render the stored chat history.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Handle new user input.
if prompt := st.chat_input("What is up?"):
    # Record the user message and display it.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Request a streamed reply and render it as it arrives.
    response = None
    with st.chat_message("assistant"):
        try:
            stream = client.chat.completions.create(
                model="deepseek-chat",
                messages=[
                    {"role": m["role"], "content": m["content"]}
                    for m in st.session_state.messages
                ],
                stream=True,
            )
            response = st.write_stream(stream)
        except OpenAIError as exc:
            # Report API/network failures in the page instead of crashing the
            # script run. The user's message stays in the history so it is
            # still shown and is sent again with the next prompt.
            st.error(f"调用 DeepSeek API 失败：{exc}")

    # Record the assistant message only when a reply was actually produced.
    if response is not None:
        st.session_state.messages.append({"role": "assistant", "content": response})
|
||||
28
src/train_baseline.py
Normal file
28
src/train_baseline.py
Normal file
@ -0,0 +1,28 @@
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import classification_report, roc_auc_score
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.pipeline import Pipeline
|
||||
from src.data_pipeline import load_data, CleanTransformer, build_preprocess, split_Xy
|
||||
|
||||
def main(data_path="creditcard.csv", target="Class"):
    """Train and evaluate the logistic-regression baseline, printing its metrics.

    Steps: load the CSV, drop duplicate rows, split features from labels,
    make a stratified 80/20 train/test split (``random_state=42``), fit a
    preprocessing + ``LogisticRegression`` pipeline, then print the
    classification report and ROC-AUC for the test split.

    Args:
        data_path: CSV file to load. The default is a relative path, so it is
            resolved against the current working directory.
        target: Name of the label column.
    """
    # Duplicates are removed on the full frame (labels included) so that
    # features and labels stay aligned when they are split afterwards.
    df = CleanTransformer().transform(load_data(data_path))

    ct, _ = build_preprocess(df.columns.tolist(), target)
    X, y = split_Xy(df, target)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # n_jobs is left at its default, which is equivalent to the previous
    # explicit n_jobs=1.
    clf = LogisticRegression(max_iter=1000, class_weight="balanced")
    pipe = Pipeline(steps=[("preprocess", ct), ("clf", clf)])
    pipe.fit(X_train, y_train)

    y_pred = pipe.predict(X_test)
    # Column 1 holds the predicted probability of the second class in
    # ``classes_`` order (label 1 for 0/1 labels).
    y_proba = pipe.predict_proba(X_test)[:, 1]

    report = classification_report(y_test, y_pred, digits=4)
    auc = roc_auc_score(y_test, y_proba)

    print("Classification Report")
    print(report)
    print("ROC-AUC", round(auc, 4))


if __name__ == "__main__":
    main()
|
||||
3
uv.toml
Normal file
3
uv.toml
Normal file
@ -0,0 +1,3 @@
|
||||
[[index]]
|
||||
url = "https://mirrors.aliyun.com/pypi/simple/"
|
||||
default = true
|
||||
Loading…
Reference in New Issue
Block a user