"""Streamlit front end for the medieval-themed SMS spam classifier.

The page offers three workflows:

* classify a single message typed into the text area and show the agent's
  label, probabilities and explanation;
* classify every row of an uploaded CSV file (``text`` column required) and
  offer the results as a download;
* retrain and save the model selected in the sidebar.
"""
import html
import os
import sys

import pandas as pd
import streamlit as st

# Add the parent of this file's directory to the import path so that the
# ``src`` package can be imported below.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from src.agent import agent, get_agent  # noqa: E402
from src.data import load_data, preprocess_data, split_data  # noqa: E402
from src.models import train_model, save_model, load_model, compare_models  # noqa: E402

# Display names of the selectable models; shared by the sidebar selector and
# the "training finished" message so the two can never drift apart.
MODEL_DISPLAY_NAMES = {
    "lightgbm": "圣光使者 (LightGBM)",
    "logistic_regression": "智慧之剑 (Logistic Regression)",
}

# Upper bound on the number of uploaded rows that are classified per run.
MAX_BATCH_ROWS = 100


def render_section(*paragraphs):
    """Render ``paragraphs`` as one markdown block separated by blank lines.

    The text is wrapped in leading and trailing blank lines, which is the
    layout every themed block on this page uses.
    """
    body = "\n\n".join(paragraphs)
    st.markdown("\n\n" + body + "\n\n", unsafe_allow_html=True)


def escape_text(value):
    """Return ``value`` as text that is safe to place in HTML-enabled markdown.

    Exception messages and agent output may contain ``<``, ``>`` or ``&``;
    because the page renders markdown with ``unsafe_allow_html=True`` they
    must be escaped before being interpolated.
    """
    return html.escape(str(value), quote=False)


def classify_for_table(text):
    """Classify one message and flatten the agent result into a table row."""
    outcome = agent.classify_and_explain(text)
    classification = outcome["classification"]
    explanation = outcome["explanation"]
    return {
        "text": text,
        "label": classification["label"],
        "spam_probability": classification["probability"]["spam"],
        "ham_probability": classification["probability"]["ham"],
        "content_summary": explanation["content_summary"],
        "classification_reason": explanation["classification_reason"],
    }


# Page configuration.
# The original icon was a mis-encoded character (U+FFFD); a shield is used.
st.set_page_config(
    page_title="垃圾短信分类器",
    page_icon="🛡️",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS hook for the medieval theme (the stylesheet is currently empty).
st.markdown("", unsafe_allow_html=True)

# Application title.
render_section("⚔️ 中世纪垃圾短信分类器", "保护您的通信，抵御垃圾信息的入侵")
st.markdown("---")

# Sidebar: model and language selection plus a short description.
with st.sidebar:
    render_section("🛡️ 骑士工坊", "系统配置")

    # Model selection.
    # NOTE(review): the selected model is only used by the retraining button;
    # classification always goes through the imported ``agent`` — confirm
    # whether ``get_agent`` was meant to be called with this choice.
    render_section("⚔️ 选择武器", "选择用于抵御垃圾信息的武器")
    model_option = st.selectbox(
        label="",
        options=["lightgbm", "logistic_regression"],
        index=0,
        format_func=lambda key: MODEL_DISPLAY_NAMES[key],
    )

    # Language selection (currently only affects the probability chart labels).
    render_section("📜 选择语言", "选择预言师的语言")
    lang_option = st.selectbox(
        label="",
        options=["中文", "英文"],
        index=0,
    )

    # About box.
    st.markdown("---")
    render_section("🏰 关于城堡")
    render_section(
        "🛡️ 城堡防御系统",
        "这是一座由现代魔法和古老智慧构建的防御城堡：",
        "💫 使用圣光使者 (LightGBM) 和智慧之剑 (Logistic Regression) 守护\n"
        "🧙 由DeepSeek预言师提供智慧解释\n"
        "🤖 通过魔法使者 (Agent) 整合所有力量",
        "保护您的通信不受垃圾信息的侵袭！",
    )

# Main area: inputs on the left, results on the right.
col1, col2 = st.columns([1, 1], gap="large")

with col1:
    # Single-message input.
    render_section("📜 信件输入", "输入需要检查的信件内容")
    sms_input = st.text_area(
        label="",
        height=200,
        placeholder="例如：WINNER!! As a valued network customer you have been selected to receivea £900 prize reward!",
        help="输入需要分类的短信内容",
    )

    # The button stays disabled until some non-whitespace text is entered.
    classify_button = st.button(
        "⚔️ 开始检查",
        type="primary",
        use_container_width=True,
        disabled=sms_input.strip() == "",
    )

    # Batch upload.
    st.markdown("---")
    render_section("📦 批量检查", "上传多封信件进行批量检查")
    uploaded_file = st.file_uploader(
        label="",
        type=["csv"],
        help="上传包含短信文本的CSV文件（需要包含text列）",
    )

    # Optional model retraining.
    with st.expander("🔧 锻造武器", expanded=False):
        render_section("铁匠工坊", "重新锻造您的武器，提升防御能力")

        if st.button("⚒️ 重新锻造武器"):
            with st.spinner("🔨 铁匠正在锻造武器..."):
                try:
                    # Load and preprocess the data.
                    # NOTE(review): this path is relative to the current
                    # working directory, not to this file — verify how the
                    # app is launched.
                    df = load_data("../data/spam.csv")
                    processed_df = preprocess_data(df)
                    train_df, _ = split_data(processed_df)

                    # Train the selected model and persist it.
                    model, _ = train_model(train_df, model_type=model_option)
                    save_model(model, model_option)

                    render_section(
                        "✨ 武器锻造完成！",
                        "您的 {} 已准备好进行战斗！".format(
                            MODEL_DISPLAY_NAMES[model_option]
                        ),
                    )
                except Exception as e:
                    # UI boundary: report any failure instead of crashing.
                    render_section(
                        "❌ 锻造失败！",
                        "铁匠遇到了问题：{}".format(escape_text(e)),
                    )

with col2:
    st.header("分类结果")

    # Result of classifying the single message.
    if classify_button and sms_input.strip():
        with st.spinner("正在分类..."):
            try:
                # The agent both classifies the message and explains the result.
                result = agent.classify_and_explain(sms_input)
                classification = result["classification"]
                explanation = result["explanation"]

                st.subheader("📋 分类标签")
                if classification["label"] == "spam":
                    render_section("⚠️ 这是一封**垃圾信件**！", "建议您谨慎对待此信件！")
                else:
                    render_section("✅ 这是一封**正常信件**", "此信件安全，可以放心阅读！")

                # Probability chart.
                render_section("📊 预言概率")
                prob_df = pd.DataFrame.from_dict(
                    classification["probability"],
                    orient="index",
                    columns=["概率"],
                )
                # Relabel by key rather than by position so the chart stays
                # correct regardless of the order of the probability dict.
                if lang_option == "中文":
                    prob_labels = {"spam": "垃圾信件", "ham": "正常信件"}
                else:
                    prob_labels = {"spam": "Spam", "ham": "Ham"}
                prob_df = prob_df.rename(index=prob_labels)
                st.bar_chart(prob_df)

                # Raw classification payload.
                render_section("🔍 详细预言")
                with st.expander("查看详细分类结果", expanded=True):
                    st.json(classification, expanded=False)

                # Explanation and suggested action.
                render_section("🧙 预言师的解释")
                with st.expander("查看预言解释", expanded=True):
                    render_section(
                        "📝 内容摘要：{}".format(
                            escape_text(explanation["content_summary"])
                        ),
                        "⚖️ 预言原因：{}".format(
                            escape_text(explanation["classification_reason"])
                        ),
                        "🔮 可信度：{} - {}".format(
                            escape_text(explanation["confidence_level"]),
                            escape_text(explanation["confidence_explanation"]),
                        ),
                    )

                    render_section("💡 行动建议")
                    # The "{suggestion}" template used to be rendered without
                    # being formatted, so the placeholder itself was shown.
                    # NOTE(review): the key name "suggestion" is assumed from
                    # that placeholder — confirm against the agent's
                    # explanation schema.
                    suggestion = (
                        explanation["suggestion"]
                        if "suggestion" in explanation
                        else None
                    )
                    if suggestion:
                        render_section(escape_text(suggestion))
            except Exception as e:
                # UI boundary: report any failure instead of crashing.
                render_section(
                    "❌ 预言失败！",
                    "预言师遇到了问题：{}".format(escape_text(e)),
                )

    # Results of classifying an uploaded file.
    if uploaded_file is not None:
        with st.spinner("🧙‍♂️ 预言师正在批量解析信件..."):
            try:
                df = pd.read_csv(uploaded_file)

                if "text" not in df.columns:
                    render_section("❌ 预言失败！", "信件文件必须包含'text'列")
                else:
                    # Rows without text cannot be classified; drop them so a
                    # NaN is never handed to the agent.
                    df = df.dropna(subset=["text"])

                    if df.empty:
                        render_section("⚠️ 警告", "信件文件中没有可解析的信件")
                    else:
                        # Cap the number of rows that are processed.
                        if len(df) > MAX_BATCH_ROWS:
                            render_section(
                                "⚠️ 警告",
                                f"信件文件包含 {len(df)} 封信件，"
                                f"预言师将只解析前 {MAX_BATCH_ROWS} 封",
                            )
                            df = df.head(MAX_BATCH_ROWS)

                        # Classify every remaining message.
                        results_df = pd.DataFrame(
                            [
                                classify_for_table(text)
                                for text in df["text"].astype(str)
                            ]
                        )

                        # Label distribution.
                        render_section("📊 预言统计")
                        label_counts = results_df["label"].value_counts()
                        label_counts.index = label_counts.index.map(
                            {"spam": "垃圾信件", "ham": "正常信件"}
                        )
                        st.bar_chart(label_counts)

                        # Result table.
                        render_section("📋 预言结果")
                        st.dataframe(
                            results_df,
                            use_container_width=True,
                            column_config={
                                "text": st.column_config.TextColumn(
                                    "信件内容", width="medium"
                                ),
                                "label": st.column_config.TextColumn("预言标签"),
                                "spam_probability": st.column_config.ProgressColumn(
                                    "垃圾信件概率",
                                    format="%.2f",
                                    min_value=0.0,
                                    max_value=1.0,
                                ),
                                "ham_probability": st.column_config.ProgressColumn(
                                    "正常信件概率",
                                    format="%.2f",
                                    min_value=0.0,
                                    max_value=1.0,
                                ),
                                "content_summary": st.column_config.TextColumn(
                                    "内容摘要", width="medium"
                                ),
                                "classification_reason": st.column_config.TextColumn(
                                    "预言原因", width="medium"
                                ),
                            },
                        )

                        # Download of the results as CSV.
                        render_section("💾 保存预言")
                        csv_bytes = results_df.to_csv(index=False).encode("utf-8")
                        st.download_button(
                            label="📄 下载预言结果",
                            data=csv_bytes,
                            file_name="spam_classification_results.csv",
                            mime="text/csv",
                            use_container_width=True,
                        )
            except Exception as e:
                # UI boundary: report any failure instead of crashing.
                render_section(
                    "❌ 预言失败！",
                    "预言师遇到了问题：{}".format(escape_text(e)),
                )

# Footer.
st.markdown("---")
render_section("🏰 中世纪垃圾短信防御城堡", "保护您的通信不受垃圾信息的侵袭！")