Group02-notes/note_organizer.py
st2411020204 ea47c7ad09 feat(note): 添加标签系统和内容提炼功能
- 实现笔记标签功能,支持添加、编辑和按标签筛选
- 新增内容提炼功能,可自动生成摘要和关键词
- 添加批量提炼功能,支持一键处理所有笔记
- 在统计面板中增加标签统计信息
- 更新笔记数据结构和UI以支持新功能
2026-01-08 15:41:08 +08:00

596 lines
25 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import html
import json
import os
import re
from collections import Counter
from datetime import datetime

import streamlit as st
# 1. Page configuration.
# set_page_config is issued before any other Streamlit call in this script.
st.set_page_config(
    page_title="学习笔记整理器",
    page_icon="📚",
    layout="wide",
)
st.title("📚 学习笔记整理器")
# Text extraction helper
def extract_key_content(content, max_sentences=3, max_keywords=5):
    """Build a naive extractive summary and keyword list for a note.

    Sentences are scored by their length plus a fixed bonus of 20 for every
    cue word they contain; the ``max_sentences`` best ones form the summary.
    Keywords are the most frequent runs of two or more CJK characters, minus
    a small stop-word list.

    Args:
        content: Raw note text. ``None`` or empty text is accepted.
        max_sentences: Maximum number of sentences kept in the summary.
        max_keywords: Maximum number of keywords returned.

    Returns:
        A dict with a ``"summary"`` string and a ``"keywords"`` list.
    """
    if not content or len(content.strip()) < 10:
        return {"summary": "内容太短,无法提炼", "keywords": []}

    # Split on newlines and on both full-width and ASCII sentence terminators,
    # so text ending in "!" / "?" is segmented as well.
    sentences = [
        piece.strip()
        for piece in re.split(r'[。!?!?\n]', content)
        if piece.strip()
    ]

    # Cue words that mark a sentence as likely important (built once, not per
    # sentence).
    cue_words = ('重要', '关键', '核心', '主要', '注意', '必须', '应该', '需要', '总结', '结论')
    scored = [
        (sentence, len(sentence) + 20 * sum(cue in sentence for cue in cue_words))
        for sentence in sentences
    ]
    # list.sort is stable, so equally scored sentences keep document order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    top_sentences = [sentence for sentence, _ in scored[:max_sentences]]

    # Candidate keywords: runs of at least two CJK characters.
    word_freq = Counter(re.findall(r'[\u4e00-\u9fa5]{2,}', content))
    # Only multi-character stop words can ever match the tokens above, so the
    # empty-string placeholders from the old list are dropped.
    stop_words = {'我们', '你们', '他们'}
    keywords = [
        word
        for word, _ in word_freq.most_common(max_keywords * 2)
        if word not in stop_words
    ][:max_keywords]

    # The split removed the terminators, so re-insert one between (and after)
    # the chosen sentences to keep the summary readable.
    summary = "。".join(top_sentences) + "。" if top_sentences else "无法提取摘要"
    return {"summary": summary, "keywords": keywords}
# 2. Persistence helpers
def save_notes():
    """Write the session's notes and categories to ``notes_data.json``.

    The file is rewritten in full as UTF-8 JSON with non-ASCII characters
    kept readable and a two-space indent.
    """
    snapshot = {
        "notes": st.session_state.notes,
        "categories": st.session_state.categories,
    }
    with open('notes_data.json', 'w', encoding='utf-8') as out:
        out.write(json.dumps(snapshot, ensure_ascii=False, indent=2))
def load_notes():
    """Populate session state from ``notes_data.json`` when it is usable.

    A missing file is the normal first-run case and leaves session state
    untouched. An unreadable, malformed or non-object file is reported with a
    warning instead of aborting the whole page on every run; session state is
    left untouched in that case too.
    """
    try:
        with open('notes_data.json', 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        # Nothing has been saved yet.
        return
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        st.warning(f"笔记数据文件读取失败,已忽略: {exc}")
        return

    if not isinstance(data, dict):
        st.warning("笔记数据文件格式无效,已忽略")
        return

    st.session_state.notes = data.get("notes", [])
    st.session_state.categories = data.get("categories", ["编程", "数学", "英语", "其他"])
# 3. Initialise session data
# On the first run of a session, start from an empty list and then let
# load_notes() overwrite it with whatever is stored on disk.
if "notes" not in st.session_state:
    st.session_state.notes = []
    load_notes()
# load_notes() sets the categories whenever it finds a data file, so this
# fallback only applies when nothing was loaded; reading the file a second
# time here would be redundant.
if "categories" not in st.session_state:
    st.session_state.categories = ["编程", "数学", "英语", "其他"]
# Id of the note currently open in the edit form, or None.
if "editing_note" not in st.session_state:
    st.session_state.editing_note = None
# 4. Sidebar: add notes and manage categories
with st.sidebar:
    st.header("📝 添加笔记")
    # Note title
    title = st.text_input("标题", placeholder="输入笔记标题")
    # Category picker (custom categories can be added below)
    category = st.selectbox(
        "选择分类",
        st.session_state.categories,
        help="选择笔记的分类"
    )
    # Free-text input for a new category
    new_category = st.text_input(
        "或创建新分类",
        placeholder="输入新分类名称,按回车添加",
        key="new_category_input"
    )
    # Button that registers the new category
    if st.button(" 添加新分类"):
        if new_category and new_category not in st.session_state.categories:
            st.session_state.categories.append(new_category)
            save_notes()
            st.success(f"已添加新分类:{new_category}")
            st.rerun()
        elif new_category in st.session_state.categories:
            st.warning("该分类已存在")
    # Note body
    content = st.text_area(
        "笔记内容",
        placeholder="输入笔记内容...",
        height=200
    )
    # Comma-separated tags; blank entries are discarded
    tags_input = st.text_input(
        "标签",
        placeholder="输入标签,用逗号分隔 (例如: 重要, 复习, 考试)",
        help="为笔记添加多个标签,便于分类和搜索"
    )
    tags = [tag.strip() for tag in tags_input.split(",") if tag.strip()]
    # Live preview of the extraction result for the text typed so far
    if content and len(content.strip()) > 10:
        with st.expander("🔍 预览提炼结果", expanded=False):
            extracted = extract_key_content(content)
            st.markdown("**摘要预览:**")
            st.info(extracted['summary'])
            st.markdown("**关键词预览:**")
            if extracted['keywords']:
                keywords_html = " ".join([f'<span style="background-color: #fff3e0; color: #e65100; padding: 2px 8px; border-radius: 4px; margin-right: 5px; font-size: 12px;">{kw}</span>' for kw in extracted['keywords']])
                st.markdown(keywords_html, unsafe_allow_html=True)
    # Add-note button
    if st.button("添加笔记", type="primary"):
        if title and content:
            # Allocate the id above the current maximum. Using the list length
            # would hand out an id that is still in use once any note has been
            # deleted, and note ids are embedded in widget keys elsewhere.
            next_id = max(
                (n["id"] for n in st.session_state.notes if isinstance(n.get("id"), int)),
                default=0,
            ) + 1
            # One timestamp so created_at and updated_at always agree.
            stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            note = {
                "id": next_id,
                "title": title,
                "category": category,
                "content": content,
                "tags": tags,
                "created_at": stamp,
                "updated_at": stamp
            }
            st.session_state.notes.append(note)
            save_notes()
            st.success("笔记添加成功!")
        else:
            st.warning("请填写标题和内容")
    st.divider()
    # List every known category
    st.subheader("📂 所有分类")
    for cat in st.session_state.categories:
        st.markdown(f"- {cat}")
# 5. Main area: search, filter and sort controls
st.header("📖 我的笔记")
# One row of five controls; the search box gets double width.
col1, col2, col3, col4, col5 = st.columns([2, 1, 1, 1, 1])
with col1:
    # Free-text search query
    search_query = st.text_input(
        "🔍 搜索关键词",
        placeholder="输入关键词...",
        help="支持模糊搜索"
    )
with col2:
    # Which note fields the query is matched against
    search_scope = st.selectbox(
        "搜索范围",
        ["全部", "仅标题", "仅内容", "仅类型", "仅标签"],
        help="选择搜索的范围"
    )
with col3:
    # Category filter
    filter_category = st.selectbox(
        "筛选分类",
        ["全部"] + st.session_state.categories,
        help="按分类筛选笔记"
    )
with col4:
    # Tag filter, offering every tag used by at least one note
    all_tags = list({tag for note in st.session_state.notes for tag in note.get("tags", [])})
    filter_tag = st.selectbox(
        "筛选标签",
        ["全部"] + sorted(all_tags),
        help="按标签筛选笔记"
    )
with col5:
    # Sort order
    sort_by = st.selectbox(
        "排序方式",
        ["最新", "最旧", "标题A-Z", "标题Z-A", "类型A-Z", "类型Z-A"],
        help="选择笔记的排序方式"
    )
# Batch extraction
st.divider()
col_batch1, col_batch2 = st.columns([3, 1])
with col_batch1:
    st.markdown("### ✨ 批量提炼")
with col_batch2:
    if st.button("🔄 提炼所有笔记", key="batch_extract"):
        refined_count = 0
        for note in st.session_state.notes:
            body = note.get('content', '')
            # Notes that are too short are skipped and not counted.
            if len(body.strip()) > 10:
                st.session_state[f'extracted_{note["id"]}'] = extract_key_content(body)
                refined_count += 1
        # Report the number of notes actually processed. No rerun is issued
        # here: the results are stored in session state and the note list is
        # rendered later in this same run, while an immediate rerun would
        # discard this message before it could be read.
        st.success(f"已提炼 {refined_count} 条笔记的关键内容!")
# 6. Render the note list
if st.session_state.notes:
    # Apply the category, tag and keyword filters chosen above.
    filtered_notes = []
    for note in st.session_state.notes:
        category_match = filter_category == "全部" or note["category"] == filter_category
        tag_match = filter_tag == "全部" or filter_tag in note.get("tags", [])
        # Case-insensitive substring search within the selected scope
        search_match = True
        if search_query:
            query = search_query.lower()
            if search_scope == "全部":
                search_match = (
                    query in note["title"].lower() or
                    query in note["content"].lower() or
                    query in note["category"].lower() or
                    any(query in tag.lower() for tag in note.get("tags", []))
                )
            elif search_scope == "仅标题":
                search_match = query in note["title"].lower()
            elif search_scope == "仅内容":
                search_match = query in note["content"].lower()
            elif search_scope == "仅类型":
                search_match = query in note["category"].lower()
            elif search_scope == "仅标签":
                search_match = any(query in tag.lower() for tag in note.get("tags", []))
        if category_match and tag_match and search_match:
            filtered_notes.append(note)

    # Sort option -> (key function, descending?)
    sort_options = {
        "最新": (lambda n: n["created_at"], True),
        "最旧": (lambda n: n["created_at"], False),
        "标题A-Z": (lambda n: n["title"].lower(), False),
        "标题Z-A": (lambda n: n["title"].lower(), True),
        "类型A-Z": (lambda n: n["category"].lower(), False),
        "类型Z-A": (lambda n: n["category"].lower(), True),
    }
    if sort_by in sort_options:
        sort_key, descending = sort_options[sort_by]
        filtered_notes.sort(key=sort_key, reverse=descending)

    if filtered_notes:
        st.info(f"找到 {len(filtered_notes)} 条笔记")
        tag_style = "background-color: #e1f5fe; color: #01579b; padding: 2px 8px; border-radius: 4px; margin-right: 5px; font-size: 12px;"
        keyword_style = "background-color: #fff3e0; color: #e65100; padding: 2px 8px; border-radius: 4px; margin-right: 5px; font-size: 12px;"
        # Two-column card layout
        cols = st.columns(2)
        for i, note in enumerate(filtered_notes):
            # Session-state key under which this note's extraction is cached
            cache_key = f'extracted_{note["id"]}'
            with cols[i % 2]:
                # Show the edit form for the note currently being edited
                if st.session_state.editing_note == note['id']:
                    with st.expander(f"✏️ 编辑: {note['title']}", expanded=True):
                        edit_title = st.text_input("标题", value=note['title'], key=f"edit_title_{note['id']}")
                        edit_category = st.selectbox(
                            "分类",
                            st.session_state.categories,
                            # Fall back to the first category when the note's
                            # category is no longer in the list.
                            index=st.session_state.categories.index(note['category']) if note['category'] in st.session_state.categories else 0,
                            key=f"edit_category_{note['id']}"
                        )
                        edit_content = st.text_area("内容", value=note['content'], height=200, key=f"edit_content_{note['id']}")
                        # Tags are edited as one comma-separated string
                        current_tags = ", ".join(note.get("tags", []))
                        edit_tags = st.text_input(
                            "标签",
                            value=current_tags,
                            key=f"edit_tags_{note['id']}",
                            help="输入标签,用逗号分隔"
                        )
                        edit_tags_list = [tag.strip() for tag in edit_tags.split(",") if tag.strip()]
                        # Save / cancel buttons
                        col_save, col_cancel = st.columns(2)
                        with col_save:
                            if st.button("💾 保存修改", key=f"save_{note['id']}", type="primary"):
                                note['title'] = edit_title
                                note['category'] = edit_category
                                note['content'] = edit_content
                                note['tags'] = edit_tags_list
                                note['updated_at'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                                # The cached extraction described the old
                                # text; drop it so it is not shown as current.
                                if cache_key in st.session_state:
                                    del st.session_state[cache_key]
                                st.session_state.editing_note = None
                                save_notes()
                                st.success("笔记已更新!")
                                st.rerun()
                        with col_cancel:
                            if st.button("❌ 取消", key=f"cancel_{note['id']}"):
                                st.session_state.editing_note = None
                                st.rerun()
                else:
                    # Read-only card
                    with st.expander(f"📌 {note['title']}", expanded=False):
                        st.markdown(f"**分类:** {note['category']}")
                        if note.get("tags"):
                            # Tags are user- or import-supplied text that is
                            # rendered with unsafe_allow_html, so escape them
                            # before placing them inside the markup.
                            tags_html = " ".join(
                                f'<span style="{tag_style}">{html.escape(str(tag))}</span>'
                                for tag in note["tags"]
                            )
                            st.markdown(f"**标签:** {tags_html}", unsafe_allow_html=True)
                        st.markdown(f"**创建时间:** {note['created_at']}")
                        if 'updated_at' in note:
                            st.markdown(f"**更新时间:** {note['updated_at']}")
                        st.markdown("---")
                        # Extract on demand and cache the result
                        if st.button("✨ 提炼内容", key=f"extract_{note['id']}"):
                            st.session_state[cache_key] = extract_key_content(note['content'])
                        # Show the cached extraction, if any
                        if cache_key in st.session_state:
                            extracted = st.session_state[cache_key]
                            with st.expander("📝 提炼结果", expanded=True):
                                st.markdown("**摘要:**")
                                st.info(extracted['summary'])
                                st.markdown("**关键词:**")
                                if extracted['keywords']:
                                    keywords_html = " ".join(
                                        f'<span style="{keyword_style}">{html.escape(str(kw))}</span>'
                                        for kw in extracted['keywords']
                                    )
                                    st.markdown(keywords_html, unsafe_allow_html=True)
                                else:
                                    st.warning("未找到关键词")
                        # Note body
                        st.markdown(note['content'])
                        # Edit / delete buttons
                        col_edit, col_delete = st.columns(2)
                        with col_edit:
                            if st.button("✏️ 编辑", key=f"edit_{note['id']}"):
                                st.session_state.editing_note = note['id']
                                st.rerun()
                        with col_delete:
                            if st.button("🗑️ 删除", key=f"delete_{note['id']}"):
                                st.session_state.notes.remove(note)
                                # Forget the cached extraction so a later note
                                # that receives this id does not inherit it.
                                if cache_key in st.session_state:
                                    del st.session_state[cache_key]
                                st.session_state.editing_note = None
                                save_notes()
                                st.rerun()
    else:
        st.warning("没有找到匹配的笔记")
else:
    st.info("还没有笔记,请在左侧添加您的第一条笔记!")
# 7. Statistics
st.divider()
st.subheader("📊 统计信息")
# Totals
col_stats1, col_stats2 = st.columns(2)
with col_stats1:
    st.metric("总笔记数", len(st.session_state.notes))
with col_stats2:
    all_tags = list({tag for note in st.session_state.notes for tag in note.get("tags", [])})
    st.metric("总标签数", len(all_tags))

# Per-category counts, laid out in rows of up to five metrics so that every
# category is shown (not only the first five). An empty category list yields
# no rows, which also avoids requesting zero columns.
st.markdown("### 分类统计")
categories = st.session_state.categories
category_width = min(5, len(categories))
for start in range(0, len(categories), 5):
    row_cols = st.columns(category_width)
    for col, cat in zip(row_cols, categories[start:start + 5]):
        with col:
            count = sum(1 for n in st.session_state.notes if n.get("category") == cat)
            st.metric(cat, count)

# Per-tag counts, same row layout; a note counts once per tag it carries.
if all_tags:
    st.markdown("### 标签统计")
    ordered_tags = sorted(all_tags)
    tag_width = min(5, len(ordered_tags))
    for start in range(0, len(ordered_tags), 5):
        row_cols = st.columns(tag_width)
        for col, tag in zip(row_cols, ordered_tags[start:start + 5]):
            with col:
                count = sum(1 for n in st.session_state.notes if tag in n.get("tags", []))
                st.metric(tag, count)
# 8. Data management
st.divider()
st.subheader("💾 数据管理")


def _normalize_imported_notes(raw_notes, first_id):
    """Return copies of imported notes that are safe to render.

    Non-dict entries are skipped. Every kept note receives a fresh sequential
    id starting at ``first_id`` and has the fields that the rest of this
    script reads (title, category, content, tags, created_at) filled in with
    defaults when they are missing or empty.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    cleaned = []
    for raw in raw_notes:
        if not isinstance(raw, dict):
            continue
        note = dict(raw)
        note["id"] = first_id + len(cleaned)
        note["title"] = str(note.get("title") or "无标题")
        note["category"] = str(note.get("category") or "其他")
        note["content"] = str(note.get("content") or "")
        raw_tags = note.get("tags")
        note["tags"] = [str(tag) for tag in raw_tags] if isinstance(raw_tags, list) else []
        note["created_at"] = str(note.get("created_at") or stamp)
        cleaned.append(note)
    return cleaned


# Import from a JSON backup or a plain text file
st.markdown("### 📥 导入笔记")
tab_json, tab_txt = st.tabs(["JSON文件", "文本文件"])
with tab_json:
    uploaded_file = st.file_uploader("选择JSON文件", type=['json'], help="选择之前导出的JSON文件", key="json_uploader")
    if uploaded_file is not None:
        import_data = None
        try:
            parsed = json.load(uploaded_file)
        except ValueError as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            st.error(f"文件解析失败: {str(e)}")
        else:
            if isinstance(parsed, dict) and isinstance(parsed.get('notes', []), list):
                import_data = parsed
            else:
                st.error("文件解析失败: JSON 顶层必须是包含 notes 列表的对象")

        if import_data is not None:
            file_notes = [n for n in import_data.get('notes', []) if isinstance(n, dict)]
            raw_categories = import_data.get('categories', [])
            file_categories = (
                [c for c in raw_categories if isinstance(c, str) and c]
                if isinstance(raw_categories, list) else []
            )
            # Import preview
            st.info(f"文件包含 {len(file_notes)} 条笔记")
            with st.expander("📋 查看文件内容", expanded=False):
                for i, note in enumerate(file_notes[:5]):
                    st.markdown(f"**{i+1}. {note.get('title', '无标题')}**")
                    st.markdown(f" 分类: {note.get('category', '未知')}")
                    st.markdown(f" 时间: {note.get('created_at', '未知')}")
                    st.markdown("---")
                if len(file_notes) > 5:
                    st.info(f"还有 {len(file_notes) - 5} 条笔记...")
            # Import modes
            col_merge, col_replace = st.columns(2)
            with col_merge:
                if st.button("🔄 合并导入", type="primary", key="merge_json"):
                    # Merge: keep existing notes and append the imported ones
                    # with ids above the current maximum, so they cannot
                    # collide with ids that are already in use.
                    next_id = max(
                        (n["id"] for n in st.session_state.notes if isinstance(n.get("id"), int)),
                        default=0,
                    ) + 1
                    new_notes = _normalize_imported_notes(file_notes, next_id)
                    st.session_state.notes.extend(new_notes)
                    # Merge categories
                    for cat in file_categories:
                        if cat not in st.session_state.categories:
                            st.session_state.categories.append(cat)
                    save_notes()
                    st.success(f"成功导入 {len(new_notes)} 条笔记!")
                    st.rerun()
            with col_replace:
                if st.button("⚠️ 替换导入", key="replace_json"):
                    # Replace: discard existing notes; imported notes are
                    # renumbered from 1 so their ids are unique.
                    st.session_state.notes = _normalize_imported_notes(file_notes, 1)
                    # An absent or empty category list falls back to defaults.
                    st.session_state.categories = file_categories or ["编程", "数学", "英语", "其他"]
                    st.session_state.editing_note = None
                    # Cached extraction results are keyed by note id and would
                    # otherwise be shown for unrelated notes after renumbering.
                    for state_key in [k for k in st.session_state.keys() if str(k).startswith("extracted_")]:
                        del st.session_state[state_key]
                    save_notes()
                    st.success(f"成功替换为 {len(st.session_state.notes)} 条笔记!")
                    st.rerun()
with tab_txt:
    # Import a plain text file as a single note
    txt_file = st.file_uploader("选择文本文件", type=['txt'], help="选择要导入的文本文件", key="txt_uploader")
    if txt_file is not None:
        try:
            # utf-8-sig also accepts plain UTF-8 and strips a leading BOM.
            txt_content = txt_file.read().decode('utf-8-sig')
        except UnicodeDecodeError as e:
            st.error(f"文件读取失败: {str(e)}")
        else:
            # File preview
            st.info(f"文件大小: {len(txt_content)} 字符")
            with st.expander("📋 查看文件内容", expanded=False):
                st.text(txt_content[:1000])
                if len(txt_content) > 1000:
                    st.info(f"还有 {len(txt_content) - 1000} 个字符...")
            # Import parameters
            col_title, col_cat = st.columns(2)
            with col_title:
                # Default title: the file name without its extension.
                import_title = st.text_input("笔记标题", value=os.path.splitext(txt_file.name)[0], help="为导入的笔记设置标题")
            with col_cat:
                import_category = st.selectbox("笔记分类", st.session_state.categories)
            # Import button
            if st.button("📥 导入文本文件", type="primary"):
                next_id = max(
                    (n["id"] for n in st.session_state.notes if isinstance(n.get("id"), int)),
                    default=0,
                ) + 1
                stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                note = {
                    "id": next_id,
                    "title": import_title,
                    "category": import_category,
                    "content": txt_content,
                    "tags": [],
                    "created_at": stamp,
                    "updated_at": stamp
                }
                st.session_state.notes.append(note)
                save_notes()
                st.success(f"成功导入笔记:{import_title}")
                st.rerun()
st.divider()
# Export
st.markdown("### 📤 导出笔记")
export_tab_json, export_tab_txt = st.tabs(["JSON格式", "文本格式"])
with export_tab_json:
    if st.button("📤 导出为JSON", key="export_json"):
        st.download_button(
            label="下载 JSON 文件",
            data=json.dumps({"notes": st.session_state.notes, "categories": st.session_state.categories}, ensure_ascii=False, indent=2),
            file_name=f"notes_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
            mime="application/json"
        )
with export_tab_txt:
    # Export scope
    export_option = st.radio("导出选项", ["导出所有笔记", "导出单个笔记"])
    if export_option == "导出所有笔记":
        if st.button("📤 导出所有笔记为文本", type="primary", key="export_all_txt"):
            # Collect the pieces and join once instead of repeated "+=".
            heavy_rule = "=" * 50
            light_rule = "-" * 40
            parts = [
                heavy_rule + "\n",
                f"学习笔记备份 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
                heavy_rule + "\n\n",
            ]
            for note in st.session_state.notes:
                parts.extend([
                    light_rule + "\n",
                    f"标题: {note['title']}\n",
                    f"分类: {note['category']}\n",
                    f"创建时间: {note['created_at']}\n",
                    light_rule + "\n\n",
                    note['content'] + "\n\n",
                ])
            st.download_button(
                label="下载文本文件",
                data="".join(parts),
                file_name=f"notes_all_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
                mime="text/plain"
            )
    else:
        # Export a single note
        if st.session_state.notes:
            notes = st.session_state.notes
            # Select by list position and only format the label from the note.
            # Parsing the id back out of the label and searching by id would
            # return the wrong note whenever two notes share an id.
            selected_index = st.selectbox(
                "选择要导出的笔记",
                list(range(len(notes))),
                format_func=lambda idx: f"{notes[idx]['id']}. {notes[idx]['title']}",
            )
            if st.button("📤 导出选中笔记", type="primary", key="export_single_txt"):
                if selected_index is not None and selected_index < len(notes):
                    note = notes[selected_index]
                    # Format the note
                    lines = [
                        "=" * 50 + "\n",
                        f"标题: {note['title']}\n",
                        f"分类: {note['category']}\n",
                        f"创建时间: {note['created_at']}\n",
                    ]
                    if 'updated_at' in note:
                        lines.append(f"更新时间: {note['updated_at']}\n")
                    lines.append("=" * 50 + "\n\n")
                    lines.append(note['content'])
                    # Replace characters that are not valid in file names on
                    # common platforms before using the title in the name.
                    safe_title = re.sub(r'[\\/:*?"<>|\r\n]+', '_', str(note['title']))
                    st.download_button(
                        label="下载文本文件",
                        data="".join(lines),
                        file_name=f"note_{note['id']}_{safe_title}.txt",
                        mime="text/plain"
                    )
        else:
            st.info("没有笔记可导出")
st.divider()
# Clear all notes (two-step confirmation)
col_clear = st.columns(1)[0]
with col_clear:
    # A button only reports True on the run triggered by its own click. The
    # confirm button therefore cannot live inside the first button's branch:
    # clicking it starts a new run in which the first button is False and the
    # branch is skipped. The pending confirmation is kept in session state.
    if st.button("🗑️ 清空所有笔记"):
        if st.session_state.notes:
            st.session_state.confirm_clear = True
        else:
            st.info("没有笔记可清空")
    if st.session_state.get("confirm_clear") and st.session_state.notes:
        st.warning("⚠️ 此操作不可恢复!")
        if st.button("确认清空"):
            st.session_state.notes = []
            st.session_state.editing_note = None
            st.session_state.confirm_clear = False
            save_notes()
            st.success("所有笔记已清空!")
            st.rerun()
        if st.button("❌ 取消", key="cancel_clear"):
            st.session_state.confirm_clear = False
            st.rerun()