# Group02-notes/note_organizer.py

import html
import json
import os
import re
from collections import Counter
from datetime import datetime

import streamlit as st

# 1. Page configuration: browser-tab title and icon, wide layout, then the on-page heading.
st.set_page_config(page_title="学习笔记整理器", page_icon="📚", layout="wide")
st.title("📚 学习笔记整理器")

# Text-extraction helper
def extract_key_content(content, max_sentences=3, max_keywords=5):
    """Extract a short summary and a keyword list from note text.

    Sentences are split on the Chinese full stop, exclamation mark, question
    mark and newlines. Each sentence scores its own length plus 20 for every
    cue word it contains; the highest-scoring sentences form the summary.
    Keywords are the most frequent runs of two or more CJK characters that
    are not stop words.

    Args:
        content: The note text. ``None`` or text shorter than 10 characters
            (after stripping) is reported as too short.
        max_sentences: Maximum number of sentences in the summary. Negative
            values are treated as 0.
        max_keywords: Maximum number of keywords returned. Negative values
            are treated as 0.

    Returns:
        A dict with a ``"summary"`` string and a ``"keywords"`` list.
    """
    if not content or len(content.strip()) < 10:
        return {"summary": "内容太短，无法提炼", "keywords": []}

    # A negative limit would otherwise slice from the end of the list.
    max_sentences = max(max_sentences, 0)
    max_keywords = max(max_keywords, 0)

    # Words that mark a sentence as likely to carry a key point.
    cue_words = ('重要', '关键', '核心', '主要', '注意', '必须', '应该', '需要', '总结', '结论')
    stop_words = {
        '的', '是', '在', '有', '和', '了', '不', '也', '都', '这', '那', '就',
        '会', '要', '可', '以', '我', '你', '他', '她', '它', '我们', '你们', '他们',
    }

    sentences = [part.strip() for part in re.split(r'[。！？\n]', content) if part.strip()]

    def _score(sentence):
        # Longer sentences and sentences containing cue words rank higher.
        return len(sentence) + 20 * sum(cue in sentence for cue in cue_words)

    # sorted() is stable, so equally scored sentences keep their original order.
    top_sentences = sorted(sentences, key=_score, reverse=True)[:max_sentences]

    # Drop stop words BEFORE truncating; truncating first could discard real
    # keywords whenever stop words occupied the top frequency slots.
    word_freq = Counter(re.findall(r'[\u4e00-\u9fa5]{2,}', content))
    keywords = [
        word for word, _ in word_freq.most_common() if word not in stop_words
    ][:max_keywords]

    return {
        "summary": "。".join(top_sentences) + "。" if top_sentences else "无法提取摘要",
        "keywords": keywords
    }

# 2. Persistence helpers
def save_notes():
    """Write the session's notes and categories to ``notes_data.json``."""
    state = st.session_state
    payload = {"notes": state.notes, "categories": state.categories}
    with open('notes_data.json', 'w', encoding='utf-8') as out_file:
        # ensure_ascii=False keeps non-ASCII text readable in the file.
        json.dump(payload, out_file, ensure_ascii=False, indent=2)

def load_notes():
    """Fill session state from ``notes_data.json``; do nothing if the file is absent."""
    if not os.path.exists('notes_data.json'):
        return

    with open('notes_data.json', 'r', encoding='utf-8') as in_file:
        stored = json.load(in_file)

    # Keys missing from the file fall back to an empty note list and the
    # built-in category set.
    st.session_state.notes = stored.get("notes", [])
    st.session_state.categories = stored.get("categories", ["编程", "数学", "英语", "其他"])

# 3. Seed session state. For each missing key, store its fallback value and
# then let load_notes() overwrite it from disk when a data file exists.
for _state_key, _fallback in (
    ("notes", []),
    ("categories", ["编程", "数学", "英语", "其他"]),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _fallback
        load_notes()

# Id of the note currently open in the edit form; None means no note is being edited.
if "editing_note" not in st.session_state:
    st.session_state["editing_note"] = None

# 4. Sidebar - add notes and manage categories
with st.sidebar:
    st.header("📝 添加笔记")

    # Note title
    title = st.text_input("标题", placeholder="输入笔记标题")

    # Category picker; custom categories are added with the input below
    category = st.selectbox(
        "选择分类",
        st.session_state.categories,
        help="选择笔记的分类"
    )

    # Name for a new custom category
    new_category = st.text_input(
        "或创建新分类",
        placeholder="输入新分类名称，按回车添加",
        key="new_category_input"
    )

    if st.button("➕ 添加新分类"):
        # Strip first so padded or whitespace-only input cannot create a
        # blank category or a near-duplicate of an existing one.
        category_name = new_category.strip()
        if category_name and category_name not in st.session_state.categories:
            st.session_state.categories.append(category_name)
            save_notes()
            st.success(f"已添加新分类：{category_name}")
            st.rerun()
        elif category_name in st.session_state.categories:
            st.warning("该分类已存在")

    # Note body
    content = st.text_area(
        "笔记内容",
        placeholder="输入笔记内容...",
        height=200
    )

    # Comma-separated tags; empty entries are dropped
    tags_input = st.text_input(
        "标签",
        placeholder="输入标签，用逗号分隔 (例如: 重要, 复习, 考试)",
        help="为笔记添加多个标签，便于分类和搜索"
    )
    tags = [tag.strip() for tag in tags_input.split(",") if tag.strip()]

    # Live preview of the extraction result for the text typed so far
    if content and len(content.strip()) > 10:
        with st.expander("🔍 预览提炼结果", expanded=False):
            extracted = extract_key_content(content)
            st.markdown("**摘要预览：**")
            st.info(extracted['summary'])
            st.markdown("**关键词预览：**")
            if extracted['keywords']:
                keywords_html = " ".join([f'<span style="background-color: #fff3e0; color: #e65100; padding: 2px 8px; border-radius: 4px; margin-right: 5px; font-size: 12px;">{kw}</span>' for kw in extracted['keywords']])
                st.markdown(keywords_html, unsafe_allow_html=True)

    if st.button("添加笔记", type="primary"):
        note_title = title.strip()
        if note_title and content.strip():
            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            # Ids double as widget keys in the note list, so they must be
            # unique. len(notes) + 1 repeats an id once a note has been
            # deleted; one past the highest existing id never does.
            next_id = max(
                (
                    n["id"]
                    for n in st.session_state.notes
                    if isinstance(n, dict) and isinstance(n.get("id"), int)
                ),
                default=0,
            ) + 1
            note = {
                "id": next_id,
                "title": note_title,
                "category": category,
                "content": content,
                "tags": tags,
                "created_at": timestamp,
                "updated_at": timestamp
            }
            st.session_state.notes.append(note)
            save_notes()
            st.success("笔记添加成功！")
        else:
            st.warning("请填写标题和内容")

    st.divider()

    # List every known category
    st.subheader("📂 所有分类")
    for cat in st.session_state.categories:
        st.markdown(f"- {cat}")

# 5. Main area - note list header plus search, filter and sort controls
st.header("📖 我的笔记")

# One row of controls: a double-width keyword box followed by four dropdowns.
col1, col2, col3, col4, col5 = st.columns([2, 1, 1, 1, 1])

# Keyword to search for
search_query = col1.text_input(
    "🔍 搜索关键词",
    placeholder="输入关键词...",
    help="支持模糊搜索",
)

# Which note fields the keyword is matched against
search_scope = col2.selectbox(
    "搜索范围",
    ["全部", "仅标题", "仅内容", "仅类型", "仅标签"],
    help="选择搜索的范围",
)

# Category filter
filter_category = col3.selectbox(
    "筛选分类",
    ["全部"] + st.session_state.categories,
    help="按分类筛选笔记",
)

# Tag filter, offering every distinct tag used by any note
all_tags = list({tag for note in st.session_state.notes for tag in note.get("tags", [])})
filter_tag = col4.selectbox(
    "筛选标签",
    ["全部"] + sorted(all_tags),
    help="按标签筛选笔记",
)

# Sort order
sort_by = col5.selectbox(
    "排序方式",
    ["最新", "最旧", "标题A-Z", "标题Z-A", "类型A-Z", "类型Z-A"],
    help="选择笔记的排序方式",
)

# Batch extraction: compute and cache the summary/keywords of every note.
st.divider()
col_batch1, col_batch2 = st.columns([3, 1])
with col_batch1:
    st.markdown("### ✨ 批量提炼")
with col_batch2:
    if st.button("🔄 提炼所有笔记", key="batch_extract"):
        extracted_count = 0
        for note in st.session_state.notes:
            # Notes with 10 or fewer characters are skipped, so they must not
            # be counted in the success message either.
            if len(note['content'].strip()) > 10:
                extracted = extract_key_content(note['content'])
                st.session_state[f'extracted_{note["id"]}'] = extracted
                extracted_count += 1
        # No st.rerun() here: the note list is rendered further down in this
        # same run and reads the values stored above, while an immediate
        # rerun would discard this message before it could be read.
        st.success(f"已提炼 {extracted_count} 条笔记的关键内容！")

# 6. Note list: filter, sort and render
if st.session_state.notes:
    # Keep the notes that pass the category filter, the tag filter and,
    # when a query was typed, the keyword search.
    filtered_notes = []
    for note in st.session_state.notes:
        category_match = filter_category == "全部" or note["category"] == filter_category
        tag_match = filter_tag == "全部" or filter_tag in note.get("tags", [])

        search_match = True
        if search_query:
            query = search_query.lower()
            # Case-insensitive substring test against each searchable field.
            field_hits = {
                "仅标题": query in note["title"].lower(),
                "仅内容": query in note["content"].lower(),
                "仅类型": query in note["category"].lower(),
                "仅标签": any(query in tag.lower() for tag in note.get("tags", [])),
            }
            if search_scope == "全部":
                search_match = any(field_hits.values())
            else:
                search_match = field_hits.get(search_scope, True)

        if category_match and tag_match and search_match:
            filtered_notes.append(note)

    # Sort option -> (key function, descending?)
    sort_rules = {
        "最新": (lambda n: n["created_at"], True),
        "最旧": (lambda n: n["created_at"], False),
        "标题A-Z": (lambda n: n["title"].lower(), False),
        "标题Z-A": (lambda n: n["title"].lower(), True),
        "类型A-Z": (lambda n: n["category"].lower(), False),
        "类型Z-A": (lambda n: n["category"].lower(), True),
    }
    if sort_by in sort_rules:
        sort_key, descending = sort_rules[sort_by]
        filtered_notes.sort(key=sort_key, reverse=descending)

    if filtered_notes:
        st.info(f"找到 {len(filtered_notes)} 条笔记")

        # Notes alternate between two columns.
        cols = st.columns(2)
        for i, note in enumerate(filtered_notes):
            # Session-state key under which this note's extraction is cached.
            extract_key = f'extracted_{note["id"]}'
            with cols[i % 2]:
                if st.session_state.editing_note == note['id']:
                    # Edit form for the note currently being edited.
                    with st.expander(f"✏️ 编辑: {note['title']}", expanded=True):
                        edit_title = st.text_input("标题", value=note['title'], key=f"edit_title_{note['id']}")
                        edit_category = st.selectbox(
                            "分类",
                            st.session_state.categories,
                            # Fall back to the first category when the note's
                            # category is no longer in the list.
                            index=st.session_state.categories.index(note['category']) if note['category'] in st.session_state.categories else 0,
                            key=f"edit_category_{note['id']}"
                        )
                        edit_content = st.text_area("内容", value=note['content'], height=200, key=f"edit_content_{note['id']}")

                        current_tags = ", ".join(note.get("tags", []))
                        edit_tags = st.text_input(
                            "标签",
                            value=current_tags,
                            key=f"edit_tags_{note['id']}",
                            help="输入标签，用逗号分隔"
                        )
                        edit_tags_list = [tag.strip() for tag in edit_tags.split(",") if tag.strip()]

                        col_save, col_cancel = st.columns(2)
                        with col_save:
                            if st.button("💾 保存修改", key=f"save_{note['id']}", type="primary"):
                                # `note` is the same dict object stored in
                                # session state, so this updates it in place.
                                note['title'] = edit_title
                                note['category'] = edit_category
                                note['content'] = edit_content
                                note['tags'] = edit_tags_list
                                note['updated_at'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                                # The cached extraction describes the old text.
                                if extract_key in st.session_state:
                                    del st.session_state[extract_key]
                                st.session_state.editing_note = None
                                save_notes()
                                st.success("笔记已更新！")
                                st.rerun()
                        with col_cancel:
                            if st.button("❌ 取消", key=f"cancel_{note['id']}"):
                                st.session_state.editing_note = None
                                st.rerun()
                else:
                    # Read-only view of the note.
                    with st.expander(f"📌 {note['title']}", expanded=False):
                        st.markdown(f"**分类：** {note['category']}")

                        if note.get("tags"):
                            # Tags are user input rendered as raw HTML, so
                            # escape them to keep markup in a tag inert.
                            tags_html = " ".join([f'<span style="background-color: #e1f5fe; color: #01579b; padding: 2px 8px; border-radius: 4px; margin-right: 5px; font-size: 12px;">{html.escape(str(tag))}</span>' for tag in note["tags"]])
                            st.markdown(f"**标签：** {tags_html}", unsafe_allow_html=True)

                        st.markdown(f"**创建时间：** {note['created_at']}")
                        if 'updated_at' in note:
                            st.markdown(f"**更新时间：** {note['updated_at']}")
                        st.markdown("---")

                        # Compute and cache the extraction on demand.
                        if st.button("✨ 提炼内容", key=f"extract_{note['id']}"):
                            extracted = extract_key_content(note['content'])
                            st.session_state[extract_key] = extracted

                        # Show the cached extraction, if there is one.
                        if extract_key in st.session_state:
                            extracted = st.session_state[extract_key]
                            with st.expander("📝 提炼结果", expanded=True):
                                st.markdown("**摘要：**")
                                st.info(extracted['summary'])
                                st.markdown("**关键词：**")
                                if extracted['keywords']:
                                    keywords_html = " ".join([f'<span style="background-color: #fff3e0; color: #e65100; padding: 2px 8px; border-radius: 4px; margin-right: 5px; font-size: 12px;">{html.escape(str(kw))}</span>' for kw in extracted['keywords']])
                                    st.markdown(keywords_html, unsafe_allow_html=True)
                                else:
                                    st.warning("未找到关键词")

                        # Full note text
                        st.markdown(note['content'])

                        col_edit, col_delete = st.columns(2)
                        with col_edit:
                            if st.button("✏️ 编辑", key=f"edit_{note['id']}"):
                                st.session_state.editing_note = note['id']
                                st.rerun()
                        with col_delete:
                            if st.button("🗑️ 删除", key=f"delete_{note['id']}"):
                                st.session_state.notes.remove(note)
                                # Drop the cached extraction so it cannot show
                                # up under a later note that reuses this id.
                                if extract_key in st.session_state:
                                    del st.session_state[extract_key]
                                st.session_state.editing_note = None
                                save_notes()
                                st.rerun()
    else:
        st.warning("没有找到匹配的笔记")
else:
    st.info("还没有笔记，请在左侧添加您的第一条笔记！")

# 7. Statistics
st.divider()
st.subheader("📊 统计信息")

# Totals for notes and distinct tags
col_stats1, col_stats2 = st.columns(2)
with col_stats1:
    st.metric("总笔记数", len(st.session_state.notes))
with col_stats2:
    all_tags = list(set(tag for note in st.session_state.notes for tag in note.get("tags", [])))
    st.metric("总标签数", len(all_tags))

# Notes per category
st.markdown("### 分类统计")
# st.columns rejects a column count of 0, so skip the grid when no category exists.
if st.session_state.categories:
    cat_stat_cols = st.columns(min(5, len(st.session_state.categories)))
    for i, cat in enumerate(st.session_state.categories):
        # Wrap around the columns so categories past the fifth are still
        # shown; they stack as further rows.
        with cat_stat_cols[i % len(cat_stat_cols)]:
            count = sum(1 for n in st.session_state.notes if n.get("category") == cat)
            st.metric(cat, count)

# Notes per tag
if all_tags:
    st.markdown("### 标签统计")
    tag_stat_cols = st.columns(min(5, len(all_tags)))
    for i, tag in enumerate(sorted(all_tags)):
        # Same wrap-around as the category grid above.
        with tag_stat_cols[i % len(tag_stat_cols)]:
            count = sum(1 for n in st.session_state.notes if tag in n.get("tags", []))
            st.metric(tag, count)

# 8. Data management
st.divider()
st.subheader("💾 数据管理")


def _prepare_imported_notes(raw_notes, existing_ids=(), keep_ids=False):
    """Return copies of imported notes that carry every field the UI reads.

    Args:
        raw_notes: Note dicts taken from the uploaded file.
        existing_ids: Ids already in use that new ids must not repeat.
        keep_ids: When true, a note keeps its own id if it is an int that
            has not been used yet; otherwise every note gets a fresh id.

    Returns:
        A new list of note dicts, each with a unique integer ``id`` and with
        title, category, content, tags and created_at filled in when missing.
    """
    used_ids = {i for i in existing_ids if isinstance(i, int)}
    next_id = max(used_ids, default=0) + 1
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    prepared = []
    for raw in raw_notes:
        note = dict(raw)
        # The note list indexes these keys directly, so a note lacking one
        # would crash the page after being imported.
        note.setdefault("title", "无标题")
        note.setdefault("category", "其他")
        note.setdefault("content", "")
        note.setdefault("tags", [])
        note.setdefault("created_at", timestamp)

        note_id = note.get("id")
        if keep_ids and isinstance(note_id, int) and note_id not in used_ids:
            next_id = max(next_id, note_id + 1)
        else:
            note_id = next_id
            next_id += 1
        note["id"] = note_id
        used_ids.add(note_id)
        prepared.append(note)
    return prepared


# Import section
st.markdown("### 📥 导入笔记")
tab_json, tab_txt = st.tabs(["JSON文件", "文本文件"])

with tab_json:
    uploaded_file = st.file_uploader("选择JSON文件", type=['json'], help="选择之前导出的JSON文件", key="json_uploader")

    if uploaded_file is not None:
        try:
            import_data = json.load(uploaded_file)
            # Entries that are not objects cannot be notes; ignore them.
            raw_notes = [n for n in import_data.get('notes', []) if isinstance(n, dict)]

            st.info(f"文件包含 {len(raw_notes)} 条笔记")

            # Preview of the first five notes in the file
            with st.expander("📋 查看文件内容", expanded=False):
                for i, note in enumerate(raw_notes[:5]):
                    st.markdown(f"**{i+1}. {note.get('title', '无标题')}**")
                    st.markdown(f"   分类: {note.get('category', '未知')}")
                    st.markdown(f"   时间: {note.get('created_at', '未知')}")
                    st.markdown("---")

                if len(raw_notes) > 5:
                    st.info(f"还有 {len(raw_notes) - 5} 条笔记...")

            col_merge, col_replace = st.columns(2)
            with col_merge:
                if st.button("🔄 合并导入", type="primary", key="merge_json"):
                    # Merge: keep existing notes and append the imported ones
                    # under fresh ids. len(notes) + 1 would repeat an id
                    # after any earlier deletion.
                    new_notes = _prepare_imported_notes(
                        raw_notes,
                        existing_ids=[n.get("id") for n in st.session_state.notes if isinstance(n, dict)],
                    )
                    st.session_state.notes.extend(new_notes)

                    # Add categories from the file that are not known yet
                    new_categories = import_data.get('categories', [])
                    for cat in new_categories:
                        if cat not in st.session_state.categories:
                            st.session_state.categories.append(cat)

                    save_notes()
                    st.success(f"成功导入 {len(new_notes)} 条笔记！")
                    st.rerun()

            with col_replace:
                if st.button("⚠️ 替换导入", key="replace_json"):
                    # Replace: discard existing notes. Ids from the file are
                    # kept unless they are missing, not ints, or repeated.
                    st.session_state.notes = _prepare_imported_notes(raw_notes, keep_ids=True)
                    # `or` also covers an empty list in the file, which would
                    # leave the app without any category to choose from.
                    st.session_state.categories = import_data.get('categories') or ["编程", "数学", "英语", "其他"]
                    st.session_state.editing_note = None
                    save_notes()
                    st.success(f"成功替换为 {len(st.session_state.notes)} 条笔记！")
                    st.rerun()

        except Exception as e:
            st.error(f"文件解析失败: {str(e)}")

with tab_txt:
    # Import a plain-text file as a single note
    txt_file = st.file_uploader("选择文本文件", type=['txt'], help="选择要导入的文本文件", key="txt_uploader")

    if txt_file is not None:
        try:
            # utf-8-sig reads plain UTF-8 too and drops a leading byte-order
            # mark, which would otherwise become the note's first character.
            content = txt_file.read().decode('utf-8-sig')

            # Size and a preview of the first 1000 characters
            st.info(f"文件大小: {len(content)} 字符")
            with st.expander("📋 查看文件内容", expanded=False):
                st.text(content[:1000])
                if len(content) > 1000:
                    st.info(f"还有 {len(content) - 1000} 个字符...")

            col_title, col_cat = st.columns(2)
            with col_title:
                # splitext removes only the final extension; str.replace also
                # removed ".txt" from the middle of a file name.
                import_title = st.text_input("笔记标题", value=os.path.splitext(txt_file.name)[0], help="为导入的笔记设置标题")
            with col_cat:
                import_category = st.selectbox("笔记分类", st.session_state.categories)

            if st.button("📥 导入文本文件", type="primary"):
                timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                # One past the highest existing id; len(notes) + 1 repeats an
                # id once a note has been deleted.
                next_id = max(
                    (
                        n["id"]
                        for n in st.session_state.notes
                        if isinstance(n, dict) and isinstance(n.get("id"), int)
                    ),
                    default=0,
                ) + 1
                note = {
                    "id": next_id,
                    "title": import_title,
                    "category": import_category,
                    "content": content,
                    # Same shape as notes created in the sidebar.
                    "tags": [],
                    "created_at": timestamp,
                    "updated_at": timestamp
                }
                st.session_state.notes.append(note)
                save_notes()
                st.success(f"成功导入笔记：{import_title}")
                st.rerun()

        except Exception as e:
            st.error(f"文件读取失败: {str(e)}")

st.divider()

# Export section. The download buttons are rendered directly instead of
# inside `if st.button(...)`: a widget nested under a button exists only
# for the single rerun in which that button reports a click.
st.markdown("### 📤 导出笔记")
export_tab_json, export_tab_txt = st.tabs(["JSON格式", "文本格式"])

with export_tab_json:
    st.download_button(
        label="📤 导出为JSON",
        data=json.dumps({"notes": st.session_state.notes, "categories": st.session_state.categories}, ensure_ascii=False, indent=2),
        file_name=f"notes_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
        mime="application/json",
        key="export_json"
    )

with export_tab_txt:
    export_option = st.radio("导出选项", ["导出所有笔记", "导出单个笔记"])

    if export_option == "导出所有笔记":
        # Build one text document: a dated header, then each note under its
        # own title/category/created-at block.
        export_parts = [
            "=" * 50 + "\n",
            f"学习笔记备份 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
            "=" * 50 + "\n\n",
        ]
        for note in st.session_state.notes:
            export_parts.append("-" * 40 + "\n")
            export_parts.append(f"标题: {note['title']}\n")
            export_parts.append(f"分类: {note['category']}\n")
            export_parts.append(f"创建时间: {note['created_at']}\n")
            export_parts.append("-" * 40 + "\n\n")
            export_parts.append(note['content'] + "\n\n")

        st.download_button(
            label="📤 导出所有笔记为文本",
            data="".join(export_parts),
            file_name=f"notes_all_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
            mime="text/plain",
            key="export_all_txt"
        )

    else:
        # Export a single note
        if st.session_state.notes:
            # Offer the note dicts themselves as options so the id does not
            # have to be parsed back out of the label text.
            note = st.selectbox(
                "选择要导出的笔记",
                st.session_state.notes,
                format_func=lambda n: f"{n['id']}. {n['title']}"
            )

            if note:
                note_content = "=" * 50 + "\n"
                note_content += f"标题: {note['title']}\n"
                note_content += f"分类: {note['category']}\n"
                note_content += f"创建时间: {note['created_at']}\n"
                if 'updated_at' in note:
                    note_content += f"更新时间: {note['updated_at']}\n"
                note_content += "=" * 50 + "\n\n"
                note_content += note['content']

                # Replace whitespace and characters that file systems reject
                # so the title is usable inside a file name.
                safe_title = re.sub(r'[\\/:*?"<>|\s]+', '_', str(note['title'])).strip('_') or "note"

                st.download_button(
                    label="📤 导出选中笔记",
                    data=note_content,
                    file_name=f"note_{note['id']}_{safe_title}.txt",
                    mime="text/plain",
                    key="export_single_txt"
                )
        else:
            st.info("没有笔记可导出")

st.divider()

# Clear-all with a two-step confirmation. The pending confirmation is kept
# in session state: a confirm button nested inside `if st.button(...)` can
# never fire, because clicking it triggers a rerun in which the outer button
# is no longer reported as clicked.
col_clear = st.columns(1)[0]
with col_clear:
    if st.button("🗑️ 清空所有笔记"):
        if st.session_state.notes:
            st.session_state["confirm_clear_notes"] = True
        else:
            st.info("没有笔记可清空")

    if st.session_state.get("confirm_clear_notes", False):
        st.warning("⚠️ 此操作不可恢复！")
        col_confirm, col_abort = st.columns(2)
        with col_confirm:
            if st.button("确认清空"):
                st.session_state.notes = []
                st.session_state.editing_note = None
                st.session_state["confirm_clear_notes"] = False
                # Cached extractions are keyed by note id; drop them so they
                # cannot show up under notes created later with the same ids.
                for state_key in list(st.session_state.keys()):
                    if isinstance(state_key, str) and state_key.startswith("extracted_"):
                        del st.session_state[state_key]
                save_notes()
                st.success("所有笔记已清空！")
                st.rerun()
        with col_abort:
            if st.button("❌ 取消", key="cancel_clear_notes"):
                st.session_state["confirm_clear_notes"] = False
                st.rerun()