Initial commit

2025-12-07 05:34:33 +08:00 · 2025-12-07 05:34:33 +08:00 · dca2f421c2
commit dca2f421c2
8 changed files with 662 additions and 0 deletions
--- a/.gitea/workflows/autograde.yml
+++ b/.gitea/workflows/autograde.yml
@ -0,0 +1,238 @@
 # Autograding workflow for assignment 04 (visualization).
 name: autograde-assignment-04-visualization
 # Started by pushes to main, by `submit` / `submit-*` tags, and by manual dispatch.
 # Whether grading actually runs is decided by the check-trigger job below.
 on:
  push:
    branches:
      - main
    tags:
      - 'submit'
      - 'submit-*'
  workflow_dispatch:
 # Scopes requested for the workflow token.
 # NOTE(review): no step visible in this workflow touches pull requests directly;
 # confirm whether the grading scripts really need `pull-requests: write`.
 permissions:
  contents: read
  pull-requests: write
 jobs:
  # Gate job: decides whether the `grade` job should run.
  # Exposes `should_run` ('true' / 'false'), which `grade` tests in its `if:`.
  check-trigger:
    runs-on: docker
    container:
      image: alpine:latest
    outputs:
      should_run: ${{ steps.check.outputs.trigger }}
    steps:
      - name: Check commit message for trigger keyword
        id: check
        env:
          # Hand the commit message over through the environment instead of
          # interpolating it into the script text: `${{ ... }}` is expanded
          # before the shell parses the script, so quotes, backticks or $(...)
          # inside a commit message would otherwise be executed as shell code.
          COMMIT_MSG: ${{ github.event.head_commit.message || '' }}
          EVENT_NAME: ${{ github.event_name }}
        run: |
          printf 'Commit message: %s\n' "$COMMIT_MSG"
          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
            # A manual run carries no head_commit, so the keyword check could
            # never succeed; an explicit manual dispatch is a request to grade.
            echo "trigger=true" >> "$GITHUB_OUTPUT"
            echo "✅ 手动触发（workflow_dispatch），即将执行评分"
          elif printf '%s\n' "$COMMIT_MSG" | grep -q "完成作业"; then
            echo "trigger=true" >> "$GITHUB_OUTPUT"
            echo "✅ Commit contains \"完成作业\"，即将执行评分"
          else
            echo "trigger=false" >> "$GITHUB_OUTPUT"
            # Inner quotes are escaped so they are printed instead of being
            # swallowed by the shell as string concatenation.
            echo "⛔ 只有包含\"完成作业\"的提交才会执行自动评分" >&2
          fi
  grade:
    needs: check-trigger
    if: needs.check-trigger.outputs.should_run == 'true'
    runs-on: docker
    container:
      image: python:3.11
      options: --user root
    timeout-minutes: 20
    steps:
      # Rewrite the APT sources to the Aliyun mirror, then install the system
      # packages used by later steps (git, rsync, CJK fonts, ...).
      - name: Configure APT mirror (Aliyun)
        run: |
          set -e
          for src_file in \
            /etc/apt/sources.list \
            /etc/apt/sources.list.d/*.list \
            /etc/apt/sources.list.d/*.sources
          do
            # Unmatched globs stay literal and are skipped by the -f test.
            if [ -f "$src_file" ]; then
              sed -i -E \
                -e 's|https?://deb.debian.org|http://mirrors.aliyun.com|g' \
                -e 's|https?://security.debian.org|http://mirrors.aliyun.com/debian-security|g' \
                -e 's|https?://archive.ubuntu.com|http://mirrors.aliyun.com|g' \
                -e 's|https?://ports.ubuntu.com|http://mirrors.aliyun.com|g' \
                "$src_file" || true
            fi
          done
          apt-get -o Acquire::Check-Valid-Until=false update -y
          DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
            git ca-certificates python3-pip rsync fonts-noto-cjk fonts-wqy-microhei
          rm -rf /var/lib/apt/lists/*
      # Manual checkout of the exact commit that triggered the run, done with
      # plain git commands instead of a checkout action.
      - name: Checkout code
        env:
          # Workflow token; embedded into the remote URL below to authenticate the fetch.
          GITHUB_TOKEN: ${{ github.token }}
        run: |
          # Presumably needed because the container runs as root (see the job's
          # container options) while the workspace may be owned by another user.
          git config --global --add safe.directory ${{ github.workspace }}
          git init
          REPO_URL="${{ github.server_url }}/${{ github.repository }}.git"
          # Insert the token right after the scheme: scheme://<token>@host/owner/repo.git
          AUTH_URL=$(echo "$REPO_URL" | sed "s|://|://${GITHUB_TOKEN}@|")
          # NOTE(review): the authenticated URL is stored as the `origin` remote and
          # remains in .git/config for all following steps; confirm this is acceptable.
          git remote add origin "$AUTH_URL"
          # Shallow-fetch only the triggering commit, then check it out.
          git fetch --depth=1 origin ${{ github.sha }}
          git checkout ${{ github.sha }}
      # Hand ownership of the workspace to the current user; a failure here is tolerated.
      - name: Fix permissions
        run: |
          chown -R "$(whoami):$(whoami)" ${{ github.workspace }} || true
      # Clone the private `<assignment>-tests` repository and copy the grading
      # scripts, hidden tests, test groups and LLM rubrics into the workspace.
      - name: Fetch hidden tests and grading scripts
        working-directory: ${{ github.workspace }}
        env:
          EXTERNAL_GITEA_HOST: ${{ secrets.EXTERNAL_GITEA_HOST }}
        run: |
          set -e
          # Credentials are read from the step's environment; they are not defined
          # anywhere in this workflow, so they are presumably injected by the runner.
          TESTS_USERNAME="${RUNNER_TESTS_USERNAME:-}"
          TESTS_TOKEN="${RUNNER_TESTS_TOKEN:-}"
          if [ -z "$TESTS_TOKEN" ] || [ -z "$TESTS_USERNAME" ]; then
            echo "❌ RUNNER_TESTS_USERNAME / RUNNER_TESTS_TOKEN not set!"
            exit 1
          fi
          # Resolve host
          # Priority: EXTERNAL_GITEA_HOST secret, then GITEA_ROOT_URL (scheme and
          # trailing slash stripped), then the host part of github.server_url.
          if [ -n "$EXTERNAL_GITEA_HOST" ]; then
            HOST="$EXTERNAL_GITEA_HOST"
          elif [ -n "$GITEA_ROOT_URL" ]; then
            HOST=$(echo "$GITEA_ROOT_URL" | sed 's|https\?://||' | sed 's|/$||')
          else
            HOST=$(echo "${{ github.server_url }}" | sed 's|https\?://||' | cut -d'/' -f1)
          fi
          ORG=$(echo "${{ github.repository }}" | cut -d'/' -f1)
          REPO_NAME=$(echo "${{ github.repository }}" | cut -d'/' -f2)
          # Derive the assignment id from the repository name by cutting off the
          # `-stu_...` or `-template...` suffix; otherwise use the hard-coded default.
          if echo "$REPO_NAME" | grep -q -- '-stu_'; then
            ASSIGNMENT_ID=$(echo "$REPO_NAME" | sed 's/-stu_.*//')
          elif echo "$REPO_NAME" | grep -q -- '-template'; then
            ASSIGNMENT_ID=$(echo "$REPO_NAME" | sed 's/-template.*//')
          else
            ASSIGNMENT_ID="assignment-04-visualization"
          fi
          echo "📥 Fetching tests from ${ORG}/${ASSIGNMENT_ID}-tests..."
          # NOTE(review): the URL scheme is plain http, so the credentials are sent
          # unencrypted unless the server redirects; confirm this is intended.
          AUTH_URL="http://${TESTS_USERNAME}:${TESTS_TOKEN}@${HOST}/${ORG}/${ASSIGNMENT_ID}-tests.git"
          git -c http.sslVerify=false clone --depth=1 "$AUTH_URL" _priv_tests
          # Start from a clean .autograde directory on every run.
          rm -rf .autograde
          mkdir -p .autograde
          # The *.py copy is mandatory (set -e aborts if it fails); *.sh is optional.
          cp _priv_tests/autograde/*.py .autograde/
          cp _priv_tests/autograde/*.sh .autograde/ 2>/dev/null || true
          # Copy metadata scripts if available
          # (both files are also matched by the *.py copy above)
          if [ -f "_priv_tests/autograde/create_minimal_metadata.py" ]; then
            cp _priv_tests/autograde/create_minimal_metadata.py .autograde/ 2>/dev/null || true
          fi
          if [ -f "_priv_tests/autograde/upload_metadata.py" ]; then
            cp _priv_tests/autograde/upload_metadata.py .autograde/ 2>/dev/null || true
          fi
          # Copy Python tests
          # rsync merges them into tests/, next to the public tests already there.
          if [ -d "_priv_tests/python" ]; then
            mkdir -p tests
            rsync -a _priv_tests/python/ tests/
            echo "✅ Private tests copied"
          fi
          # Copy test groups
          if [ -f "_priv_tests/test_groups.json" ]; then
            cp _priv_tests/test_groups.json .
          fi
          # Copy LLM rubrics
          if [ -d "_priv_tests/llm" ]; then
            mkdir -p .llm_rubrics
            cp _priv_tests/llm/*.json .llm_rubrics/ 2>/dev/null || true
          fi
          # Remove the clone, including the credentialed remote URL in its .git/config.
          rm -rf _priv_tests
      # Install the packages from the repository's requirements.txt plus the
      # packages the grading scripts are run with.
      - name: Install Python dependencies
        run: |
          # Route all pip downloads through the Aliyun PyPI mirror.
          pip config set global.index-url https://mirrors.aliyun.com/pypi/simple
          pip install --no-cache-dir -r requirements.txt
          # Install the dependencies of the grading scripts.
          pip install --no-cache-dir pytest requests python-dotenv
      # Run everything under tests/ and write a JUnit XML report for the grading step.
      - name: Run tests
        working-directory: ${{ github.workspace }}
        run: |
          mkdir -p test-results
          # Put the workspace root on the import path so `from src.dashboard import ...` resolves.
          export PYTHONPATH="$(pwd):${PYTHONPATH}"
          echo "📋 Tests to be executed:"
          # find exits 0 even when nothing matches, so the fallback message only
          # appears when find itself fails (e.g. tests/ does not exist).
          find tests -name "test_*.py" -type f 2>/dev/null || echo "No test files found"
          # `|| true`: failing tests must not fail the step; the later grading
          # steps read test-results/ instead of relying on the exit status.
          pytest tests/ -v --junitxml=test-results/junit.xml || true
          echo "📊 JUnit report generated"
      # Run the grouped grader over the JUnit results; per its arguments it
      # writes grade.json and summary.md.
      - name: Grade programming tests
        run: >-
          python ./.autograde/grade_grouped.py
          --junit-dir test-results
          --groups test_groups.json
          --out grade.json
          --summary summary.md
      # Grade REPORT.md against the LLM rubric; when either file is missing,
      # write a zero-score placeholder instead.
      - name: Grade REPORT.md
        run: |
          if [ ! -f REPORT.md ] || [ ! -f .llm_rubrics/rubric_report.json ]; then
            echo '{"total": 0, "flags": ["missing_file"]}' > report_grade.json
            echo "⚠️ REPORT.md or rubric not found"
          else
            python ./.autograde/llm_grade.py \
              --question "请评估这份反思报告" \
              --answer REPORT.md \
              --rubric .llm_rubrics/rubric_report.json \
              --out report_grade.json \
              --summary report_summary.md
            echo "✅ REPORT.md graded"
          fi
      # Combine the programming grade and the report grade; per its arguments
      # the script writes final_grade.json and final_summary.md.
      - name: Aggregate grades
        run: >-
          python ./.autograde/aggregate_grade.py
          --programming grade.json
          --report report_grade.json
          --out final_grade.json
          --summary final_summary.md
      # Produce metadata.json for the upload step; falls back to `{}` when the
      # generator script is missing or fails.
      - name: Create metadata
        working-directory: ${{ github.workspace }}
        env:
          REPO: ${{ github.repository }}
          LANGUAGE: python
        run: |
          # Nothing to do unless the aggregated grade exists.
          [ -f final_grade.json ] || exit 0
          # Use final grade type for aggregated grades
          export GRADE_TYPE=final GRADE_FILE=final_grade.json
          if [ ! -f .autograde/create_minimal_metadata.py ]; then
            echo "⚠️ create_minimal_metadata.py not found, skipping metadata creation"
            echo "{}" > metadata.json
          else
            python ./.autograde/create_minimal_metadata.py > metadata.json || echo "{}" > metadata.json
          fi
      # Push metadata.json to the organisation's course-metadata repository.
      # Skipped entirely when no RUNNER_METADATA_TOKEN is available.
      - name: Upload metadata
        if: env.RUNNER_METADATA_TOKEN != ''
        working-directory: ${{ github.workspace }}
        env:
          # Use the current organisation's course-metadata repository.
          METADATA_REPO: ${{ github.repository_owner }}/course-metadata
          METADATA_TOKEN: ${{ env.RUNNER_METADATA_TOKEN }}
          METADATA_BRANCH: ${{ env.RUNNER_METADATA_BRANCH }}
          STUDENT_REPO: ${{ github.repository }}
          RUN_ID: ${{ github.run_id }}
          COMMIT_SHA: ${{ github.sha }}
          SERVER_URL: ${{ github.server_url }}
          # The secret is mapped under its own name so that an EXTERNAL_GITEA_HOST
          # value already present in the environment is not blanked when the
          # secret is unset.
          SECRET_EXTERNAL_GITEA_HOST: ${{ secrets.EXTERNAL_GITEA_HOST }}
        run: |
          # Previously --external-host read EXTERNAL_GITEA_HOST, which this step
          # never defined (only the fetch step maps the secret), so the argument
          # was empty unless the runner happened to export it. Prefer the secret,
          # fall back to any ambient value.
          EXTERNAL_HOST="${SECRET_EXTERNAL_GITEA_HOST:-${EXTERNAL_GITEA_HOST:-}}"
          if [ -f metadata.json ] && [ -f .autograde/upload_metadata.py ]; then
            python ./.autograde/upload_metadata.py \
              --metadata-file metadata.json \
              --metadata-repo "${METADATA_REPO}" \
              --branch "${METADATA_BRANCH:-main}" \
              --student-repo "${STUDENT_REPO}" \
              --run-id "${RUN_ID}" \
              --commit-sha "${COMMIT_SHA}" \
              --workflow grade \
              --server-url "${SERVER_URL}" \
              --external-host "${EXTERNAL_HOST}"
          else
            echo "⚠️ metadata.json or upload_metadata.py not found, skipping upload"
          fi
--- a/README.md
+++ b/README.md
@ -0,0 +1,27 @@
 # 作业 4：数据可视化仪表板
 ## 任务
 - 在 `src/dashboard.py` 中完成 `DataDashboard` 类：实现数据加载、统计分析、图表生成等功能。
 - 从数据中发现规律，生成有意义的可视化报告。
 - 通过公开测试与隐藏测试；提交 `REPORT.md` 反思报告。
 🎯 **重点**：不只是画图，而是从数据中发现规律。
 ## 环境与依赖
 - Python 3.11+
 - 安装依赖：`pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple`
 ## 本地运行
 ```bash
 python -m pytest -v
 ```
 ## 提交要求
 - 提交信息需包含关键字"完成作业"以触发评分。
 - 确保 `REPORT.md` 已填写，特别是"数据发现"部分。
 ## 评分构成（总分 20）
 - Core 测试：10 分
 - Edge 测试：4 分
 - REPORT.md：6 分
--- a/REPORT.md
+++ b/REPORT.md
@ -0,0 +1,55 @@
 # 作业 4 反思报告
 ## 1. 数据发现（重点，3分）
 你从数据中发现了什么？不是"我画了什么图"，而是"我发现了什么"。
 ### 发现 1：[用一句话描述你的发现]
 - **现象**：具体描述你观察到的现象
 - **数据支撑**：用具体数字或图表说明
 - **可能原因**：你对这个现象的解释或猜测
 - **价值**：这个发现有什么用？谁会关心？
 > [在此处回答]
 ### 发现 2：[用一句话描述你的发现]
 - **现象**：...
 - **数据支撑**：...
 - **可能原因**：...
 - **价值**：...
 > [在此处回答]
 ## 2. 图表选择的思考
 你选择了哪些类型的图表？为什么？
 - 为什么用柱状图而不是饼图？
 - 为什么用折线图而不是散点图？
 - 你放弃了哪些图表？为什么？
 > [在此处回答]
 ## 3. AI 图表的问题
 AI 生成的图表代码，有什么问题？
 ### 问题 1：[问题描述]
 - AI 原代码的行为：
 - 问题所在：
 - 你的修改：
 > [在此处回答]
 ## 4. 从"画图"到"讲故事"
 如果你要用这些图表给领导/客户做汇报，你会如何组织？
 - 先展示什么？后展示什么？
 - 每张图要传达什么信息？
 - 哪些细节需要强调？
 > [在此处回答]
--- a/data/air_quality.csv
+++ b/data/air_quality.csv
@ -0,0 +1,32 @@
 日期,城市,AQI,PM2.5,PM10,SO2,NO2,CO,O3
 2024-01-01,北京,120,80,95,12,45,1.2,65
 2024-01-01,上海,85,55,68,8,32,0.8,78
 2024-01-01,广州,75,48,62,6,28,0.7,82
 2024-01-02,北京,100,65,78,10,40,1.0,70
 2024-01-02,上海,90,60,72,9,35,0.9,75
 2024-01-02,广州,68,42,55,5,25,0.6,85
 2024-01-03,北京,150,100,120,15,55,1.5,55
 2024-01-03,上海,75,50,60,7,30,0.7,80
 2024-01-03,广州,62,38,48,4,22,0.5,88
 2024-01-04,北京,95,62,75,9,38,0.9,72
 2024-01-04,上海,82,52,65,7,30,0.8,78
 2024-01-04,广州,70,45,58,5,26,0.6,84
 2024-01-05,北京,180,130,150,18,65,1.8,48
 2024-01-05,上海,95,65,78,10,38,1.0,72
 2024-01-05,广州,58,35,45,4,20,0.5,90
 2024-01-06,北京,110,72,88,11,42,1.1,68
 2024-01-06,上海,78,50,62,7,28,0.7,80
 2024-01-06,广州,65,40,52,5,24,0.6,86
 2024-01-07,北京,88,58,70,8,35,0.9,75
 2024-01-07,上海,72,45,55,6,26,0.6,82
 2024-01-07,广州,55,32,42,3,18,0.4,92
 2024-01-08,北京,,75,90,12,48,1.3,62
 2024-01-08,上海,80,52,65,,30,0.8,78
 2024-01-08,广州,60,38,50,4,22,,88
 2024-01-09,北京,125,85,100,13,50,1.4,60
 2024-01-09,上海,88,58,72,8,34,0.9,76
 2024-01-09,广州,72,46,60,6,26,0.6,84
 2024-01-10,北京,140,95,115,14,52,1.5,55
 2024-01-10,上海,92,62,75,9,36,1.0,74
 2024-01-10,广州,68,42,55,5,24,0.5,86
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,6 @@
 pytest>=7.0.0
 pandas>=2.0.0
 matplotlib>=3.7.0
 seaborn>=0.12.0
 pillow>=9.0.0
--- a/src/init.py
+++ b/src/init.py
--- a/src/dashboard.py
+++ b/src/dashboard.py
@ -0,0 +1,242 @@
 """
 数据可视化仪表板
 你的任务是实现 DataDashboard 类，从 CSV 文件加载数据，进行分析并生成可视化报告。
 功能要求：
 1. 加载 CSV 数据（处理编码问题）
 2. 计算基本统计量
 3. 生成柱状图、折线图、热图等可视化
 4. 生成完整分析报告
 🎯 核心问题：AI 可以生成图表代码，但"什么图表值得做"、"数据背后有什么故事"——这些需要你来判断。
 数据集选项：
 - air_quality.csv: 空气质量数据（城市、日期、AQI、PM2.5 等）
 - ecommerce_sales.csv: 电商销售数据
 - exam_results.csv: 考试成绩数据
 边界情况处理：
 - 缺失值：不能导致绘图崩溃
 - 中文标签：需要配置字体正确显示
 - 异常值：不能完全破坏图表
 - 空数据：空列不导致崩溃
 中文字体配置提示：
 ```python
 import matplotlib.pyplot as plt
 # macOS
 plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
 # Windows
 # plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei']
 # Linux
 # plt.rcParams['font.sans-serif'] = ['WenQuanYi Micro Hei']
 plt.rcParams['axes.unicode_minus'] = False
 ```
 示例用法：
 dashboard = DataDashboard("data/air_quality.csv")
 stats = dashboard.get_basic_stats()
 dashboard.create_bar_chart('城市', 'AQI', title='各城市平均AQI', save_path='bar.png')
 dashboard.create_line_chart('日期', 'AQI', title='AQI变化趋势', save_path='line.png')
 dashboard.generate_report('output/')
 """
 import pandas as pd
 import matplotlib.pyplot as plt
 from typing import Dict, List, Optional
 class DataDashboard:
    """
    Data visualization dashboard.

    Holds a dataset loaded from a CSV file and offers statistics and chart
    helpers on top of it. Every method except ``__init__`` is an assignment
    stub: the docstrings describe the behavior to implement.
    """

    def __init__(self, filepath: str):
        """
        Store the source path and trigger the initial load.

        Args:
            filepath: Path to the data file (CSV).
        """
        self.filepath = filepath
        # Stays None until load_data is implemented and succeeds.
        self.df: Optional[pd.DataFrame] = None
        self.load_data(filepath)

    def load_data(self, filepath: str) -> bool:
        """
        Load the CSV file and apply a first round of cleaning.

        Requirements:
        - detect the encoding automatically (UTF-8 or GBK)
        - record where values are missing
        - try to convert date columns to datetime

        Args:
            filepath: Path to the CSV file.

        Returns:
            bool: whether loading succeeded.

        Hints:
        - try UTF-8 first and fall back to GBK on failure
        - pd.to_datetime can be used to convert date columns
        """
        # TODO: implement this method
        return None

    def get_basic_stats(self) -> Dict:
        """
        Compute basic statistics for the loaded data.

        Returns:
            A dictionary shaped like::

                {
                    'row_count': 1000,
                    'column_count': 10,
                    'columns': ['col1', 'col2', ...],
                    'missing_count': {'col1': 5, 'col2': 10, ...},
                    'numeric_summary': {
                        'col1': {'mean': 50, 'std': 10, 'min': 0, 'max': 100},
                        ...
                    }
                }

        Notes:
        - numeric_summary covers numeric columns only
        - missing_count reports the number of missing values for every column
        """
        # TODO: implement this method
        return None

    def create_bar_chart(
        self,
        x_col: str,
        y_col: str,
        title: Optional[str] = None,
        aggfunc: str = 'mean',
        save_path: Optional[str] = None,
    ) -> None:
        """
        Draw a bar chart.

        Args:
            x_col: Column for the X axis (categorical variable).
            y_col: Column for the Y axis (numeric variable).
            title: Chart title.
            aggfunc: Aggregation function ('mean', 'sum', 'count').
            save_path: Where to save the figure; show it when None.

        Example:
            dashboard.create_bar_chart('城市', 'AQI', title='各城市平均AQI')

        Notes:
        - a font able to render Chinese text has to be configured
        - missing values must not crash the plot
        """
        # TODO: implement this method
        return None

    def create_line_chart(
        self,
        x_col: str,
        y_col: str,
        title: Optional[str] = None,
        save_path: Optional[str] = None,
    ) -> None:
        """
        Draw a line chart showing a trend.

        Args:
            x_col: Column for the X axis (usually time).
            y_col: Column for the Y axis.
            title: Chart title.
            save_path: Where to save the figure.

        Example:
            dashboard.create_line_chart('日期', 'AQI', title='AQI变化趋势')
        """
        # TODO: implement this method
        return None

    def create_heatmap(
        self,
        columns: Optional[List[str]] = None,
        title: Optional[str] = None,
        save_path: Optional[str] = None,
    ) -> None:
        """
        Draw a correlation heatmap.

        Args:
            columns: Columns to correlate; None means all numeric columns.
            title: Chart title.
            save_path: Where to save the figure.

        Example:
            dashboard.create_heatmap(columns=['AQI', 'PM2.5', 'PM10'])
        """
        # TODO: implement this method
        return None

    def create_distribution(
        self,
        column: str,
        bins: int = 20,
        title: Optional[str] = None,
        save_path: Optional[str] = None,
    ) -> None:
        """
        Draw a histogram of one column's distribution.

        Args:
            column: Column name.
            bins: Number of bins.
            title: Chart title.
            save_path: Where to save the figure.
        """
        # TODO: implement this method
        return None

    def generate_report(self, output_dir: str) -> Dict:
        """
        Generate a complete analysis report made of several charts.

        Args:
            output_dir: Output directory.

        Returns:
            {'generated_files': ['bar.png', 'line.png', ...]}

        The report should contain:
        - at least one bar chart
        - at least one line chart
        - optionally a heatmap, a distribution plot, etc.
        """
        # TODO: implement this method
        return None
 if __name__ == "__main__":
    # Manual smoke test for a local implementation: prints the basic statistics
    # and renders one bar chart into output/.
    import os

    # Chinese-capable fonts, tried in order. The original list had no Linux CJK
    # font, so on Linux every Chinese label fell through to DejaVu Sans, which
    # has no CJK glyphs. The two Linux entries are the families shipped by the
    # fonts-wqy-microhei / fonts-noto-cjk packages the CI workflow installs.
    plt.rcParams['font.sans-serif'] = [
        'Arial Unicode MS',      # macOS
        'SimHei',                # Windows
        'Microsoft YaHei',       # Windows
        'WenQuanYi Micro Hei',   # Linux
        'Noto Sans CJK SC',      # Linux
        'DejaVu Sans',           # last resort
    ]
    # Render the minus sign with an ASCII hyphen so it is not drawn as a missing glyph.
    plt.rcParams['axes.unicode_minus'] = False

    # Make sure the data file exists before doing anything else.
    data_file = "data/air_quality.csv"
    if not os.path.exists(data_file):
        print(f"请先准备数据文件: {data_file}")
        print("可以从作业说明中获取示例数据")
    else:
        dashboard = DataDashboard(data_file)

        # Basic statistics. `or {}` keeps the script from dying with an
        # AttributeError while get_basic_stats is still an unimplemented stub
        # returning None; the fields then print as 'N/A'.
        print("=== 基本统计 ===")
        stats = dashboard.get_basic_stats() or {}
        print(f"行数: {stats.get('row_count', 'N/A')}")
        print(f"列数: {stats.get('column_count', 'N/A')}")
        print(f"缺失值: {stats.get('missing_count', {})}")

        # Chart generation.
        print("\n=== 生成图表 ===")
        os.makedirs("output", exist_ok=True)
        dashboard.create_bar_chart(
            '城市', 'AQI',
            title='各城市平均AQI',
            save_path='output/bar_chart.png'
        )
        print("✅ 柱状图已生成")
        print("\n✅ 测试完成")
--- a/tests/test_public.py
+++ b/tests/test_public.py
@ -0,0 +1,62 @@
 """
 公开测试 - 学生可见
 这些测试帮助你验证基本功能是否正确
 """
 import pytest
 from pathlib import Path
 import pandas as pd
 from src.dashboard import DataDashboard
@pytest.fixture
 def sample_csv(tmp_path):
    """创建测试用的 CSV 文件"""
    content = """日期,城市,AQI,PM2.5
 2024-01-01,北京,120,80
 2024-01-01,上海,85,55
 2024-01-02,北京,100,65
 2024-01-02,上海,90,60
 2024-01-03,北京,150,100
 2024-01-03,上海,75,50"""
    p = tmp_path / "test_data.csv"
    p.write_text(content, encoding='utf-8')
    return str(p)
 def test_load_csv(sample_csv):
    """Loading the sample CSV must yield a six-row pandas DataFrame."""
    loaded = DataDashboard(sample_csv).df
    assert loaded is not None
    assert len(loaded) == 6
    assert isinstance(loaded, pd.DataFrame)
 def test_basic_stats(sample_csv):
    """The statistics must expose the expected keys and the sample's dimensions."""
    stats = DataDashboard(sample_csv).get_basic_stats()
    for required_key in ('row_count', 'column_count', 'missing_count'):
        assert required_key in stats
    assert stats['row_count'] == 6
    assert stats['column_count'] == 4
 def test_bar_chart_save(sample_csv, tmp_path):
    """Saving a bar chart must create a non-empty file at the requested path."""
    target = tmp_path / "bar_chart.png"
    # Keyword arguments are kept on purpose: the call also pins the parameter names.
    chart_kwargs = {
        'x_col': '城市',
        'y_col': 'AQI',
        'title': '各城市平均AQI',
        'save_path': str(target),
    }
    DataDashboard(sample_csv).create_bar_chart(**chart_kwargs)
    assert target.exists()
    assert target.stat().st_size > 0