From dca2f421c2062e4c214f45257e2a4bdf407dbad2 Mon Sep 17 00:00:00 2001 From: hblu Date: Sun, 7 Dec 2025 05:34:33 +0800 Subject: [PATCH] Initial commit --- .gitea/workflows/autograde.yml | 239 ++++++++++++++++++++++++++++++++ README.md | 27 ++++ REPORT.md | 55 ++++++++ data/air_quality.csv | 32 +++++ requirements.txt | 6 + src/__init__.py | 0 src/dashboard.py | 242 +++++++++++++++++++++++++++++++++ tests/test_public.py | 62 +++++++++ 8 files changed, 663 insertions(+) create mode 100644 .gitea/workflows/autograde.yml create mode 100644 README.md create mode 100644 REPORT.md create mode 100644 data/air_quality.csv create mode 100644 requirements.txt create mode 100644 src/__init__.py create mode 100644 src/dashboard.py create mode 100644 tests/test_public.py diff --git a/.gitea/workflows/autograde.yml b/.gitea/workflows/autograde.yml new file mode 100644 index 0000000..fe15c7a --- /dev/null +++ b/.gitea/workflows/autograde.yml @@ -0,0 +1,239 @@ +name: autograde-assignment-04-visualization + +on: + push: + branches: + - main + tags: + - 'submit' + - 'submit-*' + workflow_dispatch: + +permissions: + contents: read + pull-requests: write + +jobs: + check-trigger: + runs-on: docker + container: + image: alpine:latest + outputs: + should_run: ${{ steps.check.outputs.trigger }} + steps: + - name: Check commit message for trigger keyword + id: check + env: + COMMIT_MSG: ${{ github.event.head_commit.message || '' }} + run: | + echo "Commit message: $COMMIT_MSG" + if echo "$COMMIT_MSG" | grep -q "完成作业"; then + echo "trigger=true" >> $GITHUB_OUTPUT + echo "✅ Commit contains \"完成作业\",即将执行评分" + else + echo "trigger=false" >> $GITHUB_OUTPUT + echo "⛔ 只有包含\"完成作业\"的提交才会执行自动评分" >&2 + fi + + grade: + needs: check-trigger + if: needs.check-trigger.outputs.should_run == 'true' + runs-on: docker + container: + image: python:3.11 + options: --user root + timeout-minutes: 20 + + steps: + - name: Configure APT mirror (Aliyun) + run: | + set -e + for f in /etc/apt/sources.list 
/etc/apt/sources.list.d/*.list /etc/apt/sources.list.d/*.sources; do + [ -f "$f" ] || continue + sed -i -E 's|https?://deb.debian.org|http://mirrors.aliyun.com|g' "$f" || true + sed -i -E 's|https?://security.debian.org|http://mirrors.aliyun.com/debian-security|g' "$f" || true + sed -i -E 's|https?://archive.ubuntu.com|http://mirrors.aliyun.com|g' "$f" || true + sed -i -E 's|https?://ports.ubuntu.com|http://mirrors.aliyun.com|g' "$f" || true + done + apt-get -o Acquire::Check-Valid-Until=false update -y + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends git ca-certificates python3-pip rsync fonts-noto-cjk fonts-wqy-microhei + rm -rf /var/lib/apt/lists/* + + - name: Checkout code + env: + GITHUB_TOKEN: ${{ github.token }} + run: | + git config --global --add safe.directory ${{ github.workspace }} + git init + REPO_URL="${{ github.server_url }}/${{ github.repository }}.git" + AUTH_URL=$(echo "$REPO_URL" | sed "s|://|://${GITHUB_TOKEN}@|") + git remote add origin "$AUTH_URL" + git fetch --depth=1 origin ${{ github.sha }} + git checkout ${{ github.sha }} + + - name: Fix permissions + run: chown -R $(whoami):$(whoami) ${{ github.workspace }} || true + + - name: Fetch hidden tests and grading scripts + working-directory: ${{ github.workspace }} + env: + EXTERNAL_GITEA_HOST: ${{ secrets.EXTERNAL_GITEA_HOST }} + run: | + set -e + TESTS_USERNAME="${RUNNER_TESTS_USERNAME:-}" + TESTS_TOKEN="${RUNNER_TESTS_TOKEN:-}" + if [ -z "$TESTS_TOKEN" ] || [ -z "$TESTS_USERNAME" ]; then + echo "❌ RUNNER_TESTS_USERNAME / RUNNER_TESTS_TOKEN not set!" 
+ exit 1 + fi + + # Resolve host + if [ -n "$EXTERNAL_GITEA_HOST" ]; then + HOST="$EXTERNAL_GITEA_HOST" + elif [ -n "$GITEA_ROOT_URL" ]; then + HOST=$(echo "$GITEA_ROOT_URL" | sed 's|https\?://||' | sed 's|/$||') + else + HOST=$(echo "${{ github.server_url }}" | sed 's|https\?://||' | cut -d'/' -f1) + fi + + ORG=$(echo "${{ github.repository }}" | cut -d'/' -f1) + REPO_NAME=$(echo "${{ github.repository }}" | cut -d'/' -f2) + + if echo "$REPO_NAME" | grep -q -- '-stu_'; then + ASSIGNMENT_ID=$(echo "$REPO_NAME" | sed 's/-stu_.*//') + elif echo "$REPO_NAME" | grep -q -- '-template'; then + ASSIGNMENT_ID=$(echo "$REPO_NAME" | sed 's/-template.*//') + else + ASSIGNMENT_ID="assignment-04-visualization" + fi + + echo "📥 Fetching tests from ${ORG}/${ASSIGNMENT_ID}-tests..." + AUTH_URL="http://${TESTS_USERNAME}:${TESTS_TOKEN}@${HOST}/${ORG}/${ASSIGNMENT_ID}-tests.git" + git -c http.sslVerify=false clone --depth=1 "$AUTH_URL" _priv_tests + + rm -rf .autograde + mkdir -p .autograde + cp _priv_tests/autograde/*.py .autograde/ + cp _priv_tests/autograde/*.sh .autograde/ 2>/dev/null || true + # Copy metadata scripts if available + if [ -f "_priv_tests/autograde/create_minimal_metadata.py" ]; then + cp _priv_tests/autograde/create_minimal_metadata.py .autograde/ 2>/dev/null || true + fi + if [ -f "_priv_tests/autograde/upload_metadata.py" ]; then + cp _priv_tests/autograde/upload_metadata.py .autograde/ 2>/dev/null || true + fi + + # Copy Python tests + if [ -d "_priv_tests/python" ]; then + mkdir -p tests + rsync -a _priv_tests/python/ tests/ + echo "✅ Private tests copied" + fi + + # Copy test groups + if [ -f "_priv_tests/test_groups.json" ]; then + cp _priv_tests/test_groups.json . 
+ fi + + # Copy LLM rubrics + if [ -d "_priv_tests/llm" ]; then + mkdir -p .llm_rubrics + cp _priv_tests/llm/*.json .llm_rubrics/ 2>/dev/null || true + fi + + rm -rf _priv_tests + + - name: Install Python dependencies + run: | + pip config set global.index-url https://mirrors.aliyun.com/pypi/simple + pip install --no-cache-dir -r requirements.txt + # 安装评分脚本依赖 + pip install --no-cache-dir pytest requests python-dotenv + + - name: Run tests + working-directory: ${{ github.workspace }} + run: | + mkdir -p test-results + export PYTHONPATH="$(pwd):${PYTHONPATH}" + echo "📋 Tests to be executed:" + find tests -name "test_*.py" -type f 2>/dev/null || echo "No test files found" + pytest tests/ -v --junitxml=test-results/junit.xml || true + echo "📊 JUnit report generated" + + - name: Grade programming tests + run: | + python ./.autograde/grade_grouped.py \ + --junit-dir test-results \ + --groups test_groups.json \ + --out grade.json \ + --summary summary.md + + - name: Grade REPORT.md + run: | + if [ -f REPORT.md ] && [ -f .llm_rubrics/rubric_report.json ]; then + python ./.autograde/llm_grade.py \ + --question "请评估这份反思报告" \ + --answer REPORT.md \ + --rubric .llm_rubrics/rubric_report.json \ + --out report_grade.json \ + --summary report_summary.md + echo "✅ REPORT.md graded" + else + echo '{"total": 0, "flags": ["missing_file"]}' > report_grade.json + echo "⚠️ REPORT.md or rubric not found" + fi + + - name: Aggregate grades + run: | + python ./.autograde/aggregate_grade.py \ + --programming grade.json \ + --report report_grade.json \ + --out final_grade.json \ + --summary final_summary.md + + - name: Create metadata + working-directory: ${{ github.workspace }} + env: + REPO: ${{ github.repository }} + LANGUAGE: python + run: | + if [ -f final_grade.json ]; then + # Use final grade type for aggregated grades + export GRADE_TYPE=final + export GRADE_FILE=final_grade.json + if [ -f .autograde/create_minimal_metadata.py ]; then + python ./.autograde/create_minimal_metadata.py > 
metadata.json || echo "{}" > metadata.json + else + echo "⚠️ create_minimal_metadata.py not found, skipping metadata creation" + echo "{}" > metadata.json + fi + fi + + - name: Upload metadata + if: env.RUNNER_METADATA_TOKEN != '' + working-directory: ${{ github.workspace }} + env: + # 使用当前组织的 course-metadata 仓库 + METADATA_REPO: ${{ github.repository_owner }}/course-metadata + METADATA_TOKEN: ${{ env.RUNNER_METADATA_TOKEN }} + METADATA_BRANCH: ${{ env.RUNNER_METADATA_BRANCH }} + STUDENT_REPO: ${{ github.repository }} + RUN_ID: ${{ github.run_id }} + COMMIT_SHA: ${{ github.sha }} + SERVER_URL: ${{ github.server_url }} + run: | + if [ -f metadata.json ] && [ -f .autograde/upload_metadata.py ]; then + python ./.autograde/upload_metadata.py \ + --metadata-file metadata.json \ + --metadata-repo "${METADATA_REPO}" \ + --branch "${METADATA_BRANCH:-main}" \ + --student-repo "${STUDENT_REPO}" \ + --run-id "${RUN_ID}" \ + --commit-sha "${COMMIT_SHA}" \ + --workflow grade \ + --server-url "${SERVER_URL}" \ + --external-host "${EXTERNAL_GITEA_HOST}" + else + echo "⚠️ metadata.json or upload_metadata.py not found, skipping upload" + fi + diff --git a/README.md b/README.md new file mode 100644 index 0000000..f8d9b20 --- /dev/null +++ b/README.md @@ -0,0 +1,27 @@ +# 作业 4:数据可视化仪表板 + +## 任务 +- 在 `src/dashboard.py` 中完成 `DataDashboard` 类:实现数据加载、统计分析、图表生成等功能。 +- 从数据中发现规律,生成有意义的可视化报告。 +- 通过公开测试与隐藏测试;提交 `REPORT.md` 反思报告。 + +🎯 **重点**:不只是画图,而是从数据中发现规律。 + +## 环境与依赖 +- Python 3.11+ +- 安装依赖:`pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple` + +## 本地运行 +```bash +python -m pytest -v +``` + +## 提交要求 +- 提交信息需包含关键字"完成作业"以触发评分。 +- 确保 `REPORT.md` 已填写,特别是"数据发现"部分。 + +## 评分构成(总分 20) +- Core 测试:10 分 +- Edge 测试:4 分 +- REPORT.md:6 分 + diff --git a/REPORT.md b/REPORT.md new file mode 100644 index 0000000..eb500fd --- /dev/null +++ b/REPORT.md @@ -0,0 +1,55 @@ +# 作业 4 反思报告 + +## 1. 
数据发现(重点,3分) + +你从数据中发现了什么?不是"我画了什么图",而是"我发现了什么"。 + +### 发现 1:[用一句话描述你的发现] + +- **现象**:具体描述你观察到的现象 +- **数据支撑**:用具体数字或图表说明 +- **可能原因**:你对这个现象的解释或猜测 +- **价值**:这个发现有什么用?谁会关心? + +> [在此处回答] + +### 发现 2:[用一句话描述你的发现] + +- **现象**:... +- **数据支撑**:... +- **可能原因**:... +- **价值**:... + +> [在此处回答] + +## 2. 图表选择的思考 + +你选择了哪些类型的图表?为什么? + +- 为什么用柱状图而不是饼图? +- 为什么用折线图而不是散点图? +- 你放弃了哪些图表?为什么? + +> [在此处回答] + +## 3. AI 图表的问题 + +AI 生成的图表代码,有什么问题? + +### 问题 1:[问题描述] +- AI 原代码的行为: +- 问题所在: +- 你的修改: + +> [在此处回答] + +## 4. 从"画图"到"讲故事" + +如果你要用这些图表给领导/客户做汇报,你会如何组织? + +- 先展示什么?后展示什么? +- 每张图要传达什么信息? +- 哪些细节需要强调? + +> [在此处回答] + diff --git a/data/air_quality.csv b/data/air_quality.csv new file mode 100644 index 0000000..e3ab676 --- /dev/null +++ b/data/air_quality.csv @@ -0,0 +1,32 @@ +日期,城市,AQI,PM2.5,PM10,SO2,NO2,CO,O3 +2024-01-01,北京,120,80,95,12,45,1.2,65 +2024-01-01,上海,85,55,68,8,32,0.8,78 +2024-01-01,广州,75,48,62,6,28,0.7,82 +2024-01-02,北京,100,65,78,10,40,1.0,70 +2024-01-02,上海,90,60,72,9,35,0.9,75 +2024-01-02,广州,68,42,55,5,25,0.6,85 +2024-01-03,北京,150,100,120,15,55,1.5,55 +2024-01-03,上海,75,50,60,7,30,0.7,80 +2024-01-03,广州,62,38,48,4,22,0.5,88 +2024-01-04,北京,95,62,75,9,38,0.9,72 +2024-01-04,上海,82,52,65,7,30,0.8,78 +2024-01-04,广州,70,45,58,5,26,0.6,84 +2024-01-05,北京,180,130,150,18,65,1.8,48 +2024-01-05,上海,95,65,78,10,38,1.0,72 +2024-01-05,广州,58,35,45,4,20,0.5,90 +2024-01-06,北京,110,72,88,11,42,1.1,68 +2024-01-06,上海,78,50,62,7,28,0.7,80 +2024-01-06,广州,65,40,52,5,24,0.6,86 +2024-01-07,北京,88,58,70,8,35,0.9,75 +2024-01-07,上海,72,45,55,6,26,0.6,82 +2024-01-07,广州,55,32,42,3,18,0.4,92 +2024-01-08,北京,,75,90,12,48,1.3,62 +2024-01-08,上海,80,52,65,,30,0.8,78 +2024-01-08,广州,60,38,50,4,22,,88 +2024-01-09,北京,125,85,100,13,50,1.4,60 +2024-01-09,上海,88,58,72,8,34,0.9,76 +2024-01-09,广州,72,46,60,6,26,0.6,84 +2024-01-10,北京,140,95,115,14,52,1.5,55 +2024-01-10,上海,92,62,75,9,36,1.0,74 +2024-01-10,广州,68,42,55,5,24,0.5,86 + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d3a8558 --- 
/dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +pytest>=7.0.0 +pandas>=2.0.0 +matplotlib>=3.7.0 +seaborn>=0.12.0 +pillow>=9.0.0 + diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/dashboard.py b/src/dashboard.py new file mode 100644 index 0000000..d4534fc --- /dev/null +++ b/src/dashboard.py @@ -0,0 +1,242 @@ +""" +数据可视化仪表板 + +你的任务是实现 DataDashboard 类,从 CSV 文件加载数据,进行分析并生成可视化报告。 + +功能要求: +1. 加载 CSV 数据(处理编码问题) +2. 计算基本统计量 +3. 生成柱状图、折线图、热图等可视化 +4. 生成完整分析报告 + +🎯 核心问题:AI 可以生成图表代码,但"什么图表值得做"、"数据背后有什么故事"——这些需要你来判断。 + +数据集选项: +- air_quality.csv: 空气质量数据(城市、日期、AQI、PM2.5 等) +- ecommerce_sales.csv: 电商销售数据 +- exam_results.csv: 考试成绩数据 + +边界情况处理: +- 缺失值:不能导致绘图崩溃 +- 中文标签:需要配置字体正确显示 +- 异常值:不能完全破坏图表 +- 空数据:空列不导致崩溃 + +中文字体配置提示: +```python +import matplotlib.pyplot as plt +# macOS +plt.rcParams['font.sans-serif'] = ['Arial Unicode MS'] +# Windows +# plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei'] +# Linux +# plt.rcParams['font.sans-serif'] = ['WenQuanYi Micro Hei'] +plt.rcParams['axes.unicode_minus'] = False +``` + +示例用法: +dashboard = DataDashboard("data/air_quality.csv") +stats = dashboard.get_basic_stats() +dashboard.create_bar_chart('城市', 'AQI', title='各城市平均AQI', save_path='bar.png') +dashboard.create_line_chart('日期', 'AQI', title='AQI变化趋势', save_path='line.png') +dashboard.generate_report('output/') +""" + +import pandas as pd +import matplotlib.pyplot as plt +from typing import Dict, List, Optional + + +class DataDashboard: + """ + 数据可视化仪表板 + + 从 CSV 文件加载数据,提供统计分析和可视化功能。 + """ + + def __init__(self, filepath: str): + """ + 初始化并加载数据 + + Args: + filepath: 数据文件路径(CSV) + """ + self.df: Optional[pd.DataFrame] = None + self.filepath = filepath + self.load_data(filepath) + + def load_data(self, filepath: str) -> bool: + """ + 加载并初步清洗数据 + + 处理要求: + - 自动检测编码(UTF-8 或 GBK) + - 记录缺失值情况 + - 尝试转换日期列为 datetime + + Args: + filepath: CSV 文件路径 + + Returns: + bool: 是否加载成功 + + 提示: + - 先尝试 UTF-8,失败再尝试 GBK + - 可以使用 pd.to_datetime 转换日期列 + 
""" + # TODO: 在此实现你的代码 + pass + + def get_basic_stats(self) -> Dict: + """ + 计算基本统计量 + + Returns: + { + 'row_count': 1000, + 'column_count': 10, + 'columns': ['col1', 'col2', ...], + 'missing_count': {'col1': 5, 'col2': 10, ...}, + 'numeric_summary': { + 'col1': {'mean': 50, 'std': 10, 'min': 0, 'max': 100}, + ... + } + } + + 注意: + - numeric_summary 只包含数值列 + - missing_count 包含所有列的缺失值数量 + """ + # TODO: 在此实现你的代码 + pass + + def create_bar_chart(self, x_col: str, y_col: str, + title: Optional[str] = None, + aggfunc: str = 'mean', + save_path: Optional[str] = None) -> None: + """ + 生成柱状图 + + Args: + x_col: X 轴列名(分类变量) + y_col: Y 轴列名(数值变量) + title: 图表标题 + aggfunc: 聚合函数('mean', 'sum', 'count') + save_path: 保存路径,如果为 None 则显示图表 + + 示例: + dashboard.create_bar_chart('城市', 'AQI', title='各城市平均AQI') + + 注意: + - 需要配置中文字体 + - 缺失值不应导致崩溃 + """ + # TODO: 在此实现你的代码 + pass + + def create_line_chart(self, x_col: str, y_col: str, + title: Optional[str] = None, + save_path: Optional[str] = None) -> None: + """ + 生成折线趋势图 + + Args: + x_col: X 轴列名(通常是时间) + y_col: Y 轴列名 + title: 图表标题 + save_path: 保存路径 + + 示例: + dashboard.create_line_chart('日期', 'AQI', title='AQI变化趋势') + """ + # TODO: 在此实现你的代码 + pass + + def create_heatmap(self, columns: Optional[List[str]] = None, + title: Optional[str] = None, + save_path: Optional[str] = None) -> None: + """ + 生成相关性热图 + + Args: + columns: 要计算相关性的列,None 表示所有数值列 + title: 图表标题 + save_path: 保存路径 + + 示例: + dashboard.create_heatmap(columns=['AQI', 'PM2.5', 'PM10']) + """ + # TODO: 在此实现你的代码 + pass + + def create_distribution(self, column: str, + bins: int = 20, + title: Optional[str] = None, + save_path: Optional[str] = None) -> None: + """ + 生成分布直方图 + + Args: + column: 列名 + bins: 分箱数量 + title: 图表标题 + save_path: 保存路径 + """ + # TODO: 在此实现你的代码 + pass + + def generate_report(self, output_dir: str) -> Dict: + """ + 生成完整分析报告(多个图表) + + Args: + output_dir: 输出目录 + + Returns: + {'generated_files': ['bar.png', 'line.png', ...]} + + 报告应包含: + - 至少一个柱状图 + - 至少一个折线图 + - 
可选:热图、分布图等 + """ + # TODO: 在此实现你的代码 + pass + + +if __name__ == "__main__": + # 测试你的实现 + import os + + # 配置中文字体 + plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'Microsoft YaHei', 'WenQuanYi Micro Hei', 'DejaVu Sans'] + plt.rcParams['axes.unicode_minus'] = False + + # 检查数据文件是否存在 + data_file = "data/air_quality.csv" + if not os.path.exists(data_file): + print(f"请先准备数据文件: {data_file}") + print("可以从作业说明中获取示例数据") + else: + dashboard = DataDashboard(data_file) + + # 测试基本统计 + print("=== 基本统计 ===") + stats = dashboard.get_basic_stats() + print(f"行数: {stats.get('row_count', 'N/A')}") + print(f"列数: {stats.get('column_count', 'N/A')}") + print(f"缺失值: {stats.get('missing_count', {})}") + + # 测试图表生成 + print("\n=== 生成图表 ===") + os.makedirs("output", exist_ok=True) + + dashboard.create_bar_chart( + '城市', 'AQI', + title='各城市平均AQI', + save_path='output/bar_chart.png' + ) + print("✅ 柱状图已生成") + + print("\n✅ 测试完成") + diff --git a/tests/test_public.py b/tests/test_public.py new file mode 100644 index 0000000..08e1f56 --- /dev/null +++ b/tests/test_public.py @@ -0,0 +1,62 @@ +""" +公开测试 - 学生可见 +这些测试帮助你验证基本功能是否正确 +""" + +import pytest +from pathlib import Path +import pandas as pd +from src.dashboard import DataDashboard + + +@pytest.fixture +def sample_csv(tmp_path): + """创建测试用的 CSV 文件""" + content = """日期,城市,AQI,PM2.5 +2024-01-01,北京,120,80 +2024-01-01,上海,85,55 +2024-01-02,北京,100,65 +2024-01-02,上海,90,60 +2024-01-03,北京,150,100 +2024-01-03,上海,75,50""" + p = tmp_path / "test_data.csv" + p.write_text(content, encoding='utf-8') + return str(p) + + +def test_load_csv(sample_csv): + """测试能否成功加载 CSV 文件""" + dashboard = DataDashboard(sample_csv) + + assert dashboard.df is not None + assert len(dashboard.df) == 6 + assert isinstance(dashboard.df, pd.DataFrame) + + +def test_basic_stats(sample_csv): + """测试基本统计功能""" + dashboard = DataDashboard(sample_csv) + stats = dashboard.get_basic_stats() + + assert 'row_count' in stats + assert 'column_count' in stats + assert 'missing_count' in stats + assert stats['row_count'] == 6 
+ assert stats['column_count'] == 4 + + +def test_bar_chart_save(sample_csv, tmp_path): + """测试柱状图保存""" + dashboard = DataDashboard(sample_csv) + + output_path = tmp_path / "bar_chart.png" + dashboard.create_bar_chart( + x_col='城市', + y_col='AQI', + title='各城市平均AQI', + save_path=str(output_path) + ) + + assert output_path.exists() + assert output_path.stat().st_size > 0 +