Initial commit
All checks were successful
autograde-assignment-04-visualization / check-trigger (push) Successful in 2s
autograde-assignment-04-visualization / grade (push) Has been skipped

hblu 2025-12-07 05:34:33 +08:00
commit dca2f421c2
8 changed files with 662 additions and 0 deletions


@@ -0,0 +1,238 @@
name: autograde-assignment-04-visualization
on:
push:
branches:
- main
tags:
- 'submit'
- 'submit-*'
workflow_dispatch:
permissions:
contents: read
pull-requests: write
jobs:
check-trigger:
runs-on: docker
container:
image: alpine:latest
outputs:
should_run: ${{ steps.check.outputs.trigger }}
steps:
- name: Check commit message for trigger keyword
id: check
run: |
COMMIT_MSG="${{ github.event.head_commit.message || '' }}"
echo "Commit message: $COMMIT_MSG"
if echo "$COMMIT_MSG" | grep -q "完成作业"; then
echo "trigger=true" >> $GITHUB_OUTPUT
echo "✅ Commit contains \"完成作业\",即将执行评分"
else
echo "trigger=false" >> $GITHUB_OUTPUT
echo "⛔ 只有包含"完成作业"的提交才会执行自动评分" >&2
fi
grade:
needs: check-trigger
if: needs.check-trigger.outputs.should_run == 'true'
runs-on: docker
container:
image: python:3.11
options: --user root
timeout-minutes: 20
steps:
- name: Configure APT mirror (Aliyun)
run: |
set -e
for f in /etc/apt/sources.list /etc/apt/sources.list.d/*.list /etc/apt/sources.list.d/*.sources; do
[ -f "$f" ] || continue
sed -i -E 's|https?://deb.debian.org|http://mirrors.aliyun.com|g' "$f" || true
sed -i -E 's|https?://security.debian.org|http://mirrors.aliyun.com/debian-security|g' "$f" || true
sed -i -E 's|https?://archive.ubuntu.com|http://mirrors.aliyun.com|g' "$f" || true
sed -i -E 's|https?://ports.ubuntu.com|http://mirrors.aliyun.com|g' "$f" || true
done
apt-get -o Acquire::Check-Valid-Until=false update -y
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends git ca-certificates python3-pip rsync fonts-noto-cjk fonts-wqy-microhei
rm -rf /var/lib/apt/lists/*
- name: Checkout code
env:
GITHUB_TOKEN: ${{ github.token }}
run: |
git config --global --add safe.directory ${{ github.workspace }}
git init
REPO_URL="${{ github.server_url }}/${{ github.repository }}.git"
AUTH_URL=$(echo "$REPO_URL" | sed "s|://|://${GITHUB_TOKEN}@|")
git remote add origin "$AUTH_URL"
git fetch --depth=1 origin ${{ github.sha }}
git checkout ${{ github.sha }}
- name: Fix permissions
run: chown -R $(whoami):$(whoami) ${{ github.workspace }} || true
- name: Fetch hidden tests and grading scripts
working-directory: ${{ github.workspace }}
env:
EXTERNAL_GITEA_HOST: ${{ secrets.EXTERNAL_GITEA_HOST }}
run: |
set -e
TESTS_USERNAME="${RUNNER_TESTS_USERNAME:-}"
TESTS_TOKEN="${RUNNER_TESTS_TOKEN:-}"
if [ -z "$TESTS_TOKEN" ] || [ -z "$TESTS_USERNAME" ]; then
echo "❌ RUNNER_TESTS_USERNAME / RUNNER_TESTS_TOKEN not set!"
exit 1
fi
# Resolve host
if [ -n "$EXTERNAL_GITEA_HOST" ]; then
HOST="$EXTERNAL_GITEA_HOST"
elif [ -n "$GITEA_ROOT_URL" ]; then
HOST=$(echo "$GITEA_ROOT_URL" | sed 's|https\?://||' | sed 's|/$||')
else
HOST=$(echo "${{ github.server_url }}" | sed 's|https\?://||' | cut -d'/' -f1)
fi
ORG=$(echo "${{ github.repository }}" | cut -d'/' -f1)
REPO_NAME=$(echo "${{ github.repository }}" | cut -d'/' -f2)
if echo "$REPO_NAME" | grep -q -- '-stu_'; then
ASSIGNMENT_ID=$(echo "$REPO_NAME" | sed 's/-stu_.*//')
elif echo "$REPO_NAME" | grep -q -- '-template'; then
ASSIGNMENT_ID=$(echo "$REPO_NAME" | sed 's/-template.*//')
else
ASSIGNMENT_ID="assignment-04-visualization"
fi
echo "📥 Fetching tests from ${ORG}/${ASSIGNMENT_ID}-tests..."
AUTH_URL="http://${TESTS_USERNAME}:${TESTS_TOKEN}@${HOST}/${ORG}/${ASSIGNMENT_ID}-tests.git"
git -c http.sslVerify=false clone --depth=1 "$AUTH_URL" _priv_tests
rm -rf .autograde
mkdir -p .autograde
cp _priv_tests/autograde/*.py .autograde/
cp _priv_tests/autograde/*.sh .autograde/ 2>/dev/null || true
# Copy metadata scripts if available
if [ -f "_priv_tests/autograde/create_minimal_metadata.py" ]; then
cp _priv_tests/autograde/create_minimal_metadata.py .autograde/ 2>/dev/null || true
fi
if [ -f "_priv_tests/autograde/upload_metadata.py" ]; then
cp _priv_tests/autograde/upload_metadata.py .autograde/ 2>/dev/null || true
fi
# Copy Python tests
if [ -d "_priv_tests/python" ]; then
mkdir -p tests
rsync -a _priv_tests/python/ tests/
echo "✅ Private tests copied"
fi
# Copy test groups
if [ -f "_priv_tests/test_groups.json" ]; then
cp _priv_tests/test_groups.json .
fi
# Copy LLM rubrics
if [ -d "_priv_tests/llm" ]; then
mkdir -p .llm_rubrics
cp _priv_tests/llm/*.json .llm_rubrics/ 2>/dev/null || true
fi
rm -rf _priv_tests
- name: Install Python dependencies
run: |
pip config set global.index-url https://mirrors.aliyun.com/pypi/simple
pip install --no-cache-dir -r requirements.txt
# Install grading-script dependencies
pip install --no-cache-dir pytest requests python-dotenv
- name: Run tests
working-directory: ${{ github.workspace }}
run: |
mkdir -p test-results
export PYTHONPATH="$(pwd):${PYTHONPATH}"
echo "📋 Tests to be executed:"
find tests -name "test_*.py" -type f 2>/dev/null || echo "No test files found"
pytest tests/ -v --junitxml=test-results/junit.xml || true
echo "📊 JUnit report generated"
- name: Grade programming tests
run: |
python ./.autograde/grade_grouped.py \
--junit-dir test-results \
--groups test_groups.json \
--out grade.json \
--summary summary.md
- name: Grade REPORT.md
run: |
if [ -f REPORT.md ] && [ -f .llm_rubrics/rubric_report.json ]; then
python ./.autograde/llm_grade.py \
--question "请评估这份反思报告" \
--answer REPORT.md \
--rubric .llm_rubrics/rubric_report.json \
--out report_grade.json \
--summary report_summary.md
echo "✅ REPORT.md graded"
else
echo '{"total": 0, "flags": ["missing_file"]}' > report_grade.json
echo "⚠️ REPORT.md or rubric not found"
fi
- name: Aggregate grades
run: |
python ./.autograde/aggregate_grade.py \
--programming grade.json \
--report report_grade.json \
--out final_grade.json \
--summary final_summary.md
- name: Create metadata
working-directory: ${{ github.workspace }}
env:
REPO: ${{ github.repository }}
LANGUAGE: python
run: |
if [ -f final_grade.json ]; then
# Use final grade type for aggregated grades
export GRADE_TYPE=final
export GRADE_FILE=final_grade.json
if [ -f .autograde/create_minimal_metadata.py ]; then
python ./.autograde/create_minimal_metadata.py > metadata.json || echo "{}" > metadata.json
else
echo "⚠️ create_minimal_metadata.py not found, skipping metadata creation"
echo "{}" > metadata.json
fi
fi
- name: Upload metadata
if: env.RUNNER_METADATA_TOKEN != ''
working-directory: ${{ github.workspace }}
env:
# Use the course-metadata repository in the current organization
METADATA_REPO: ${{ github.repository_owner }}/course-metadata
METADATA_TOKEN: ${{ env.RUNNER_METADATA_TOKEN }}
METADATA_BRANCH: ${{ env.RUNNER_METADATA_BRANCH }}
STUDENT_REPO: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
COMMIT_SHA: ${{ github.sha }}
SERVER_URL: ${{ github.server_url }}
run: |
if [ -f metadata.json ] && [ -f .autograde/upload_metadata.py ]; then
python ./.autograde/upload_metadata.py \
--metadata-file metadata.json \
--metadata-repo "${METADATA_REPO}" \
--branch "${METADATA_BRANCH:-main}" \
--student-repo "${STUDENT_REPO}" \
--run-id "${RUN_ID}" \
--commit-sha "${COMMIT_SHA}" \
--workflow grade \
--server-url "${SERVER_URL}" \
--external-host "${EXTERNAL_GITEA_HOST}"
else
echo "⚠️ metadata.json or upload_metadata.py not found, skipping upload"
fi

README.md Normal file (27 additions)

@@ -0,0 +1,27 @@
# Assignment 4: Data Visualization Dashboard
## Task
- Complete the `DataDashboard` class in `src/dashboard.py`: implement data loading, statistical analysis, and chart generation (see the sketch below).
- Discover patterns in the data and produce a meaningful visualization report.
- Pass the public and hidden tests; submit a `REPORT.md` reflection report.
🎯 **Key point**: the goal is not just drawing charts, but discovering patterns in the data.
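A minimal sketch of the intended workflow, mirrored from the example usage in `src/dashboard.py` (the `output/` paths are illustrative):

```python
import os
from src.dashboard import DataDashboard

# Load the sample dataset and inspect basic statistics
dashboard = DataDashboard("data/air_quality.csv")
stats = dashboard.get_basic_stats()
print(stats["row_count"], stats["column_count"])

# Generate individual charts, then a full report into output/
os.makedirs("output", exist_ok=True)
dashboard.create_bar_chart("城市", "AQI", title="各城市平均AQI", save_path="output/bar.png")
dashboard.create_line_chart("日期", "AQI", title="AQI变化趋势", save_path="output/line.png")
dashboard.generate_report("output/")
```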
## Environment and dependencies
- Python 3.11+
- Install dependencies: `pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple`
## Running locally
```bash
python -m pytest -v
```
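To run only the public tests shipped with the starter code (the hidden tests are fetched by CI and are not available locally):

```bash
python -m pytest tests/test_public.py -v
```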
## Submission requirements
- The commit message must contain the keyword "完成作业" to trigger grading (see the example below).
- Make sure `REPORT.md` is filled in, especially the "Data Findings" section.
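For example, a commit sequence like the following would trigger grading (the wording around the keyword is up to you):

```bash
git add -A
git commit -m "完成作业: implement DataDashboard and fill in REPORT.md"
git push origin main
```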
## Grading breakdown (20 points total)
- Core tests: 10 points
- Edge tests: 4 points
- REPORT.md: 6 points

REPORT.md Normal file (55 additions)

@@ -0,0 +1,55 @@
# Assignment 4 Reflection Report
## 1. Data Findings (key section, 3 points)
What did you discover in the data? Not "what charts I drew", but "what I found".
### Finding 1: [describe your finding in one sentence]
- **Observation**: describe concretely what you observed
- **Supporting data**: back it up with specific numbers or charts
- **Possible cause**: your explanation or hypothesis for the phenomenon
- **Value**: why does this finding matter, and to whom?
> [Answer here]
### Finding 2: [describe your finding in one sentence]
- **Observation**: ...
- **Supporting data**: ...
- **Possible cause**: ...
- **Value**: ...
> [Answer here]
## 2. Reasoning Behind Chart Choices
Which chart types did you choose, and why?
- Why a bar chart rather than a pie chart?
- Why a line chart rather than a scatter plot?
- Which charts did you reject, and why?
> [Answer here]
## 3. Problems with AI-Generated Charts
What was wrong with the chart code generated by AI?
### Problem 1: [description]
- What the AI's original code did:
- What the problem was:
- How you changed it:
> [Answer here]
## 4. From "Drawing Charts" to "Telling a Story"
If you had to present these charts to a manager or client, how would you organize them?
- What do you show first, and what later?
- What message should each chart convey?
- Which details need emphasis?
> [Answer here]

data/air_quality.csv Normal file (32 additions)

@@ -0,0 +1,32 @@
日期,城市,AQI,PM2.5,PM10,SO2,NO2,CO,O3
2024-01-01,北京,120,80,95,12,45,1.2,65
2024-01-01,上海,85,55,68,8,32,0.8,78
2024-01-01,广州,75,48,62,6,28,0.7,82
2024-01-02,北京,100,65,78,10,40,1.0,70
2024-01-02,上海,90,60,72,9,35,0.9,75
2024-01-02,广州,68,42,55,5,25,0.6,85
2024-01-03,北京,150,100,120,15,55,1.5,55
2024-01-03,上海,75,50,60,7,30,0.7,80
2024-01-03,广州,62,38,48,4,22,0.5,88
2024-01-04,北京,95,62,75,9,38,0.9,72
2024-01-04,上海,82,52,65,7,30,0.8,78
2024-01-04,广州,70,45,58,5,26,0.6,84
2024-01-05,北京,180,130,150,18,65,1.8,48
2024-01-05,上海,95,65,78,10,38,1.0,72
2024-01-05,广州,58,35,45,4,20,0.5,90
2024-01-06,北京,110,72,88,11,42,1.1,68
2024-01-06,上海,78,50,62,7,28,0.7,80
2024-01-06,广州,65,40,52,5,24,0.6,86
2024-01-07,北京,88,58,70,8,35,0.9,75
2024-01-07,上海,72,45,55,6,26,0.6,82
2024-01-07,广州,55,32,42,3,18,0.4,92
2024-01-08,北京,,75,90,12,48,1.3,62
2024-01-08,上海,80,52,65,,30,0.8,78
2024-01-08,广州,60,38,50,4,22,,88
2024-01-09,北京,125,85,100,13,50,1.4,60
2024-01-09,上海,88,58,72,8,34,0.9,76
2024-01-09,广州,72,46,60,6,26,0.6,84
2024-01-10,北京,140,95,115,14,52,1.5,55
2024-01-10,上海,92,62,75,9,36,1.0,74
2024-01-10,广州,68,42,55,5,24,0.5,86

requirements.txt Normal file (6 additions)

@@ -0,0 +1,6 @@
pytest>=7.0.0
pandas>=2.0.0
matplotlib>=3.7.0
seaborn>=0.12.0
pillow>=9.0.0

src/__init__.py Normal file (0 additions)

src/dashboard.py Normal file (242 additions)

@@ -0,0 +1,242 @@
"""
数据可视化仪表板
你的任务是实现 DataDashboard CSV 文件加载数据进行分析并生成可视化报告
功能要求
1. 加载 CSV 数据处理编码问题
2. 计算基本统计量
3. 生成柱状图折线图热图等可视化
4. 生成完整分析报告
🎯 核心问题AI 可以生成图表代码"什么图表值得做""数据背后有什么故事"这些需要你来判断
数据集选项
- air_quality.csv: 空气质量数据城市日期AQIPM2.5
- ecommerce_sales.csv: 电商销售数据
- exam_results.csv: 考试成绩数据
边界情况处理
- 缺失值不能导致绘图崩溃
- 中文标签需要配置字体正确显示
- 异常值不能完全破坏图表
- 空数据空列不导致崩溃
中文字体配置提示
```python
import matplotlib.pyplot as plt
# macOS
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
# Windows
# plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei']
# Linux
# plt.rcParams['font.sans-serif'] = ['WenQuanYi Micro Hei']
plt.rcParams['axes.unicode_minus'] = False
```
示例用法
dashboard = DataDashboard("data/air_quality.csv")
stats = dashboard.get_basic_stats()
dashboard.create_bar_chart('城市', 'AQI', title='各城市平均AQI', save_path='bar.png')
dashboard.create_line_chart('日期', 'AQI', title='AQI变化趋势', save_path='line.png')
dashboard.generate_report('output/')
"""
import pandas as pd
import matplotlib.pyplot as plt
from typing import Dict, List, Optional
class DataDashboard:
"""
数据可视化仪表板
CSV 文件加载数据提供统计分析和可视化功能
"""
def __init__(self, filepath: str):
"""
初始化并加载数据
Args:
filepath: 数据文件路径CSV
"""
self.df: pd.DataFrame = None
self.filepath = filepath
self.load_data(filepath)
def load_data(self, filepath: str) -> bool:
"""
加载并初步清洗数据
处理要求
- 自动检测编码UTF-8 GBK
- 记录缺失值情况
- 尝试转换日期列为 datetime
Args:
filepath: CSV 文件路径
Returns:
bool: 是否加载成功
提示
- 先尝试 UTF-8失败再尝试 GBK
- 可以使用 pd.to_datetime 转换日期列
"""
# TODO: 在此实现你的代码
pass
def get_basic_stats(self) -> Dict:
"""
计算基本统计量
Returns:
{
'row_count': 1000,
'column_count': 10,
'columns': ['col1', 'col2', ...],
'missing_count': {'col1': 5, 'col2': 10, ...},
'numeric_summary': {
'col1': {'mean': 50, 'std': 10, 'min': 0, 'max': 100},
...
}
}
注意
- numeric_summary 只包含数值列
- missing_count 包含所有列的缺失值数量
"""
# TODO: 在此实现你的代码
pass
def create_bar_chart(self, x_col: str, y_col: str,
title: Optional[str] = None,
aggfunc: str = 'mean',
save_path: Optional[str] = None) -> None:
"""
生成柱状图
Args:
x_col: X 轴列名分类变量
y_col: Y 轴列名数值变量
title: 图表标题
aggfunc: 聚合函数'mean', 'sum', 'count'
save_path: 保存路径如果为 None 则显示图表
示例
dashboard.create_bar_chart('城市', 'AQI', title='各城市平均AQI')
注意
- 需要配置中文字体
- 缺失值不应导致崩溃
"""
# TODO: 在此实现你的代码
pass
def create_line_chart(self, x_col: str, y_col: str,
title: Optional[str] = None,
save_path: Optional[str] = None) -> None:
"""
生成折线趋势图
Args:
x_col: X 轴列名通常是时间
y_col: Y 轴列名
title: 图表标题
save_path: 保存路径
示例
dashboard.create_line_chart('日期', 'AQI', title='AQI变化趋势')
"""
# TODO: 在此实现你的代码
pass
def create_heatmap(self, columns: Optional[List[str]] = None,
title: Optional[str] = None,
save_path: Optional[str] = None) -> None:
"""
生成相关性热图
Args:
columns: 要计算相关性的列None 表示所有数值列
title: 图表标题
save_path: 保存路径
示例
dashboard.create_heatmap(columns=['AQI', 'PM2.5', 'PM10'])
"""
# TODO: 在此实现你的代码
pass
def create_distribution(self, column: str,
bins: int = 20,
title: Optional[str] = None,
save_path: Optional[str] = None) -> None:
"""
生成分布直方图
Args:
column: 列名
bins: 分箱数量
title: 图表标题
save_path: 保存路径
"""
# TODO: 在此实现你的代码
pass
def generate_report(self, output_dir: str) -> Dict:
"""
生成完整分析报告多个图表
Args:
output_dir: 输出目录
Returns:
{'generated_files': ['bar.png', 'line.png', ...]}
报告应包含
- 至少一个柱状图
- 至少一个折线图
- 可选热图分布图等
"""
# TODO: 在此实现你的代码
pass
if __name__ == "__main__":
# Try out your implementation
import os
# Configure a Chinese-capable font
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False
# Check that the data file exists
data_file = "data/air_quality.csv"
if not os.path.exists(data_file):
print(f"Please prepare the data file first: {data_file}")
print("Sample data is available in the assignment description")
else:
dashboard = DataDashboard(data_file)
# Basic statistics
print("=== Basic statistics ===")
stats = dashboard.get_basic_stats()
print(f"Rows: {stats.get('row_count', 'N/A')}")
print(f"Columns: {stats.get('column_count', 'N/A')}")
print(f"Missing values: {stats.get('missing_count', {})}")
# Chart generation
print("\n=== Generating charts ===")
os.makedirs("output", exist_ok=True)
dashboard.create_bar_chart(
'城市', 'AQI',
title='各城市平均AQI',
save_path='output/bar_chart.png'
)
print("✅ 柱状图已生成")
print("\n✅ 测试完成")

tests/test_public.py Normal file (62 additions)

@@ -0,0 +1,62 @@
"""
公开测试 - 学生可见
这些测试帮助你验证基本功能是否正确
"""
import pytest
from pathlib import Path
import pandas as pd
from src.dashboard import DataDashboard
@pytest.fixture
def sample_csv(tmp_path):
"""创建测试用的 CSV 文件"""
content = """日期,城市,AQI,PM2.5
2024-01-01,北京,120,80
2024-01-01,上海,85,55
2024-01-02,北京,100,65
2024-01-02,上海,90,60
2024-01-03,北京,150,100
2024-01-03,上海,75,50"""
p = tmp_path / "test_data.csv"
p.write_text(content, encoding='utf-8')
return str(p)
def test_load_csv(sample_csv):
"""测试能否成功加载 CSV 文件"""
dashboard = DataDashboard(sample_csv)
assert dashboard.df is not None
assert len(dashboard.df) == 6
assert isinstance(dashboard.df, pd.DataFrame)
def test_basic_stats(sample_csv):
"""测试基本统计功能"""
dashboard = DataDashboard(sample_csv)
stats = dashboard.get_basic_stats()
assert 'row_count' in stats
assert 'column_count' in stats
assert 'missing_count' in stats
assert stats['row_count'] == 6
assert stats['column_count'] == 4
def test_bar_chart_save(sample_csv, tmp_path):
"""测试柱状图保存"""
dashboard = DataDashboard(sample_csv)
output_path = tmp_path / "bar_chart.png"
dashboard.create_bar_chart(
x_col='城市',
y_col='AQI',
title='各城市平均AQI',
save_path=str(output_path)
)
assert output_path.exists()
assert output_path.stat().st_size > 0