From 3dc343684db134a3f274a8ca4ead8fff6a075cf5 Mon Sep 17 00:00:00 2001 From: sit002 Date: Tue, 2 Dec 2025 14:36:33 +0800 Subject: [PATCH] fix: enforce integer scores matching rubric scoring_guide - Prompt now explicitly requires integer scores (0/1/2/3/4) - Code rounds any decimal scores to nearest integer - Prevents LLM from giving 2.5, 3.5 etc. --- .autograde/llm_grade.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/.autograde/llm_grade.py b/.autograde/llm_grade.py index 08fdbd9..7d22602 100644 --- a/.autograde/llm_grade.py +++ b/.autograde/llm_grade.py @@ -36,15 +36,16 @@ def read_file_or_string(value): PROMPT_TEMPLATE = """你是严格且一致的助教,按提供的评分量表为学生的简答题评分。 -- 只依据量表中各评分项的 max_score 和 scoring_guide 进行评分 -- 每个评分项的分数范围是 0 到该项的 max_score +评分规则: +- 严格依据量表中各评分项的 scoring_guide 进行评分 +- 每个评分项只能给出 scoring_guide 中定义的整数分值(如 0, 1, 2, 3, 4) - 不输出任何解释性文本;只输出 JSON 输出格式: {{ - "total": number (各项分数之和,保留两位小数), + "total": number (各项分数之和), "criteria": [ - {{"id": "评分项id", "score": number(0到该项max_score), "reason": "简短评语"}}, + {{"id": "评分项id", "score": 整数(必须是scoring_guide中定义的分值), "reason": "简短评语"}}, ... ], "flags": [], @@ -52,7 +53,8 @@ PROMPT_TEMPLATE = """你是严格且一致的助教,按提供的评分量表 }} 重要: -- 每个评分项的 score 必须在 0 到该项 max_score 范围内 +- score 必须是整数,只能是 scoring_guide 中定义的分值(如 0/1/2/3/4) +- 不要给出 2.5, 3.5 这样的中间值 - total 必须等于所有 criteria 的 score 之和 - 如果答案与题目无关或为空,total=0,并加 flag "need_review" @@ -175,11 +177,17 @@ def main(): "confidence": 0.0 } - # 重新计算 total(不信任 LLM 返回的 total,使用各项得分之和) + # 确保各项分数是整数,并重新计算 total criteria = resp.get("criteria", []) if criteria: - calculated_total = sum(float(c.get("score", 0)) for c in criteria) - resp["total"] = round(calculated_total, 2) + for c in criteria: + # 将分数四舍五入为整数(LLM 可能返回小数) + score = c.get("score", 0) + c["score"] = round(float(score)) + + # 重新计算 total(各项得分之和) + calculated_total = sum(c.get("score", 0) for c in criteria) + resp["total"] = calculated_total # 边界带自动送审 try: