From 3dc343684db134a3f274a8ca4ead8fff6a075cf5 Mon Sep 17 00:00:00 2001
From: sit002 <hblu1985@163.com>
Date: Tue, 2 Dec 2025 14:36:33 +0800
Subject: [PATCH] fix: enforce integer scores matching rubric scoring_guide

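- Prompt now explicitly requires integer scores (e.g. 0/1/2/3/4) as defined in each criterion's scoring_guide
- Code rounds any decimal score to the nearest integer with Python's built-in round(); exact .5 ties round to the even integer (2.5 -> 2, 3.5 -> 4)
- Prevents the LLM from giving fractional scores such as 2.5 or 3.5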
---
 .autograde/llm_grade.py | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/.autograde/llm_grade.py b/.autograde/llm_grade.py
index 08fdbd9..7d22602 100644
--- a/.autograde/llm_grade.py
+++ b/.autograde/llm_grade.py
@@ -36,15 +36,16 @@ def read_file_or_string(value):
 
 PROMPT_TEMPLATE = """你是严格且一致的助教，按提供的评分量表为学生的简答题评分。
 
-- 只依据量表中各评分项的 max_score 和 scoring_guide 进行评分
-- 每个评分项的分数范围是 0 到该项的 max_score
+评分规则：
+- 严格依据量表中各评分项的 scoring_guide 进行评分
+- 每个评分项只能给出 scoring_guide 中定义的整数分值（如 0, 1, 2, 3, 4）
 - 不输出任何解释性文本；只输出 JSON
 
 输出格式：
 {{
-  "total": number (各项分数之和，保留两位小数),
+  "total": number (各项分数之和),
   "criteria": [
-    {{"id": "评分项id", "score": number(0到该项max_score), "reason": "简短评语"}},
+    {{"id": "评分项id", "score": 整数(必须是scoring_guide中定义的分值), "reason": "简短评语"}},
     ...
   ],
   "flags": [],
@@ -52,7 +53,8 @@ PROMPT_TEMPLATE = """你是严格且一致的助教，按提供的评分量表
 }}
 
 重要：
-- 每个评分项的 score 必须在 0 到该项 max_score 范围内
+- score 必须是整数，只能是 scoring_guide 中定义的分值（如 0/1/2/3/4）
+- 不要给出 2.5, 3.5 这样的中间值
 - total 必须等于所有 criteria 的 score 之和
 - 如果答案与题目无关或为空，total=0，并加 flag "need_review"
 
@@ -175,11 +177,17 @@ def main():
                 "confidence": 0.0
             }
     
-    # 重新计算 total（不信任 LLM 返回的 total，使用各项得分之和）
+    # 确保各项分数是整数，并重新计算 total
     criteria = resp.get("criteria", [])
     if criteria:
-        calculated_total = sum(float(c.get("score", 0)) for c in criteria)
-        resp["total"] = round(calculated_total, 2)
+        for c in criteria:
+            # 将分数四舍五入为整数（LLM 可能返回小数）
+            score = c.get("score", 0)
+            c["score"] = round(float(score))
+        
+        # 重新计算 total（各项得分之和）
+        calculated_total = sum(c.get("score", 0) for c in criteria)
+        resp["total"] = calculated_total
     
     # 边界带自动送审
     try: