def _reorder_like(mapping, reference):
    """Return *mapping* re-ordered to follow the key order of *reference*.

    When the two mappings do not hold exactly the same set of keys there is
    no unambiguous alignment, so *mapping* is returned unchanged and callers
    fall back to plain positional pairing.
    """
    if mapping.keys() != reference.keys():
        return mapping
    return {k: mapping[k] for k in reference}


@unify.log
def evaluate(
    question,
    student_answer,
    available_marks_total,
    markscheme,
    correct_marks,
    per_question_breakdown,
    _system_message,
):
    """Mark a student answer with the agent and return the total absolute error.

    The agent's predicted marks are compared against ``correct_marks`` entry
    by entry; the return value is the sum of the absolute per-entry
    differences of their ``"marks"`` values.

    Args:
        question: Forwarded to ``call_agent``.
        student_answer: Forwarded to ``call_agent``.
        available_marks_total: Forwarded to ``call_agent``.
        markscheme: Forwarded to ``call_agent``.
        correct_marks: Mapping whose values are indexable by ``"marks"``
            (the reference marks).
        per_question_breakdown: Mapping of per-entry dicts; rebuilt locally
            with ``"predicted_marks"`` and ``"diff"`` added to each entry.
        _system_message: Forwarded to ``call_agent`` as its first argument.

    Returns:
        The sum of ``abs(correct - predicted)`` over the paired entries.
    """
    pred_marks = call_agent(
        _system_message,
        question,
        markscheme,
        student_answer,
        available_marks_total,
    )
    # The entries below are paired positionally with zip(), so make sure the
    # prediction follows the same key order as the reference marks whenever
    # both describe the same keys. Otherwise a differently-ordered agent
    # response would be diffed against the wrong entries without any error.
    pred_marks = _reorder_like(pred_marks, correct_marks)

    # NOTE(review): pred_marks_total and diff_total do not feed the return
    # value; presumably they are kept so that the @unify.log decorator can
    # record them -- confirm before removing or renaming any local here.
    pred_marks_total = sum(v["marks"] for v in pred_marks.values())
    diff = {
        k: vcor["marks"] - vpred["marks"]
        for (k, vcor), (_, vpred) in zip(correct_marks.items(), pred_marks.items())
    }
    error = {k: abs(v) for k, v in diff.items()}
    diff_total = sum(diff.values())
    error_total = sum(error.values())

    # Rebinds the local name only; the caller's mapping is not mutated.
    # Predictions and diffs are aligned to the breakdown's own key order
    # (when the key sets match) for the same reason as above.
    per_question_breakdown = {
        k: {
            **pqb,
            "predicted_marks": pm,
            "diff": d,
        }
        for (k, pqb), pm, d in zip(
            per_question_breakdown.items(),
            _reorder_like(pred_marks, per_question_breakdown).values(),
            _reorder_like(diff, per_question_breakdown).values(),
        )
    }
    return error_total