| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | import re |
| | from typing import Dict, List |
| |
|
| | from mathruler.grader import extract_boxed_content, grade_answer |
| |
|
| |
|
def format_reward(predict: str) -> float:
    """Score whether a response follows the think-then-boxed layout.

    The whole response must start with ``<think>``, contain a closing
    ``</think>`` and, somewhere after it, a ``\\boxed{...}`` expression.
    Trailing text after the box is allowed; any text before ``<think>`` is not.

    Args:
        predict: The full response text to check.

    Returns:
        1.0 if the entire response matches the layout, otherwise 0.0.
    """
    # DOTALL lets "." span newlines, so multi-line reasoning still matches.
    matched = re.fullmatch(r"<think>.*</think>.*\\boxed\{.*\}.*", predict, re.DOTALL)
    return float(matched is not None)
| |
|
| |
|
def accuracy_reward(predict: str, ground_truth: str) -> float:
    """Score whether the boxed answer in a response matches the reference.

    The response is passed to ``extract_boxed_content`` and the result is
    compared against ``ground_truth`` by ``grade_answer`` (both imported from
    ``mathruler.grader``).

    Args:
        predict: The full response text.
        ground_truth: The reference answer to grade against.

    Returns:
        1.0 if ``grade_answer`` returns a truthy value, otherwise 0.0.
    """
    # NOTE(review): what extract_boxed_content yields when no \boxed{} is
    # present is not visible here -- confirm against mathruler.
    boxed = extract_boxed_content(predict)
    if grade_answer(boxed, ground_truth):
        return 1.0
    return 0.0
| |
|
| |
|
def compute_score(predicts: List[str], ground_truths: List[str], format_weight: float = 0.1) -> List[Dict[str, float]]:
    """Compute format, accuracy and blended scores for a batch of responses.

    Responses and references are paired positionally with ``zip``, so any
    surplus items in the longer list are ignored.

    Args:
        predicts: Response texts to score.
        ground_truths: Reference answers, aligned with ``predicts``.
        format_weight: Share of the overall score given to the format score;
            the accuracy score receives ``1 - format_weight``.

    Returns:
        One dict per pair with the keys ``"overall"``, ``"format"`` and
        ``"accuracy"``.
    """
    accuracy_weight = 1 - format_weight

    def _score_pair(response: str, reference: str) -> Dict[str, float]:
        # Drop whitespace adjacent to "<", ">" and "/" so that tags written
        # with stray spaces (e.g. "< /think >") are normalized before scoring.
        cleaned = re.sub(r"\s*(<|>|/)\s*", r"\1", response)
        fmt = format_reward(cleaned)
        acc = accuracy_reward(cleaned, reference)
        return {
            "overall": accuracy_weight * acc + format_weight * fmt,
            "format": fmt,
            "accuracy": acc,
        }

    return [_score_pair(response, reference) for response, reference in zip(predicts, ground_truths)]
| |
|