| | import os |
| | import json |
| | import random |
| | import json |
| | import os |
| | import numpy as np |
| | from pathlib import Path |
| | from typing import Iterable, Union, Any |
| |
|
| | from examples import get_examples |
| |
|
| |
|
def set_seed(seed: int = 42) -> None:
    """Seed the random number sources used by this module.

    Seeds NumPy's global generator and Python's ``random`` module, stores
    the seed in the ``PYTHONHASHSEED`` environment variable, and prints a
    confirmation line.

    Args:
        seed: Value passed to every seeding call. Defaults to 42.
    """
    # Same order as always: NumPy first, then the stdlib generator.
    for seed_fn in (np.random.seed, random.seed):
        seed_fn(seed)

    # NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so
    # this presumably only affects child processes -- confirm intent.
    os.environ["PYTHONHASHSEED"] = "{}".format(seed)

    print("Random seed set as {}".format(seed))
| |
|
| |
|
def load_jsonl(file: Union[str, Path]) -> Iterable[Any]:
    """Lazily yield the decoded JSON value of each line in a JSON Lines file.

    Args:
        file: Path of the UTF-8 encoded ``.jsonl`` file to read.

    Yields:
        The object produced by ``json.loads`` for each non-blank line, in
        file order.

    Raises:
        SystemExit: With exit status 1 when a line is not valid JSON. The
            offending line is printed first.
    """
    with open(file, "r", encoding="utf-8") as f:
        for line in f:
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # record; skip them instead of aborting the whole load.
            if not line.strip():
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError as err:
                print("Error in loading:", line)
                # Non-zero status so callers/shell scripts see the failure.
                raise SystemExit(1) from err
            # The yield is deliberately outside the ``try``: a consumer that
            # stops early closes the generator (GeneratorExit is raised at
            # the yield), which must not be reported as a parse error.
            yield record
| |
|
| |
|
def save_jsonl(samples, save_path):
    """Write ``samples`` to ``save_path`` as JSON Lines (one object per line).

    Missing parent directories are created. An existing file is overwritten.
    Non-ASCII characters are written as-is (``ensure_ascii=False``) in UTF-8.
    A confirmation line is printed once the file has been written.

    Args:
        samples: Iterable of JSON-serialisable objects.
        save_path: Destination file path.
    """
    folder = os.path.dirname(save_path)
    # ``os.makedirs("")`` raises FileNotFoundError, so only create the parent
    # when the path actually has a directory component (a bare file name
    # such as "out.jsonl" targets the current directory).
    if folder:
        os.makedirs(folder, exist_ok=True)

    with open(save_path, "w", encoding="utf-8") as f:
        for sample in samples:
            f.write(json.dumps(sample, ensure_ascii=False) + "\n")
    print("Saved to", save_path)
| |
|
| |
|
def lower_keys(example):
    """Return a copy of ``example`` whose keys are all lower-cased.

    Values are carried over untouched. When two keys differ only by case,
    the one that appears later in ``example`` wins.

    Args:
        example: Mapping with string keys.

    Returns:
        A new ``dict``; the input mapping is not modified.
    """
    return {name.lower(): content for name, content in example.items()}
| |
|
| |
|
# Few-shot demonstrations, fetched once at import time; indexed by dataset
# name in ``load_prompt``.
EXAMPLES = get_examples()

# Datasets that borrow the demonstrations of another dataset.
_DEMO_SOURCE = {
    **dict.fromkeys(("gsm_hard", "svamp", "tabmwp", "asdiv", "mawps"), "gsm8k"),
    **dict.fromkeys(
        ("math_oai", "hungarian_exam", "math-oai", "aime24", "amc23"), "math"
    ),
    "sat_math": "mmlu_stem",
    **dict.fromkeys(
        (
            "gaokao2024_I",
            "gaokao2024_II",
            "gaokao_math_qa",
            "gaokao2024_mix",
            "cn_middle_school",
        ),
        "gaokao",
    ),
}


def load_prompt(data_name, prompt_type, num_shots):
    """Return the first ``num_shots`` demonstrations for a dataset.

    Args:
        data_name: Dataset name. Names listed in ``_DEMO_SOURCE`` are mapped
            to the dataset whose demonstrations they share.
        prompt_type: Accepted for interface compatibility; it has no
            influence on the returned demonstrations.
        num_shots: Number of demonstrations wanted. A falsy value (``0`` or
            ``None``) yields an empty list without touching ``EXAMPLES``.

    Returns:
        ``EXAMPLES[<resolved name>][:num_shots]``, or ``[]`` when no shots
        are requested.

    Raises:
        KeyError: If the resolved dataset name is not present in
            ``EXAMPLES`` (assuming it is a dict -- its type is not visible
            here).
    """
    if not num_shots:
        return []

    resolved_name = _DEMO_SOURCE.get(data_name, data_name)
    return EXAMPLES[resolved_name][:num_shots]
| |
|
| |
|
# Prompt formats keyed by prompt type. Every value is a 3-tuple:
#   (input_template, output_template, splitter)
# * input_template  -- formatted with ``input=<question>``
# * output_template -- formatted with ``output=<answer>``
# * splitter        -- string placed between consecutive few-shot examples
# Literal braces meant for the model (e.g. ``\boxed{}``) are doubled because
# the templates go through ``str.format``.
PROMPT_TEMPLATES = {
    "direct": ("Question: {input}\nAnswer: ", "{output}", "\n\n"),
    "cot": ("Question: {input}\nAnswer: ", "{output}", "\n\n\n"),
    "pal": ("Question: {input}\n\n", "{output}", "\n---\n"),
    "tool-integrated": ("Question: {input}\n\nSolution:\n", "{output}", "\n---\n"),
    "self-instruct": ("<|user|>\n{input}\n<|assistant|>\n", "{output}", "\n"),
    "tora": ("<|user|>\n{input}\n<|assistant|>\n", "{output}", "\n"),
    "wizard_zs": (
        "### Instruction:\n{input}\n\n### Response: Let's think step by step.",
        "{output}",
        "\n\n\n",
    ),
    "platypus_fs": (
        "### Instruction:\n{input}\n\n### Response:\n",
        "{output}",
        "\n\n\n",
    ),
    "deepseek-math": (
        "User: {input}\nPlease reason step by step, "
        "and put your final answer within \\boxed{{}}.\n\nAssistant:",
        "{output}",
        "\n\n\n",
    ),
    "kpmath": (
        "User: Please reason step by step and put your final answer at the end "
        'with "The answer is: ".\n\n{input}\n\nAssistant:',
        "{output}",
        # This entry used to be a 2-tuple, so reading the splitter at index 2
        # raised IndexError. NOTE(review): "\n\n\n" mirrors the sibling
        # User/Assistant template ("deepseek-math") -- confirm the value.
        "\n\n\n",
    ),
    "jiuzhang": (
        "## Question\n{input}\n\n## Solution\n",
        "{output}",
        "\n\n\n",
    ),
    "jiuzhang_tora": (
        "## Question\n{input}\n\n## Code Solution\n",
        "{output}",
        "\n\n\n",
    ),
    "jiuzhang_nl": (
        "## Question\n{input}\n\n## Natural Language Solution\n",
        "{output}",
        "\n\n\n",
    ),
    "mmiqc": (
        'Please solve the following problem and put your answer at the end with "The answer is: ".\n\n{input}\n\n',
        "{output}",
        "\n\n\n",
    ),
    "abel": (
        "Question:\n{input}\nAnswer:\nLet's think step by step.\n",
        "{output}",
        "\n\n",
    ),
    "shepherd": ("{input}\n", "{output}", "\n\n\n"),
    "qwen-boxed": (
        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
        "<|im_start|>user\n{input}\nPlease reason step by step, and put your final answer within \\boxed{{}}.<|im_end|>\n"
        "<|im_start|>assistant\n",
        "{output}",
        "\n\n",
    ),
    "qwen25-math-cot": (
        "<|im_start|>system\nPlease reason step by step, and put your final answer within \\boxed{{}}.<|im_end|>\n"
        "<|im_start|>user\n{input}<|im_end|>\n"
        "<|im_start|>assistant\n",
        "{output}",
        "\n\n",
    ),
    "deepseek3": (
        "<|User|>{input}<|Assistant|>",
        "{output}",
        "\n\n",
    ),
    "mathstral": (
        "{input}\nPlease reason step by step, and put your final answer within \\boxed{{}}.",
        "{output}",
        "\n\n",
    ),
    "internlm-math-fs": ("Question:{input}\nAnswer:", "{output}", "\n"),
    "internlm-math-chat": (
        "<|im_start|>user\n{input}<|im_end|>\n" "<|im_start|>assistant\n",
        "{output}",
        "\n\n",
    ),
    "mistral": (
        "[INST] {input}[/INST]",
        "{output}",
        "\n\n",
    ),
    "numina": ("### Problem: {input}\n### Solution:", " {output}", "\n\n"),
}
| |
|
| |
|
# Datasets that get a fixed 5-shot prompt when ``args.adapt_few_shot`` is set.
_ADAPT_FEW_SHOT_DATASETS = (
    "gaokao2024_I",
    "gaokao2024_II",
    "gaokao_math_qa",
    "gaokao2024_mix",
    "cn_middle_school",
)

# Wrapper applied around the whole prompt for the "platypus_fs" prompt type.
_PLATYPUS_WRAPPER = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\n{instruction}\n\n### Response:\n"
)

# Instruction header prepended for the "tora" / "tool-integrated" prompt types.
# Backslashes are escaped explicitly: in a plain string literal ``\b`` is the
# backspace character, which used to turn ``\boxed{}`` into "<BS>oxed{}".
_TORA_INSTRUCTION = (
    "Integrate step-by-step reasoning and Python code to solve math problems "
    "using the following guidelines:\n"
    "\n"
    "- Analyze the question and write functions to solve the problem; "
    "the function should not take any arguments.\n"
    "- Present the final result in LaTeX using a `\\boxed{}` without any units.\n"
    "- Utilize the `pi` symbol and `Rational`` from Sympy for $\\pi$ and fractions, "
    "and simplify all fractions and square roots without converting them to "
    "decimal values.\n"
    "\n"
    "Here are some examples you may refer to:\n"
    "\n"
    "---\n"
    "\n"
)


def construct_prompt(example, data_name, args):
    """Build the full text prompt for one example.

    Args:
        example: Mapping holding the problem under ``"question"``. When
            ``args.adapt_few_shot`` is truthy, ``"gt_ans"`` is read as well.
        data_name: Dataset name, forwarded to ``load_prompt`` to pick the
            few-shot demonstrations.
        args: Object exposing ``prompt_type``, ``num_shots`` and
            ``adapt_few_shot``.

    Returns:
        The prompt string with leading/trailing spaces (only the space
        character, not newlines) stripped.

    Raises:
        KeyError: If ``args.prompt_type`` has no entry in
            ``PROMPT_TEMPLATES``.
    """
    use_fixed_shots = args.adapt_few_shot and data_name in _ADAPT_FEW_SHOT_DATASETS
    num_shots = 5 if use_fixed_shots else args.num_shots
    demos = load_prompt(data_name, args.prompt_type, num_shots)

    # NOTE(review): the template is looked up with the raw
    # ``args.prompt_type`` (no aliasing) -- kept exactly as before.
    prompt_temp = PROMPT_TEMPLATES[args.prompt_type]
    input_template, output_template, splitter = (
        prompt_temp[0],
        prompt_temp[1],
        prompt_temp[2],
    )

    is_qwen_cot = args.prompt_type == "qwen25-math-cot"
    if is_qwen_cot:
        # Demonstrations are joined raw; the chat template is applied once
        # around demos + question further down.
        demo_prompt = splitter.join(q + "\n" + a for q, a in demos)
    else:
        demo_prompt = splitter.join(
            input_template.format(input=q) + output_template.format(output=a)
            for q, a in demos
        )

    context = input_template.format(input=example["question"])

    # Fall back to zero-shot when there are no demonstrations, or when
    # adaptive few-shot is on and the gold answer is not a choice letter.
    zero_shot = not demo_prompt or (
        args.adapt_few_shot and example["gt_ans"] not in ("A", "B", "C", "D", "E")
    )
    if zero_shot:
        full_prompt = context
    elif is_qwen_cot:
        full_prompt = input_template.format(
            input=demo_prompt + splitter + example["question"]
        )
    else:
        full_prompt = demo_prompt + splitter + context

    if args.prompt_type == "platypus_fs":
        full_prompt = _PLATYPUS_WRAPPER.format(instruction=full_prompt)

    # "tool-integrated" is treated as an alias of "tora" for the header.
    if args.prompt_type in ("tora", "tool-integrated"):
        full_prompt = _TORA_INSTRUCTION + full_prompt

    return full_prompt.strip(" ")
| |
|
| |
|
# Display labels for selected sample fields printed by ``show_sample``.
key_map = {
    "gt": "Ground Truth",
    "pred": "Prediction",
    "gt_cot": "Reference CoT",
    "score": "Score",
}


def show_sample(sample, print_all_preds=False):
    """Pretty-print one evaluated sample to stdout.

    Args:
        sample: Mapping that must contain ``"question"``. The optional keys
            ``"idx"``, ``"type"``, ``"level"``, ``"dataset"``, ``"code"``
            (with ``"report"``), ``"pred"``, ``"gt"``, ``"score"``,
            ``"unit"`` and ``"gt_cot"`` are printed when present.
        print_all_preds: When true, print every entry of ``sample["code"]``
            followed by the whole ``sample["report"]``; otherwise print only
            the first code entry and the first report entry.
    """
    print("=" * 40)

    # Metadata lines, labelled with the key name with its first letter upper-cased.
    for field in ("idx", "type", "level", "dataset"):
        if field not in sample:
            continue
        label = field[0].upper() + field[1:]
        print(f"{label}: {sample[field]}")

    print("Question:", repr(sample["question"]))

    if "code" in sample:
        if not print_all_preds:
            print("Solution:\n", sample["code"][0])
            print("Execution:", sample["report"][0])
        else:
            for snippet in sample["code"]:
                print("-" * 20)
                print("code:", snippet)
            # NOTE(review): indentation was lost in the source; assumed the
            # full report is printed once after the loop -- confirm.
            print("Execution:", sample["report"])

    if "pred" in sample:
        print("Prediction:", repr(sample["pred"][0]))

    for field in ("gt", "score", "unit", "gt_cot"):
        if field not in sample:
            continue
        label = key_map.get(field, field)
        print(f"{label}: {sample[field]!r}")

    print()
| |
|