HumanEval: Move BOS token to individual prompt template, don't prepend by default when tokenizing

This commit is contained in:
turboderp
2025-05-11 23:02:07 +02:00
parent f5127e87f8
commit 1e1754787e

View File

@@ -14,12 +14,12 @@ prompt_formats = {
" "
),
"granite": (
"Question:\nComplete the following Python function:\n\n{{problem}}\n\nAnswer:\n"
"<|endoftext|>Question:\nComplete the following Python function:\n\n{{problem}}\n\nAnswer:\n"
"Sure! Here is how you might implement the function:\n\n```python\n{{problem}}",
" "
),
"llama": (
"[INST] <<SYS>>\n"
"<s>[INST] <<SYS>>\n"
"You are a helpful AI coding assistant.\n"
"<</SYS>>\n\n"
"Complete the following Python function:\n\n"
@@ -28,7 +28,7 @@ prompt_formats = {
" "
),
"llama3": (
"<|start_header_id|>system<|end_header_id|>\n\n"
"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
"You are a helpful AI coding assistant.<|eot_id|>"
"<|start_header_id|>user<|end_header_id|>\n\n"
"Complete the following Python function:\n\n{{problem}}<|eot_id|>"
@@ -37,7 +37,7 @@ prompt_formats = {
" "
),
"mistral": (
"[INST] You are a helpful AI coding assistant.\n\n"
"<s>[INST] You are a helpful AI coding assistant.\n\n"
"Complete the following Python function:\n\n"
"{{problem}}[/INST]"
" Sure! Here is how you might implement the function:\n\n```python\n{{problem}}",
@@ -51,7 +51,7 @@ prompt_formats = {
" "
),
"reka": (
"human: Complete the following Python function."
"<|endoftext|>human: Complete the following Python function."
" Provide your reasoning in comments, but be concise and don't second-guess."
"\n\n{{problem}}"
" <sep> assistant: ```python\n{{problem}}",
@@ -76,7 +76,7 @@ prompt_formats = {
" "
),
"deepseek": (
"You are a helpful AI coding assistant.\n"
"<｜begin▁of▁sentence｜>You are a helpful AI coding assistant.\n"
"<User>Complete the following Python function:\n\n{{problem}}"
"<Assistant>Sure! Here is how you might implement the function:\n\n```python\n{{problem}}",
" "
@@ -124,7 +124,11 @@ def main(args):
for idx, (problem_id, problem) in enumerate(problems.items()):
b_problem = problem["prompt"]
f_problem = prompt_format.replace("{{problem}}", b_problem)
input_ids = tokenizer.encode(f_problem, encode_special_tokens = True, add_bos = True)
input_ids = tokenizer.encode(
f_problem,
encode_special_tokens = True,
add_bos = (args.prompt_format == "raw")
)
for s in range(num_samples_per_task):
job = Job(
input_ids = input_ids,