mirror of
https://github.com/turboderp-org/exllamav3.git
synced 2026-04-29 18:51:34 +00:00
Add simple long-context evaluation script
This commit is contained in:
1111
eval/eval_texts/illustrious_client.txt
Normal file
1111
eval/eval_texts/illustrious_client.txt
Normal file
File diff suppressed because it is too large
Load Diff
1111
eval/eval_texts/illustrious_client_c1.txt
Normal file
1111
eval/eval_texts/illustrious_client_c1.txt
Normal file
File diff suppressed because it is too large
Load Diff
1110
eval/eval_texts/illustrious_client_c2.txt
Normal file
1110
eval/eval_texts/illustrious_client_c2.txt
Normal file
File diff suppressed because it is too large
Load Diff
1
eval/eval_texts/illustrious_client_sum.txt
Normal file
1
eval/eval_texts/illustrious_client_sum.txt
Normal file
@@ -0,0 +1 @@
|
||||
Sherlock Holmes is hired to stop the dangerous Baron Gruner from marrying an innocent woman. With help from an ex-lover of the Baron and a forged identity, Holmes exposes the Baron’s vile past and secures proof of his crimes. The wedding is called off, and Gruner is horribly disfigured in revenge.
|
||||
16
eval/eval_texts/variable_man_char.txt
Normal file
16
eval/eval_texts/variable_man_char.txt
Normal file
@@ -0,0 +1,16 @@
|
||||
- Security Commissioner Eric Paddington
|
||||
- Jeremy – the Council SRB lab organizer
|
||||
- Peter Gibson – head of Military Designs
|
||||
- Margaret Wheeler – President of the Council
|
||||
- E. Fredman – senior official at Histo-Research
|
||||
- Goodwin – Fredman’s superior at Histo-Research
|
||||
- Director Blackwell – of Federal Stockpile Conservation
|
||||
- Steven Hurst – Richard Hurst’s elder son
|
||||
- Earl (Hurst) – Steven’s younger brother
|
||||
- Jennifer Preston – the children’s friend / neighbor
|
||||
- Edward Milsom – “the Variable Man”
|
||||
- Richard Hurst – electrical-engineer father of Steven and Earl
|
||||
- Fleet Commander Tucker – Terran war-fleet commander
|
||||
- Vice-Commander Dickerson – second-in-command of the fleet
|
||||
- Joseph Dixon – senior Security officer
|
||||
- Douglas West – late physicist who discovered the f-t-l principle
|
||||
3485
eval/eval_texts/variable_man_mod.txt
Normal file
3485
eval/eval_texts/variable_man_mod.txt
Normal file
File diff suppressed because it is too large
Load Diff
3500
eval/eval_texts/variable_man_mod_c1.txt
Normal file
3500
eval/eval_texts/variable_man_mod_c1.txt
Normal file
File diff suppressed because it is too large
Load Diff
1
eval/eval_texts/variable_man_sum.txt
Normal file
1
eval/eval_texts/variable_man_sum.txt
Normal file
@@ -0,0 +1 @@
|
||||
War‑bent 22nd‑century Earth relies on giant SRB computers and the unfinished faster‑than‑light “Icarus” bomb to break the Proxima Jorblax Empire’s grip, but when a time‑research accident pulls 1913 handyman Edward Milsom into the future his intuitive, boundary‑less repair genius baffles the computers, panics Security Commissioner Paddington, and ultimately rewires Icarus not to explode but to decelerate safely from FTL speed; Paddington’s attempt to kill Milsom and launch the war backfires, the Terran fleet is defeated, and only after the debacle does physicist Sherikov realize Milsom has solved the fundamental FTL‑re‑entry problem, giving humanity true interstellar travel and rendering the lost war irrelevant—while the burned but alive “variable man,” awaiting return to his own time, is already studying a new device to fix.
|
||||
189
eval/longctx.py
Normal file
189
eval/longctx.py
Normal file
@@ -0,0 +1,189 @@
|
||||
# Ensure the repository root (parent of this eval/ directory) is importable
# when the script is run directly, before any exllamav3 imports.
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import argparse
from transformers import AutoTokenizer
from exllamav3.util.progress import ProgressBar
from exllamav3 import Config, Model, Cache, Tokenizer, model_init, Generator, Job, GreedySampler
import torch

# ANSI escape codes for colored terminal output
ESC = "\u001b"
col_default = "\u001b[0m"    # reset to default style
col_red = "\u001b[31;1m"
col_green = "\u001b[32;1m"   # section banners
col_yellow = "\u001b[33;1m"
col_blue = "\u001b[34;1m"    # test descriptions
col_gray = "\u001b[37;1m"    # reference answers
def _read_eval_text(texts_dir, filename):
    # Fixture texts contain non-ASCII punctuation (curly quotes, em-dashes);
    # read them as UTF-8 explicitly so decoding does not depend on the
    # platform's locale encoding.
    with open(os.path.join(texts_dir, filename), "r", encoding = "utf-8") as file:
        return file.read()


def _print_banner(title):
    # Green section banner: a dash rule the width of the title, the title
    # itself, then a matching dash rule.
    bar = "-" * len(title)
    for line in (bar, title, bar):
        print(f"{col_green}{line}{col_default}")


def _print_result(title, description, reference, job):
    # One result section: banner, blue description, optional gray reference
    # text, then the model's completion for the given finished job.
    print()
    _print_banner(title)
    print(f"{col_blue}{description}{col_default}")
    if reference is not None:
        print(f"{col_gray}{reference}{col_default}")
    print()
    print(job.full_completion.strip())
    print()


@torch.inference_mode()
def main(args):
    """
    Run a small suite of long-context evaluations (summarization, needle-style
    edit detection, Q&A, corruption detection, name extraction) against the
    model selected by the command-line arguments, printing each completion
    next to its reference answer for manual comparison.
    """

    # Load model
    model, config, cache, tokenizer = model_init.init(args)
    generator = Generator(model, cache, tokenizer, show_visualizer = args.visualize_cache)
    bpw_layer, bpw_head, vram_bits = model.get_storage_info()

    print(f" -- Model: {args.model_dir}")
    print(f" -- Bitrate: {bpw_layer:.2f} bpw / {bpw_head:.2f} bpw (head)")

    # Load Transformers tokenizer (used only for its chat template)
    t_tokenizer = AutoTokenizer.from_pretrained(args.model_dir)

    # Load the evaluation texts bundled next to this script
    texts_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "eval_texts")
    text_ic_orig = _read_eval_text(texts_dir, "illustrious_client.txt")
    text_ic_french = _read_eval_text(texts_dir, "illustrious_client_c1.txt")
    text_ic_zoomer = _read_eval_text(texts_dir, "illustrious_client_c2.txt")
    text_ic_sum = _read_eval_text(texts_dir, "illustrious_client_sum.txt")
    text_vm_mod = _read_eval_text(texts_dir, "variable_man_mod.txt")
    text_vm_pony = _read_eval_text(texts_dir, "variable_man_mod_c1.txt")
    text_vm_sum = _read_eval_text(texts_dir, "variable_man_sum.txt")
    text_vm_char = _read_eval_text(texts_dir, "variable_man_char.txt")

    # Template
    def make_job(instruction):
        # Wrap the instruction in the model's chat template and build a greedy
        # job capped at 768 new tokens. Returns (job, prompt length in tokens).
        chat = [{
            "role": "user",
            "content": instruction
        }]
        input_ids = t_tokenizer.apply_chat_template(chat, add_generation_prompt = True)
        input_ids = torch.tensor(input_ids, dtype = torch.long).unsqueeze(0)
        job = Job(
            input_ids = input_ids,
            max_new_tokens = 768,
            stop_conditions = config.eos_token_id_list,
            sampler = GreedySampler()
        )
        return job, input_ids.shape[-1]

    # Tests
    # TODO: Find some original source material that models are sure to be entirely unfamiliar with
    job_ic_sum, len_ic_sum = make_job(text_ic_orig + "\n\n---\n\nProvide an extremely short summary of the story.")
    job_ic_french, _ = make_job(text_ic_french + "\n\n---\n\nOne paragraph in this story has been translated to a different language. Translate it back.")
    job_ic_zoomer, _ = make_job(text_ic_zoomer + "\n\n---\n\nTwo paragraphs have been rewritten in a zoomer slang style. Identify them.")
    job_vm_sum, len_vm_sum = make_job(text_vm_mod + "\n\n---\n\nProvide an extremely short summary of the story.")

    vm_q1 = "Why do the SRB computers stop giving reliable war-odds after Edward Milsom arrives in the 22nd century?"
    vm_a1 = "Milsom’s behavior is unpredictable to the machines because he comes from a different era and doesn’t fit their statistical patterns, so his presence introduces a “variable” they cannot factor."
    vm_q2 = "What does Edward Milsom secretly do to the Icarus bomb’s control turret?"
    vm_a2 = "Instead of wiring it to trigger an explosion, he rewires it so the craft can decelerate safely from faster-than-light speed, turning it from a bomb into a workable FTL drive."
    vm_q3 = "How does humanity ultimately benefit from Milsom’s interference, even though Earth loses the war against Jorblax?"
    vm_a3 = "Milsom’s solution delivers a practical faster-than-light return method, giving Earth true interstellar travel and opening the entire universe for exploration and colonization, making the war’s outcome irrelevant."
    job_vm_q1, _ = make_job(text_vm_mod + f"\n\n---\n\nAnswer in one paragraph: {vm_q1}")
    job_vm_q2, _ = make_job(text_vm_mod + f"\n\n---\n\nAnswer in one paragraph: {vm_q2}")
    job_vm_q3, _ = make_job(text_vm_mod + f"\n\n---\n\nAnswer in one paragraph: {vm_q3}")

    job_vm_char, _ = make_job(text_vm_mod + "\n\n---\n\nList all the named characters in the story.")
    job_vm_pony, _ = make_job(text_vm_pony + "\n\n---\n\nA passage from an unrelated story is inserted in the middle of the text. Can you find it?")

    # Inference: run all jobs through the generator as one batch
    jobs = [
        job_ic_sum,
        job_ic_french,
        job_ic_zoomer,
        job_vm_sum,
        job_vm_q1,
        job_vm_q2,
        job_vm_q3,
        job_vm_pony,
        job_vm_char,
    ]
    generator.enqueue(jobs)

    with ProgressBar("Inference", len(jobs)) as pb:
        while remaining := generator.num_remaining_jobs():
            generator.iterate()
            pb.update(len(jobs) - remaining)

    # Results
    _print_result(
        "SUMMARY TEST",
        f"Short summary of 'The Illustrious Client', {len_ic_sum} tokens.\nReference summary:",
        text_ic_sum.strip(),
        job_ic_sum,
    )
    print()

    _print_result(
        "FRENCH TEST",
        "One paragraph in this story has been translated to a different language. Translate it back.",
        None,
        job_ic_french,
    )
    print()

    _print_result(
        "ZOOMER TEST",
        "A zoomer has edited the text. Identify the edited passages.",
        None,
        job_ic_zoomer,
    )
    print()

    _print_result(
        "SUMMARY TEST",
        f"Short summary of a version of 'The Variable Man' with some names replaced, {len_vm_sum} tokens.\nReference summary:",
        text_vm_sum.strip(),
        job_vm_sum,
    )
    print()

    # Q&A: three question/answer pairs share one banner
    print()
    _print_banner("Q&A TEST")
    for question, answer, job in (
        (vm_q1, vm_a1, job_vm_q1),
        (vm_q2, vm_a2, job_vm_q2),
        (vm_q3, vm_a3, job_vm_q3),
    ):
        print(f"{col_blue}{question} Reference answer:{col_default}")
        print(f"{col_gray}{answer}{col_default}")
        print()
        print(job.full_completion.strip())
        print()
    print()

    _print_result(
        "CORRUPTION TEST",
        "Some MLP fan fiction has made it into the story. Can we detect it?",
        None,
        job_vm_pony,
    )
    print()

    _print_result(
        "NAME EXTRACTION TEST",
        "List all the named characters in the story.\nReference:",
        text_vm_char,
        job_vm_char,
    )
||||
if __name__ == "__main__":
    # Command-line entry point: the standard model-loading arguments plus a
    # toggle for the (slow) cache visualizer.
    arg_parser = argparse.ArgumentParser()
    model_init.add_args(arg_parser, default_cache_size = 65536)
    arg_parser.add_argument(
        "-vis",
        "--visualize_cache",
        action = "store_true",
        help = "Show cache visualizer (slow)",
    )
    main(arg_parser.parse_args())
Reference in New Issue
Block a user