Fail if tokenizer.json not found

This commit is contained in:
turboderp
2025-05-27 17:10:35 +02:00
parent a811641c3b
commit 97e4fd90f1

View File

@@ -135,15 +135,17 @@ class ExLlamaV2Tokenizer:
 self.unspecial_piece_to_id = {}

 tokenizer_json_path = os.path.join(self.config.model_dir, "tokenizer.json")
-if os.path.exists(tokenizer_json_path):
-    with open(tokenizer_json_path, encoding = "utf8") as f:
-        tokenizer_json = json.load(f)
-    if "added_tokens" in tokenizer_json:
-        for v in tokenizer_json["added_tokens"]:
-            if v["special"]:
-                self.extended_piece_to_id[v["content"]] = v["id"]
-            else:
-                self.unspecial_piece_to_id[v["content"]] = v["id"]
+if not os.path.exists(tokenizer_json_path):
+    raise ValueError(" ## Model does not include a tokenizer.json file. SentencePiece-only tokenizers are no longer supported")
+
+with open(tokenizer_json_path, encoding = "utf8") as f:
+    tokenizer_json = json.load(f)
+if "added_tokens" in tokenizer_json:
+    for v in tokenizer_json["added_tokens"]:
+        if v["special"]:
+            self.extended_piece_to_id[v["content"]] = v["id"]
+        else:
+            self.unspecial_piece_to_id[v["content"]] = v["id"]

 # Attempt to load tokenizer_config.json