Fail if tokenizer.json not found

This commit is contained in:
turboderp
2025-05-27 17:10:35 +02:00
parent a811641c3b
commit 97e4fd90f1

View File

@@ -135,15 +135,17 @@ class ExLlamaV2Tokenizer:
self.unspecial_piece_to_id = {}
tokenizer_json_path = os.path.join(self.config.model_dir, "tokenizer.json")
if os.path.exists(tokenizer_json_path):
with open(tokenizer_json_path, encoding = "utf8") as f:
tokenizer_json = json.load(f)
if "added_tokens" in tokenizer_json:
for v in tokenizer_json["added_tokens"]:
if v["special"]:
self.extended_piece_to_id[v["content"]] = v["id"]
else:
self.unspecial_piece_to_id[v["content"]] = v["id"]
if not os.path.exists(tokenizer_json_path):
raise ValueError(" ## Model does not include a tokenizer.json file. SentencePiece-only tokenizers are no longer supported")
with open(tokenizer_json_path, encoding = "utf8") as f:
tokenizer_json = json.load(f)
if "added_tokens" in tokenizer_json:
for v in tokenizer_json["added_tokens"]:
if v["special"]:
self.extended_piece_to_id[v["content"]] = v["id"]
else:
self.unspecial_piece_to_id[v["content"]] = v["id"]
# Attempt to load tokenizer_config.json