mirror of
https://github.com/turboderp-org/exllamav2.git
synced 2026-04-29 10:41:28 +00:00
Fail if tokenizer.json not found
This commit is contained in:
@@ -135,15 +135,17 @@ class ExLlamaV2Tokenizer:
|
|||||||
self.unspecial_piece_to_id = {}
|
self.unspecial_piece_to_id = {}
|
||||||
|
|
||||||
tokenizer_json_path = os.path.join(self.config.model_dir, "tokenizer.json")
|
tokenizer_json_path = os.path.join(self.config.model_dir, "tokenizer.json")
|
||||||
if os.path.exists(tokenizer_json_path):
|
if not os.path.exists(tokenizer_json_path):
|
||||||
with open(tokenizer_json_path, encoding = "utf8") as f:
|
raise ValueError(" ## Model does not include a tokenizer.json file. SentencePiece-only tokenizers are no longer supported")
|
||||||
tokenizer_json = json.load(f)
|
|
||||||
if "added_tokens" in tokenizer_json:
|
with open(tokenizer_json_path, encoding = "utf8") as f:
|
||||||
for v in tokenizer_json["added_tokens"]:
|
tokenizer_json = json.load(f)
|
||||||
if v["special"]:
|
if "added_tokens" in tokenizer_json:
|
||||||
self.extended_piece_to_id[v["content"]] = v["id"]
|
for v in tokenizer_json["added_tokens"]:
|
||||||
else:
|
if v["special"]:
|
||||||
self.unspecial_piece_to_id[v["content"]] = v["id"]
|
self.extended_piece_to_id[v["content"]] = v["id"]
|
||||||
|
else:
|
||||||
|
self.unspecial_piece_to_id[v["content"]] = v["id"]
|
||||||
|
|
||||||
# Attempt to load tokenizer_config.json
|
# Attempt to load tokenizer_config.json
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user