Fail if tokenizer.json not found

2026-04-29 10:41:28 +00:00 · 2025-05-27 17:10:35 +02:00
parent a811641c3b
commit 97e4fd90f1
1 changed file with 11 additions and 9 deletions
--- a/exllamav2/tokenizer/tokenizer.py
+++ b/exllamav2/tokenizer/tokenizer.py
@@ -135,15 +135,17 @@ class ExLlamaV2Tokenizer:
        self.unspecial_piece_to_id = {}
        tokenizer_json_path = os.path.join(self.config.model_dir, "tokenizer.json")
-        if os.path.exists(tokenizer_json_path):
+        if not os.path.exists(tokenizer_json_path):
-            with open(tokenizer_json_path, encoding = "utf8") as f:
+            raise ValueError(" ## Model does not include a tokenizer.json file. SentencePiece-only tokenizers are no longer supported")
-                tokenizer_json = json.load(f)
+
-                if "added_tokens" in tokenizer_json:
+        with open(tokenizer_json_path, encoding = "utf8") as f:
-                    for v in tokenizer_json["added_tokens"]:
+            tokenizer_json = json.load(f)
-                        if v["special"]:
+            if "added_tokens" in tokenizer_json:
-                            self.extended_piece_to_id[v["content"]] = v["id"]
+                for v in tokenizer_json["added_tokens"]:
-                        else:
+                    if v["special"]:
-                            self.unspecial_piece_to_id[v["content"]] = v["id"]
+                        self.extended_piece_to_id[v["content"]] = v["id"]
+                    else:
+                        self.unspecial_piece_to_id[v["content"]] = v["id"]
        # Attempt to load tokenizer_config.json