diff --git a/conversion/quantize.py b/conversion/quantize.py index 3fe50cb..e05de38 100644 --- a/conversion/quantize.py +++ b/conversion/quantize.py @@ -417,7 +417,7 @@ def measure_quant(job, save_fn, model): print(f" -- Writing {job['output_measurement']}") for filename in measurement_files: - with open(filename, "w") as f: + with open(filename, "w", encoding = "utf8") as f: f.write(json.dumps(exp_measurement, indent = 4)) @@ -798,6 +798,6 @@ def quant(job, save_fn, model): "last_module_idx": job["last_module_idx"], "base_perplexity": job["base_perplexity"] } - with open(os.path.join(job["out_dir"], "measurement.json"), "w") as f: + with open(os.path.join(job["out_dir"], "measurement.json"), "w", encoding = "utf8") as f: f.write(json.dumps(exp_measurement, indent = 4)) diff --git a/convert.py b/convert.py index b4361e9..21fdcea 100644 --- a/convert.py +++ b/convert.py @@ -107,7 +107,7 @@ job_file = os.path.join(out_dir, "job.json") def save_job(): global job_file, job - with open(job_file, "w") as f: + with open(job_file, "w", encoding = "utf8") as f: f.write(json.dumps(job, indent = 4)) if no_resume or not os.path.exists(job_file): @@ -153,7 +153,7 @@ if no_resume or not os.path.exists(job_file): if reuse_measurement is not None: - with open(reuse_measurement, "r") as f: + with open(reuse_measurement, "r", encoding = "utf8") as f: imp_measurement = json.load(f) job["measurement"] = imp_measurement["measurement"] @@ -170,7 +170,7 @@ else: print(f" -- Resuming job") print(f" !! Note: Overriding options with settings from existing job") - with open(job_file, "r") as f: + with open(job_file, "r", encoding = "utf8") as f: job = json.load(f) if "invalid" in job: diff --git a/exllamav2/config.py b/exllamav2/config.py index 45976bc..cc398aa 100644 --- a/exllamav2/config.py +++ b/exllamav2/config.py @@ -71,7 +71,7 @@ class ExLlamaV2Config: self.model_config = os.path.join(self.model_dir, "config.json") assert os.path.exists(self.model_config), "Can't find " + self.model_config - with open(self.model_config) as f: + with open(self.model_config, encoding = "utf8") as f: read_config = json.load(f) if "LlamaForCausalLM" in read_config["architectures"]: self.architecture = "Llama" diff --git a/exllamav2/lora.py b/exllamav2/lora.py index e559247..e80023f 100644 --- a/exllamav2/lora.py +++ b/exllamav2/lora.py @@ -50,7 +50,7 @@ class ExLlamaV2Lora: # Grab relevant items from LoRA config - with open(lora_config_path) as f: + with open(lora_config_path, encoding = "utf8") as f: read_config = json.load(f) self.lora_r = read_config["r"] diff --git a/exllamav2/tokenizer.py b/exllamav2/tokenizer.py index 67fc6d8..a3a1371 100644 --- a/exllamav2/tokenizer.py +++ b/exllamav2/tokenizer.py @@ -73,7 +73,7 @@ class ExLlamaV2Tokenizer: tokenizer_json_path = os.path.join(self.config.model_dir, "tokenizer.json") if os.path.exists(tokenizer_json_path): - with open(tokenizer_json_path) as f: + with open(tokenizer_json_path, encoding = "utf8") as f: tokenizer_json = json.load(f) if "added_tokens" in tokenizer_json: for v in tokenizer_json["added_tokens"]: @@ -86,7 +86,7 @@ class ExLlamaV2Tokenizer: added_tokens_path = os.path.join(self.config.model_dir, "added_tokens.json") if os.path.exists(added_tokens_path): - with open(added_tokens_path) as f: + with open(added_tokens_path, encoding = "utf8") as f: self.extended_piece_to_id = json.load(f) # Remove unspecial added tokens that exist in the base tokenizer already, but only if they decode correctly diff --git a/setup.py b/setup.py index 9ce4a2f..964bd65 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,7 @@ setup_kwargs = { } if precompile else {} version_py = {} -with open("exllamav2/version.py") as fp: +with open("exllamav2/version.py", encoding = "utf8") as fp: exec(fp.read(), version_py) version = version_py["__version__"] print("Version:", version) diff --git a/util/shard.py b/util/shard.py index 9004b02..a4cc26e 100644 --- a/util/shard.py +++ b/util/shard.py @@ -76,7 +76,7 @@ index_filename = f"{input_file}.index.json" print(f" -- Writing: {index_filename}") -with open(index_filename, 'w') as f: +with open(index_filename, 'w', encoding = "utf8") as f: json.dump(index, f, indent = 2) # Done