Tree: Force utf8 when opening files

The default encoding on Linux is UTF-8, but Windows uses cp1252, which
cannot encode some Unicode characters.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri
2023-11-29 19:21:29 -05:00
parent 395427ea30
commit 6bfcefe940
7 changed files with 11 additions and 11 deletions

View File

@@ -417,7 +417,7 @@ def measure_quant(job, save_fn, model):
print(f" -- Writing {job['output_measurement']}")
for filename in measurement_files:
with open(filename, "w") as f:
with open(filename, "w", encoding = "utf8") as f:
f.write(json.dumps(exp_measurement, indent = 4))
@@ -798,6 +798,6 @@ def quant(job, save_fn, model):
"last_module_idx": job["last_module_idx"],
"base_perplexity": job["base_perplexity"] }
with open(os.path.join(job["out_dir"], "measurement.json"), "w") as f:
with open(os.path.join(job["out_dir"], "measurement.json"), "w", encoding = "utf8") as f:
f.write(json.dumps(exp_measurement, indent = 4))

View File

@@ -107,7 +107,7 @@ job_file = os.path.join(out_dir, "job.json")
def save_job():
global job_file, job
with open(job_file, "w") as f:
with open(job_file, "w", encoding = "utf8") as f:
f.write(json.dumps(job, indent = 4))
if no_resume or not os.path.exists(job_file):
@@ -153,7 +153,7 @@ if no_resume or not os.path.exists(job_file):
if reuse_measurement is not None:
with open(reuse_measurement, "r") as f:
with open(reuse_measurement, "r", encoding = "utf8") as f:
imp_measurement = json.load(f)
job["measurement"] = imp_measurement["measurement"]
@@ -170,7 +170,7 @@ else:
print(f" -- Resuming job")
print(f" !! Note: Overriding options with settings from existing job")
with open(job_file, "r") as f:
with open(job_file, "r", encoding = "utf8") as f:
job = json.load(f)
if "invalid" in job:

View File

@@ -71,7 +71,7 @@ class ExLlamaV2Config:
self.model_config = os.path.join(self.model_dir, "config.json")
assert os.path.exists(self.model_config), "Can't find " + self.model_config
with open(self.model_config) as f:
with open(self.model_config, encoding = "utf8") as f:
read_config = json.load(f)
if "LlamaForCausalLM" in read_config["architectures"]: self.architecture = "Llama"

View File

@@ -50,7 +50,7 @@ class ExLlamaV2Lora:
# Grab relevant items from LoRA config
with open(lora_config_path) as f:
with open(lora_config_path, encoding = "utf8") as f:
read_config = json.load(f)
self.lora_r = read_config["r"]

View File

@@ -73,7 +73,7 @@ class ExLlamaV2Tokenizer:
tokenizer_json_path = os.path.join(self.config.model_dir, "tokenizer.json")
if os.path.exists(tokenizer_json_path):
with open(tokenizer_json_path) as f:
with open(tokenizer_json_path, encoding = "utf8") as f:
tokenizer_json = json.load(f)
if "added_tokens" in tokenizer_json:
for v in tokenizer_json["added_tokens"]:
@@ -86,7 +86,7 @@ class ExLlamaV2Tokenizer:
added_tokens_path = os.path.join(self.config.model_dir, "added_tokens.json")
if os.path.exists(added_tokens_path):
with open(added_tokens_path) as f:
with open(added_tokens_path, encoding = "utf8") as f:
self.extended_piece_to_id = json.load(f)
# Remove unspecial added tokens that exist in the base tokenizer already, but only if they decode correctly

View File

@@ -53,7 +53,7 @@ setup_kwargs = {
} if precompile else {}
version_py = {}
with open("exllamav2/version.py") as fp:
with open("exllamav2/version.py", encoding = "utf8") as fp:
exec(fp.read(), version_py)
version = version_py["__version__"]
print("Version:", version)

View File

@@ -76,7 +76,7 @@ index_filename = f"{input_file}.index.json"
print(f" -- Writing: {index_filename}")
with open(index_filename, 'w') as f:
with open(index_filename, 'w', encoding = "utf8") as f:
json.dump(index, f, indent = 2)
# Done