mirror of
https://github.com/turboderp-org/exllamav2.git
synced 2026-03-15 00:07:26 +00:00
Tree: Force utf8 when opening files
The default encoding on Linux is UTF-8, but Windows uses cp1252, which isn't compatible with some Unicode characters. Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
@@ -417,7 +417,7 @@ def measure_quant(job, save_fn, model):
|
||||
print(f" -- Writing {job['output_measurement']}")
|
||||
|
||||
for filename in measurement_files:
|
||||
with open(filename, "w") as f:
|
||||
with open(filename, "w", encoding = "utf8") as f:
|
||||
f.write(json.dumps(exp_measurement, indent = 4))
|
||||
|
||||
|
||||
@@ -798,6 +798,6 @@ def quant(job, save_fn, model):
|
||||
"last_module_idx": job["last_module_idx"],
|
||||
"base_perplexity": job["base_perplexity"] }
|
||||
|
||||
with open(os.path.join(job["out_dir"], "measurement.json"), "w") as f:
|
||||
with open(os.path.join(job["out_dir"], "measurement.json"), "w", encoding = "utf8") as f:
|
||||
f.write(json.dumps(exp_measurement, indent = 4))
|
||||
|
||||
|
||||
@@ -107,7 +107,7 @@ job_file = os.path.join(out_dir, "job.json")
|
||||
|
||||
def save_job():
|
||||
global job_file, job
|
||||
with open(job_file, "w") as f:
|
||||
with open(job_file, "w", encoding = "utf8") as f:
|
||||
f.write(json.dumps(job, indent = 4))
|
||||
|
||||
if no_resume or not os.path.exists(job_file):
|
||||
@@ -153,7 +153,7 @@ if no_resume or not os.path.exists(job_file):
|
||||
|
||||
if reuse_measurement is not None:
|
||||
|
||||
with open(reuse_measurement, "r") as f:
|
||||
with open(reuse_measurement, "r", encoding = "utf8") as f:
|
||||
|
||||
imp_measurement = json.load(f)
|
||||
job["measurement"] = imp_measurement["measurement"]
|
||||
@@ -170,7 +170,7 @@ else:
|
||||
print(f" -- Resuming job")
|
||||
print(f" !! Note: Overriding options with settings from existing job")
|
||||
|
||||
with open(job_file, "r") as f:
|
||||
with open(job_file, "r", encoding = "utf8") as f:
|
||||
job = json.load(f)
|
||||
|
||||
if "invalid" in job:
|
||||
|
||||
@@ -71,7 +71,7 @@ class ExLlamaV2Config:
|
||||
self.model_config = os.path.join(self.model_dir, "config.json")
|
||||
assert os.path.exists(self.model_config), "Can't find " + self.model_config
|
||||
|
||||
with open(self.model_config) as f:
|
||||
with open(self.model_config, encoding = "utf8") as f:
|
||||
read_config = json.load(f)
|
||||
|
||||
if "LlamaForCausalLM" in read_config["architectures"]: self.architecture = "Llama"
|
||||
|
||||
@@ -50,7 +50,7 @@ class ExLlamaV2Lora:
|
||||
|
||||
# Grab relevant items from LoRA config
|
||||
|
||||
with open(lora_config_path) as f:
|
||||
with open(lora_config_path, encoding = "utf8") as f:
|
||||
read_config = json.load(f)
|
||||
|
||||
self.lora_r = read_config["r"]
|
||||
|
||||
@@ -73,7 +73,7 @@ class ExLlamaV2Tokenizer:
|
||||
|
||||
tokenizer_json_path = os.path.join(self.config.model_dir, "tokenizer.json")
|
||||
if os.path.exists(tokenizer_json_path):
|
||||
with open(tokenizer_json_path) as f:
|
||||
with open(tokenizer_json_path, encoding = "utf8") as f:
|
||||
tokenizer_json = json.load(f)
|
||||
if "added_tokens" in tokenizer_json:
|
||||
for v in tokenizer_json["added_tokens"]:
|
||||
@@ -86,7 +86,7 @@ class ExLlamaV2Tokenizer:
|
||||
|
||||
added_tokens_path = os.path.join(self.config.model_dir, "added_tokens.json")
|
||||
if os.path.exists(added_tokens_path):
|
||||
with open(added_tokens_path) as f:
|
||||
with open(added_tokens_path, encoding = "utf8") as f:
|
||||
self.extended_piece_to_id = json.load(f)
|
||||
|
||||
# Remove unspecial added tokens that exist in the base tokenizer already, but only if they decode correctly
|
||||
|
||||
2
setup.py
2
setup.py
@@ -53,7 +53,7 @@ setup_kwargs = {
|
||||
} if precompile else {}
|
||||
|
||||
version_py = {}
|
||||
with open("exllamav2/version.py") as fp:
|
||||
with open("exllamav2/version.py", encoding = "utf8") as fp:
|
||||
exec(fp.read(), version_py)
|
||||
version = version_py["__version__"]
|
||||
print("Version:", version)
|
||||
|
||||
@@ -76,7 +76,7 @@ index_filename = f"{input_file}.index.json"
|
||||
|
||||
print(f" -- Writing: {index_filename}")
|
||||
|
||||
with open(index_filename, 'w') as f:
|
||||
with open(index_filename, 'w', encoding = "utf8") as f:
|
||||
json.dump(index, f, indent = 2)
|
||||
|
||||
# Done
|
||||
|
||||
Reference in New Issue
Block a user