mirror of
https://github.com/turboderp-org/exllamav2.git
synced 2026-04-19 22:08:55 +00:00
Option to resume conversion job with no other args
This commit is contained in:
44
convert.py
44
convert.py
@@ -14,6 +14,7 @@ import torch
|
||||
parser = argparse.ArgumentParser(description = "Convert model to ExLlamaV2")
|
||||
parser.add_argument("-i", "--in_dir", type = str, help = "Input directory", default = "")
|
||||
parser.add_argument("-o", "--out_dir", type = str, help = "Output (working) directory")
|
||||
parser.add_argument("-res", "--resume", action = "store_true", help = "Resume job from specified output directory (without specifying other options)")
|
||||
parser.add_argument("-nr", "--no_resume", action = "store_true", help = "Do not resume an interrupted job (deletes all files in the output directory)")
|
||||
parser.add_argument("-cf", "--compile_full", type = str, help = "Output folder for compiled model with all config/tokenizer files")
|
||||
parser.add_argument("-c", "--cal_dataset", type = str, help = "Calibration dataset (.parquet file)")
|
||||
@@ -37,12 +38,17 @@ torch.set_printoptions(precision = 7, sci_mode = False, linewidth = 200)
|
||||
|
||||
# Check some args
|
||||
|
||||
if not args.in_dir:
|
||||
print(" ## Please specify input model directory (-i, --in_dir)")
|
||||
resuming = False
|
||||
if args.out_dir:
|
||||
if not args.no_resume:
|
||||
if os.path.exists(os.path.join(args.out_dir, "job_new.json")):
|
||||
resuming = True
|
||||
else:
|
||||
print(" ## Please specify output/working directory (-o, --out_dir)")
|
||||
sys.exit()
|
||||
|
||||
if not args.out_dir:
|
||||
print(" ## Please specify output/working directory (-o, --out_dir)")
|
||||
if not args.in_dir and not resuming:
|
||||
print(" ## Please specify input model directory (-i, --in_dir)")
|
||||
sys.exit()
|
||||
|
||||
if args.length > 2048 or args.measurement_length > 2048:
|
||||
@@ -63,17 +69,6 @@ if not os.path.exists(args.out_dir):
|
||||
print(f" ## Error: Directory not found: {args.out_dir}")
|
||||
sys.exit()
|
||||
|
||||
# Create config
|
||||
|
||||
config = ExLlamaV2Config()
|
||||
config.model_dir = args.in_dir
|
||||
config.qkv_embed = False
|
||||
config.prepare()
|
||||
|
||||
# Tokenizer
|
||||
|
||||
tokenizer = ExLlamaV2Tokenizer(config)
|
||||
|
||||
# Create job
|
||||
|
||||
def save_job():
|
||||
@@ -133,7 +128,8 @@ if args.no_resume or not os.path.exists(job_file):
|
||||
|
||||
else:
|
||||
print(f" -- Resuming job")
|
||||
print(f" !! Note: Overriding options with settings from existing job")
|
||||
if args.in_dir:
|
||||
print(f" !! Note: Overriding options with settings from existing job")
|
||||
|
||||
with open(job_file, "r", encoding = "utf8") as f:
|
||||
resume_job = json.load(f)
|
||||
@@ -146,6 +142,10 @@ else:
|
||||
print(" ** Error: Corrupted job")
|
||||
sys.exit()
|
||||
|
||||
if job["progress"] == "finished":
|
||||
print(" !! Job is already finished")
|
||||
sys.exit()
|
||||
|
||||
# Feedback
|
||||
|
||||
print(f" -- Input: {job['in_dir']}")
|
||||
@@ -161,7 +161,6 @@ else:
|
||||
print(f" -- Measurement will be saved to {job['output_measurement']}")
|
||||
print(f" !! Conversion script will end after measurement pass")
|
||||
|
||||
|
||||
if job['rope_scale']: print(f" -- RoPE scale: {job['rope_scale']:.2f}")
|
||||
if job['rope_alpha']: print(f" -- RoPE alpha: {job['rope_alpha']:.2f}")
|
||||
|
||||
@@ -180,6 +179,17 @@ out_tensor_dir = os.path.join(job["out_dir"], "out_tensor")
|
||||
if not os.path.exists(out_tensor_dir):
|
||||
os.makedirs(out_tensor_dir)
|
||||
|
||||
# Create config
|
||||
|
||||
config = ExLlamaV2Config()
|
||||
config.model_dir = job['in_dir']
|
||||
config.qkv_embed = False
|
||||
config.prepare()
|
||||
|
||||
# Tokenizer
|
||||
|
||||
tokenizer = ExLlamaV2Tokenizer(config)
|
||||
|
||||
# Set scaling for input model
|
||||
|
||||
if job["rope_scale"] is not None: config.scale_pos_emb = job["rope_scale"]
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
Here are the arguments to `convert.py`:
|
||||
|
||||
- **-i / --in_dir *directory***: _(required)_ The source model to convert, in HF format (FP16). The directory should
|
||||
- **-i / --in_dir *directory***: _(required if not resuming)_ The source model to convert, in HF format (FP16). The directory should
|
||||
contain at least a `config.json` file, a `tokenizer.model` file and one or more `.safetensors` files containing weights.
|
||||
If there are multiple weights files, they will all be indexed and searched for the necessary tensors, so sharded models are
|
||||
supported.
|
||||
@@ -132,6 +132,13 @@ python convert.py \
|
||||
-b 4.5
|
||||
```
|
||||
|
||||
If the working `-o` directory is not empty and you do not specify `-nr`, any existing job in that directory
|
||||
will be resumed. You can resume a job with no other arguments:
|
||||
|
||||
```sh
|
||||
python convert.py -o /mnt/temp/exl2/
|
||||
```
|
||||
|
||||
### Notes
|
||||
|
||||
- If the conversion script seems to stop on the "Solving..." step, give it a moment. It's attempting to find the
|
||||
|
||||
Reference in New Issue
Block a user