mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-20 14:29:22 +00:00
[feat](kt-kernel): Add resume arg to CPU weight conversion (#1630)
* [feat]: kt-kernel: Add resume arg to CPU weight conversion * [docs]: kt-kernel: Document resume arg for CPU weight conversion * [fix]: kt-kernel: Only print resume layer if in use * [fix]: kt-kernel: Don't log skipped layers when using resume_layer
This commit is contained in:
@@ -107,6 +107,20 @@ output_dir/
|
||||
- Need to process very large models on memory-constrained systems
|
||||
- Want to preserve intermediate layer-wise quantized weights
|
||||
|
||||
### Resume Layer
|
||||
|
||||
For memory-constrained systems that are unable to complete quantization despite enabling low memory mode with `--no-merge-safetensor`, restart the script with the `--resume-layer` arg to specify the layer from which to continue the conversion process. In the example below, we skip layers 0-11 and resume conversion starting with layer 12.
|
||||
|
||||
```bash
|
||||
python scripts/convert_cpu_weights.py \
|
||||
--input-path /path/to/model \
|
||||
--input-type bf16 \
|
||||
--output /path/to/output \
|
||||
--quant-method int4 \
|
||||
    --no-merge-safetensor \
|
||||
--resume-layer 12
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
### Example 1: Quantize DeepSeek-V3.1 (FP8 → INT4)
|
||||
|
||||
@@ -330,11 +330,18 @@ class ConverterBase:
|
||||
"""
|
||||
raise NotImplementedError("Subclasses must implement _convert_layer_experts")
|
||||
|
||||
def convert(self):
|
||||
"""Convert all expert layers using subclass-specific logic."""
|
||||
def convert(self, resume_layer: int = 0):
|
||||
"""Convert all expert layers using subclass-specific logic.
|
||||
|
||||
Args:
|
||||
resume_layer (int, optional): The layer index to resume conversion from.
|
||||
Layers with an index lower than this will be skipped. Defaults to 0.
|
||||
"""
|
||||
print("Starting conversion...")
|
||||
print(f"Input: {self.input_path}")
|
||||
print(f"Output: {self.output_path}")
|
||||
if resume_layer > 0:
|
||||
print(f"Resuming from layer: {resume_layer}")
|
||||
|
||||
# Create output directory
|
||||
os.makedirs(self.output_path, exist_ok=True)
|
||||
@@ -355,6 +362,8 @@ class ConverterBase:
|
||||
|
||||
# Process layers with memory cleanup
|
||||
for i, (layer_idx, expert_ids) in enumerate(sorted(expert_layers.items())):
|
||||
if layer_idx < resume_layer:
|
||||
continue
|
||||
print(f"Processing layer {layer_idx} ({i+1}/{len(expert_layers)})...")
|
||||
|
||||
layer_tensors = self._convert_layer_experts(layer_idx, expert_ids)
|
||||
@@ -840,6 +849,12 @@ def main():
|
||||
default=False,
|
||||
help="Keep layer folders without merging to safetensor files (default: False)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--resume-layer",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Resume conversion starting at this layer index (default: 0)",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -893,7 +908,7 @@ def main():
|
||||
)
|
||||
|
||||
# Run conversion
|
||||
converter.convert()
|
||||
converter.convert(resume_layer=args.resume_layer)
|
||||
|
||||
# Cleanup
|
||||
converter.close()
|
||||
|
||||
Reference in New Issue
Block a user