From 13b8ddecd99790462d1a173c8fbc74661e74b569 Mon Sep 17 00:00:00 2001
From: Jiaqi Liao <30439460+SkqLiao@users.noreply.github.com>
Date: Wed, 12 Nov 2025 16:34:54 +0800
Subject: [PATCH] AMXMoEWrapper -> KTMoEWrapper (#1604)

fix import KTMoEWrapper
---
 kt-kernel/scripts/convert_cpu_weights.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kt-kernel/scripts/convert_cpu_weights.py b/kt-kernel/scripts/convert_cpu_weights.py
index ddc2313..520e873 100644
--- a/kt-kernel/scripts/convert_cpu_weights.py
+++ b/kt-kernel/scripts/convert_cpu_weights.py
@@ -16,7 +16,7 @@ import numpy as np
 
 # Add parent directory to path to import kt_kernel
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-from kt_kernel import AMXMoEWrapper
+from kt_kernel import KTMoEWrapper
 
 import triton
 import triton.language as tl
@@ -759,7 +759,7 @@ class OnlineQuantConverter(ConverterBase):
 
         # Create AMXMoEWrapper instance for this layer
         # num_gpu_experts=0 since we're converting all experts to CPU format
-        wrapper = AMXMoEWrapper(
+        wrapper = KTMoEWrapper(
             layer_idx=layer_idx,
             num_experts=self.num_experts,
             num_experts_per_tok=self.num_experts_per_tok,
@@ -768,10 +768,10 @@ class OnlineQuantConverter(ConverterBase):
             num_gpu_experts=0,  # All experts on CPU for conversion
             cpuinfer_threads=self.cpuinfer_threads,
             threadpool_count=self.threadpool_count,
-            amx_weight_path=self.output_path,  # Output path for quantized weights
+            weight_path=self.output_path,  # Output path for quantized weights
             chunked_prefill_size=512,  # Arbitrary value, not critical for conversion
             cpu_save=True,  # Enable saving quantized weights to output
-            amx_method=amx_method,  # Specify quantization method (AMXINT4 or AMXINT8)
+            method=amx_method,  # Specify quantization method (AMXINT4 or AMXINT8)
         )
 
         # Load and quantize weights from tensors