AMXMoEWrapper -> KTMoEWrapper (#1604)

fix import KTMoEWrapper
2026-03-15 02:47:22 +00:00 · 2025-11-12 16:34:54 +08:00
parent 02801c3c4e
commit 13b8ddecd9
1 changed file with 4 additions and 4 deletions
--- a/kt-kernel/scripts/convert_cpu_weights.py
+++ b/kt-kernel/scripts/convert_cpu_weights.py
@@ -16,7 +16,7 @@ import numpy as np

 # Add parent directory to path to import kt_kernel
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-from kt_kernel import AMXMoEWrapper
+from kt_kernel import KTMoEWrapper

 import triton
 import triton.language as tl
@@ -759,7 +759,7 @@ class OnlineQuantConverter(ConverterBase):

        # Create AMXMoEWrapper instance for this layer
        # num_gpu_experts=0 since we're converting all experts to CPU format
-        wrapper = AMXMoEWrapper(
+        wrapper = KTMoEWrapper(
            layer_idx=layer_idx,
            num_experts=self.num_experts,
            num_experts_per_tok=self.num_experts_per_tok,
@@ -768,10 +768,10 @@ class OnlineQuantConverter(ConverterBase):
            num_gpu_experts=0,  # All experts on CPU for conversion
            cpuinfer_threads=self.cpuinfer_threads,
            threadpool_count=self.threadpool_count,
-            amx_weight_path=self.output_path,  # Output path for quantized weights
+            weight_path=self.output_path,  # Output path for quantized weights
            chunked_prefill_size=512,  # Arbitrary value, not critical for conversion
            cpu_save=True,  # Enable saving quantized weights to output
-            amx_method=amx_method,  # Specify quantization method (AMXINT4 or AMXINT8)
+            method=amx_method,  # Specify quantization method (AMXINT4 or AMXINT8)
        )

        # Load and quantize weights from tensors