AMXMoEWrapper -> KTMoEWrapper (#1604)

Fix the import of KTMoEWrapper.
Author: Jiaqi Liao
Date: 2025-11-12 16:34:54 +08:00
Committed by: GitHub
Parent: 02801c3c4e
Commit: 13b8ddecd9


@@ -16,7 +16,7 @@ import numpy as np

 # Add parent directory to path to import kt_kernel
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-from kt_kernel import AMXMoEWrapper
+from kt_kernel import KTMoEWrapper
 import triton
 import triton.language as tl

@@ -759,7 +759,7 @@ class OnlineQuantConverter(ConverterBase):

         # Create AMXMoEWrapper instance for this layer
         # num_gpu_experts=0 since we're converting all experts to CPU format
-        wrapper = AMXMoEWrapper(
+        wrapper = KTMoEWrapper(
             layer_idx=layer_idx,
             num_experts=self.num_experts,
             num_experts_per_tok=self.num_experts_per_tok,

@@ -768,10 +768,10 @@ class OnlineQuantConverter(ConverterBase):

             num_gpu_experts=0,  # All experts on CPU for conversion
             cpuinfer_threads=self.cpuinfer_threads,
             threadpool_count=self.threadpool_count,
-            amx_weight_path=self.output_path,  # Output path for quantized weights
+            weight_path=self.output_path,  # Output path for quantized weights
             chunked_prefill_size=512,  # Arbitrary value, not critical for conversion
             cpu_save=True,  # Enable saving quantized weights to output
-            amx_method=amx_method,  # Specify quantization method (AMXINT4 or AMXINT8)
+            method=amx_method,  # Specify quantization method (AMXINT4 or AMXINT8)
         )

         # Load and quantize weights from tensors
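
For reference, a minimal sketch of the renamed call site after this change. Only the parameters visible in this diff are used; the config values below are hypothetical placeholders, and the real constructor may take additional arguments that fall between the two hunks above.

import os
import sys

# kt_kernel lives one directory up, as in the converter script
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from kt_kernel import KTMoEWrapper  # formerly AMXMoEWrapper

wrapper = KTMoEWrapper(
    layer_idx=0,                   # layer being converted (placeholder)
    num_experts=256,               # hypothetical model config value
    num_experts_per_tok=8,         # hypothetical model config value
    num_gpu_experts=0,             # all experts on CPU for conversion
    cpuinfer_threads=32,           # hypothetical thread count
    threadpool_count=2,            # hypothetical pool count
    weight_path="/tmp/quantized",  # formerly amx_weight_path
    chunked_prefill_size=512,      # arbitrary, not critical for conversion
    cpu_save=True,                 # write quantized weights to weight_path
    method="AMXINT8",              # formerly amx_method; AMXINT4 or AMXINT8
)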