[fix]: fix --numa-nodes handling (#1904)

* [fix]: fix --numa-nodes handling
This commit is contained in:
Oql
2026-03-31 17:50:22 +08:00
committed by GitHub
parent cdc867c864
commit 9e6484a538
5 changed files with 43 additions and 13 deletions

View File

@@ -5,7 +5,13 @@ from typing import List, Optional
 # Use relative imports for package structure
 from ..experts_base import BaseMoEWrapper
-from .loader import SafeTensorLoader, CompressedSafeTensorLoader, FP8SafeTensorLoader, BF16SafeTensorLoader, GPTQSafeTensorLoader
+from .loader import (
+    SafeTensorLoader,
+    CompressedSafeTensorLoader,
+    FP8SafeTensorLoader,
+    BF16SafeTensorLoader,
+    GPTQSafeTensorLoader,
+)
 from kt_kernel_ext.moe import MOEConfig
 import kt_kernel_ext.moe as _moe_mod
@@ -351,6 +357,7 @@ class NativeMoEWrapper(BaseMoEWrapper):
         cpu_save: bool = False,
         max_deferred_experts_per_token: Optional[int] = None,
         method: str = "RAWINT4",
+        numa_nodes: Optional[List[int]] = None,
     ):
         if method == "RAWINT4" and not _HAS_RAWINT4_SUPPORT:
             raise RuntimeError(
@@ -379,10 +386,7 @@ class NativeMoEWrapper(BaseMoEWrapper):
                 "Please recompile kt_kernel_ext with AVX512+BF16 or AVX2 enabled."
             )
         if method == "GPTQ_INT4" and not _HAS_AVX2_GPTQ_INT4_SUPPORT:
-            raise RuntimeError(
-                "GPTQ_INT4 backend not available.\n"
-                "Please recompile kt_kernel_ext with AVX2 enabled."
-            )
+            raise RuntimeError("GPTQ_INT4 backend not available.\n" "Please recompile kt_kernel_ext with AVX2 enabled.")
         super().__init__(
             layer_idx=layer_idx,

View File

@@ -41,6 +41,7 @@ class LlamafileMoEWrapper(BaseMoEWrapper):
         cpu_save: bool = False,
         max_deferred_experts_per_token: Optional[int] = None,
         method: str = "LLAMAFILE",
+        numa_nodes: Optional[List[int]] = None,
     ):
         """
         Initialize Llamafile MoE Wrapper.

View File

@@ -49,6 +49,7 @@ class GeneralMoEWrapper(BaseMoEWrapper):
         cpu_save: bool = False,
         max_deferred_experts_per_token: Optional[int] = None,
         method: str = "MOE_INT8",
+        numa_nodes: Optional[List[int]] = None,
     ):
         """
         Initialize general MoE Wrapper.