mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-05-05 13:41:35 +00:00
(kt-kernel): add numa_nodes parameter for explicit NUMA node mapping (#1891)
Add numa_nodes parameter to BaseMoEWrapper and all subclasses, allowing users to explicitly specify which NUMA node IDs to use for subpool mapping instead of always defaulting to sequential [0, 1, ..., N-1]. This enables running multiple KTransformers instances on different NUMA nodes of the same machine, e.g. --kt-threadpool-count 1 --kt-numa-nodes 1 to bind to NUMA node 1. Previously this required external numactl workarounds since subpool_numa_map was hardcoded to start from 0.
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
import torch
|
||||
from typing import Optional
|
||||
from typing import List, Optional
|
||||
import os
|
||||
|
||||
# Use relative imports for package structure
|
||||
@@ -133,6 +133,7 @@ class LlamafileMoEWrapper(BaseMoEWrapper):
|
||||
cpu_save=cpu_save,
|
||||
max_deferred_experts_per_token=max_deferred_experts_per_token,
|
||||
method=method,
|
||||
numa_nodes=numa_nodes,
|
||||
)
|
||||
|
||||
self.weights_to_keep = None
|
||||
|
||||
Reference in New Issue
Block a user