Mirror of https://github.com/kvcache-ai/ktransformers.git, synced 2026-04-20 14:29:22 +00:00.

Commit: fix local_chat.py — chunk_size not affecting experts
This commit is contained in:
@@ -70,6 +70,7 @@ def local_chat(
     torch.set_grad_enabled(False)

     Config().cpu_infer = cpu_infer
+    Config().chunk_size = chunk_size
     if torch.xpu.is_available():
         use_cuda_graph = False

@@ -213,7 +213,7 @@ class KExpertsCPU(KExpertsBase):
             self.config.num_experts_per_tok,
             self.config.hidden_size,
             self.config.moe_intermediate_size,
-            max(cuda_graphs),
+            max(cuda_graphs) if isinstance(cuda_graphs, list) else Config().chunk_size,
             gate_ptr,
             up_ptr,
             down_ptr,
@@ -231,7 +231,7 @@ class KExpertsCPU(KExpertsBase):
             self.config.num_experts_per_tok,
             self.config.hidden_size,
             self.config.moe_intermediate_size,
-            max(cuda_graphs),
+            max(cuda_graphs) if isinstance(cuda_graphs, list) else Config().chunk_size,
             gate_ptr,
             up_ptr,
             down_ptr,

Reference in New Issue
Block a user