mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-30 03:01:16 +00:00
[fix](kt-kernel): drop the weights held in Python once the C++ weight-loading operation has completed (#1695)
This commit is contained in:
@@ -217,3 +217,6 @@ class LlamafileMoEWrapper(BaseMoEWrapper):
|
|||||||
# Load weights
|
# Load weights
|
||||||
self.cpu_infer.submit(self.moe.load_weights_task(physical_to_logical_map_cpu.data_ptr()))
|
self.cpu_infer.submit(self.moe.load_weights_task(physical_to_logical_map_cpu.data_ptr()))
|
||||||
self.cpu_infer.sync()
|
self.cpu_infer.sync()
|
||||||
|
|
||||||
|
# Drop original weights after loading
|
||||||
|
self.weights_to_keep = None
|
||||||
Reference in New Issue
Block a user