mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-03-14 18:37:23 +00:00
[fix](kt-kernel): drop the weights held in Python once the C++ weight-loading operation has completed (#1695)
This commit is contained in:
@@ -217,3 +217,6 @@ class LlamafileMoEWrapper(BaseMoEWrapper):
|
||||
# Load weights
|
||||
self.cpu_infer.submit(self.moe.load_weights_task(physical_to_logical_map_cpu.data_ptr()))
|
||||
self.cpu_infer.sync()
|
||||
|
||||
# Drop original weights after loading
|
||||
self.weights_to_keep = None
|
||||
Reference in New Issue
Block a user