[fix](kt-kernel): drop the weights held in Python once the C++ weight-loading operation has completed (#1695)

This commit is contained in:
SCDESPERTATE
2025-12-12 11:42:33 +08:00
committed by GitHub
parent 1e69563363
commit 008de19e16

View File

@@ -217,3 +217,6 @@ class LlamafileMoEWrapper(BaseMoEWrapper):
# Load weights
self.cpu_infer.submit(self.moe.load_weights_task(physical_to_logical_map_cpu.data_ptr()))
self.cpu_infer.sync()
# Drop original weights after loading
self.weights_to_keep = None