[fix](kt-kernel): drop the weights held in Python once the C++ weight-loading operation has finished (#1695)

2026-05-11 00:10:07 +00:00 · 2025-12-12 11:42:33 +08:00
parent 1e69563363
commit 008de19e16
1 changed file with 3 additions and 0 deletions
--- a/kt-kernel/python/utils/llamafile.py
+++ b/kt-kernel/python/utils/llamafile.py
@@ -217,3 +217,6 @@ class LlamafileMoEWrapper(BaseMoEWrapper):
        # Load weights
        self.cpu_infer.submit(self.moe.load_weights_task(physical_to_logical_map_cpu.data_ptr()))
        self.cpu_infer.sync()
+
+        # Loading has been submitted and synced; release the Python-side reference to the original weights
+        self.weights_to_keep = None