diff --git a/kt-kernel/python/utils/llamafile.py b/kt-kernel/python/utils/llamafile.py index f39704b..68dce64 100644 --- a/kt-kernel/python/utils/llamafile.py +++ b/kt-kernel/python/utils/llamafile.py @@ -217,3 +217,6 @@ class LlamafileMoEWrapper(BaseMoEWrapper): # Load weights self.cpu_infer.submit(self.moe.load_weights_task(physical_to_logical_map_cpu.data_ptr())) self.cpu_infer.sync() + + # Drop original weights after loading + self.weights_to_keep = None \ No newline at end of file