From a3440098a4d4fd0daf4f2c2e5091647c6f60014b Mon Sep 17 00:00:00 2001
From: turboderp <11859846+turboderp@users.noreply.github.com>
Date: Tue, 29 Apr 2025 20:44:10 +0200
Subject: [PATCH] Add Qwen3ForCausalLM

---
 exllamav2/architecture.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/exllamav2/architecture.py b/exllamav2/architecture.py
index 300fe0c..1797dc4 100644
--- a/exllamav2/architecture.py
+++ b/exllamav2/architecture.py
@@ -428,6 +428,19 @@ class ExLlamaV2ArchParams:
             self.lm.attention_bias_qkv = True
             self.lm.supports_tp = True
 
+        # Qwen3
+
+        if arch_string == "Qwen3ForCausalLM":
+            arch_recognized = True
+            self.lm.layer_keys += \
+                layer_keys_llama_norms + \
+                layer_keys_llama_attn + \
+                layer_keys_llama_mlp
+            self.lm.expect_keys += \
+                expect_keys_llama
+            self.lm.supports_tp = True
+            self.lm.default_use_qk_norm = True
+
         # Qwen2-VL (2, 2.5)
 
         if arch_string in ["Qwen2VLForConditionalGeneration", "Qwen2_5_VLForConditionalGeneration"]: