Update installation instructions

2026-04-28 10:11:33 +00:00 · 2025-11-04 23:03:36 +08:00
parent 501b114863
commit fe556bba34
1 changed files with 4 additions and 4 deletions
--- a/doc/zh/KTransformers-Fine-Tuning_User-Guide_zh.md
+++ b/doc/zh/KTransformers-Fine-Tuning_User-Guide_zh.md
@@ -114,11 +114,11 @@ git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
 cd LLaMA-Factory
 pip install -e ".[torch,metrics]" --no-build-isolation

-# 3. 安装对应torch和python版本的KTransformers（CUDA版本可以跟whl命名的不一致）
+# 3. 安装对应torch和python版本的KTransformers（CUDA版本可以跟whl命名的不一致），从 https://github.com/kvcache-ai/ktransformers/releases/tag/v0.4.1 下载
 pip install ktransformers-0.4.1+cu128torch28fancy-cp310-cp310-linux_x86_64.whl

 # 4. 安装flash-attention，参照python版本和torch版本，从https://github.com/Dao-AILab/flash-attention/releases下载
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.8cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
+pip install flash_attn-2.8.3+cu12torch2.8cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
 # abi=True/False可以用下面代码查看
 # import torch
 # print(torch._C._GLIBCXX_USE_CXX11_ABI)
@@ -233,7 +233,7 @@ infer_backend: ktransformers  # choices: [huggingface, vllm, sglang, ktransforme
 trust_remote_code: true

 use_kt: true # 调用KTransformers backend
-kt_optimize_rule: examples/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx.yaml # 请选择和LoRA微调的时候保持一致的YAML文件
+kt_optimize_rule: examples/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu.yaml # 请选择和LoRA微调的时候保持一致的YAML文件
 cpu_infer: 32
 chunk_size: 8192
 ```
@@ -299,4 +299,4 @@ DeepSeek-V2-lite（14B，27层，其中26层有MoE）占用显存大约**5.5GB**

 通过开发 KTransformers LoRA微调并将其集成到 LLaMA‑Factory，我们为希望高效训练与部署 MoE 大模型的用户提供了可行指南。KT 带来最尖端的优化（支持 DeepSeek、Qwen、Kimi 等，配合 AMX 加速 kernel），同时通过 LoRA 微调在极低 GPU 显存下实现定制化。LLaMA‑Factory 则提供友好的统一界面，更广的用户支持。

-该集成（类似 Unsloth 补丁所带来的提速）意味着即便是数百亿乃至万亿总参数量的 MoE 模型，也可在普通硬件上完成微调并低延迟部署。**显存节省、速度提升、易用性** 三者兼得。我们鼓励用户在下一次 MoE 项目中尝试 LLaMA‑Factory 的 KT 集成，并参考本文档进行操作。也欢迎提出任何问题和建议！
+该集成（类似 Unsloth 补丁所带来的提速）意味着即便是数百亿乃至万亿总参数量的 MoE 模型，也可在普通硬件上完成微调并低延迟部署。**显存节省、速度提升、易用性** 三者兼得。我们鼓励用户在下一次 MoE 项目中尝试 LLaMA‑Factory 的 KT 集成，并参考本文档进行操作。也欢迎提出任何问题和建议！