mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-05-11 08:20:21 +00:00
update marlin expert example
This commit is contained in:
@@ -79,6 +79,24 @@
|
||||
generate_device: "cuda:1"
|
||||
prefill_device: "cuda:1"
|
||||
|
||||
- match:
|
||||
name: "^model\\.layers\\.(0|[1-4])\\.mlp\\.experts$" # inject experts in layer 0~4 as marlin expert
|
||||
replace:
|
||||
class: ktransformers.operators.experts.KTransformersExperts
|
||||
kwargs:
|
||||
generate_device: "cuda:0" # run on cuda:0
|
||||
generate_op: "KExpertsMarlin"
|
||||
recursive: False
|
||||
|
||||
- match:
|
||||
name: "^model\\.layers\\.([3][0])\\.mlp\\.experts$" # inject experts in layer 30 only as marlin expert (this pattern does not match layer 31)
|
||||
replace:
|
||||
class: ktransformers.operators.experts.KTransformersExperts
|
||||
kwargs:
|
||||
generate_device: "cuda:1"
|
||||
generate_op: "KExpertsMarlin"
|
||||
recursive: False
|
||||
|
||||
- match:
|
||||
name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.mlp\\.experts$"
|
||||
replace:
|
||||
@@ -139,5 +157,5 @@
|
||||
replace:
|
||||
class: "default"
|
||||
kwargs:
|
||||
generate_device: "cuda:1"
|
||||
prefill_device: "cuda:1"
|
||||
generate_device: "cuda:0"
|
||||
prefill_device: "cuda:0"
|
||||
|
||||
Reference in New Issue
Block a user