numactl -N 0 -m 0 \
+> ./build/bin/llama-bench \
+> --model /mnt/ai/models/unsloth/DeepSeek-R1-GGUF/DeepSeek-R1-Q8_0/DeepSeek-R1.Q8_0-00001-of-00015.gguf \
+> --cache-type-k f16 \
+> --cache-type-v f16 \
+> --numa numactl \
+> --threads 64,43,64,86,128,172
+> Results
+>
+> model | size | params | backend | threads | test | t/s
+> -- | -- | -- | -- | -- | -- | --
+> deepseek2 671B Q8_0 | 664.29 GiB | 671.03 B | CPU | 64 | pp512 | 56.86 ± 7.21
+> deepseek2 671B Q8_0 | 664.29 GiB | 671.03 B | CPU | 64 | tg128 | 4.86 ± 0.01
+> deepseek2 671B Q8_0 | 664.29 GiB | 671.03 B | CPU | 43 | pp512 | 40.62 ± 0.02
+> deepseek2 671B Q8_0 | 664.29 GiB | 671.03 B | CPU | 43 | tg128 | 3.69 ± 0.00
+> deepseek2 671B Q8_0 | 664.29 GiB | 671.03 B | CPU | 64 | pp512 | 57.67 ± 4.62
+> deepseek2 671B Q8_0 | 664.29 GiB | 671.03 B | CPU | 64 | tg128 | 4.89 ± 0.00
+> deepseek2 671B Q8_0 | 664.29 GiB | 671.03 B | CPU | 86 | pp512 | 62.21 ± 13.63
+> deepseek2 671B Q8_0 | 664.29 GiB | 671.03 B | CPU | 86 | tg128 | 5.69 ± 0.00
+> deepseek2 671B Q8_0 | 664.29 GiB | 671.03 B | CPU | 128 | pp512 | 78.89 ± 21.46
+> deepseek2 671B Q8_0 | 664.29 GiB | 671.03 B | CPU | 128 | tg128 | 6.60 ± 0.00
+> deepseek2 671B Q8_0 | 664.29 GiB | 671.03 B | CPU | 172 | pp512 | 70.63 ± 0.58
+> deepseek2 671B Q8_0 | 664.29 GiB | 671.03 B | CPU | 172 | tg128 | 5.05 ± 0.00
+
+---
+
+👤 **ikawrakow** replied the **2025-03-13** at **11:55:55**: