Mirror of https://github.com/ikawrakow/ik_llama.cpp.git
Enable q6_0 for flash attention (#101)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
1 changed file: Makefile (2 additions, 0 deletions)
@@ -600,6 +600,8 @@ else
 	OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
 	OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-iq4_nl.cu))
 	OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*iq4_nl-iq4_nl.cu))
+	OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q6_0-q5_0.cu))
+	OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q6_0.cu))
 endif # GGML_CUDA_FA_ALL_QUANTS
 
 ifdef GGML_CUDA
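
The two added wildcard rules pull the fattn-vec flash-attention template instances for a K=q6_0/V=q5_0 cache and a K=q8_0/V=q6_0 cache into the default CUDA build, i.e. into the else branch taken when GGML_CUDA_FA_ALL_QUANTS is not set. A minimal sketch of how these kernels would be exercised, assuming mainline llama.cpp conventions for the binary name and the -fa/-ctk/-ctv flags (assumptions, not confirmed by this commit):

# Default CUDA build: the q6_0-q5_0 and q8_0-q6_0 fattn-vec kernels are now
# instantiated without GGML_CUDA_FA_ALL_QUANTS=1 (which compiles every K/V pairing).
make GGML_CUDA=1 -j

# Hypothetical run: enable flash attention with a quantized KV cache that
# maps onto the new kernels (K cache q8_0, V cache q6_0).
./llama-cli -m model.gguf -fa -ctk q8_0 -ctv q6_0 -p "Hello"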