diff --git a/ggml/src/ggml-cuda/mmq_id_common.cuh b/ggml/src/ggml-cuda/mmq_id_common.cuh index a7ae241e..931d144b 100644 --- a/ggml/src/ggml-cuda/mmq_id_common.cuh +++ b/ggml/src/ggml-cuda/mmq_id_common.cuh @@ -3746,9 +3746,6 @@ static void launch_mul_mat_q_id(ggml_backend_cuda_context & ctx, const mmq_args_ const int ntzw = args.nchannels_y * args.nsamples_y; const dim3 block_nums_xy_tiling(nty, ntx, ntzw); - if (args.nchannels_y % args.nchannels_x) { - printf("Oops: args.nchannels_y = %d, args.nchannels_x = %d\n", args.nchannels_y, args.nchannels_x); - } GGML_ASSERT(args.nchannels_y % args.nchannels_x == 0); GGML_ASSERT(args.nsamples_y % args.nsamples_x == 0); const int channel_ratio = args.nchannels_y / args.nchannels_x; diff --git a/ggml/src/ggml-cuda/mmq_id_kernels.cu b/ggml/src/ggml-cuda/mmq_id_kernels.cu deleted file mode 100644 index 722320ad..00000000 --- a/ggml/src/ggml-cuda/mmq_id_kernels.cu +++ /dev/null @@ -1,22 +0,0 @@ -#include "mmq_id_common.cuh" - -DECL_MMQ_CASE(GGML_TYPE_Q4_0); -DECL_MMQ_CASE(GGML_TYPE_Q4_1); -DECL_MMQ_CASE(GGML_TYPE_Q5_0); -DECL_MMQ_CASE(GGML_TYPE_Q5_1); -DECL_MMQ_CASE(GGML_TYPE_Q8_0); -DECL_MMQ_CASE(GGML_TYPE_MXFP4); -DECL_MMQ_CASE(GGML_TYPE_Q2_K); -DECL_MMQ_CASE(GGML_TYPE_Q3_K); -DECL_MMQ_CASE(GGML_TYPE_Q4_K); -DECL_MMQ_CASE(GGML_TYPE_Q5_K); -DECL_MMQ_CASE(GGML_TYPE_Q6_K); -DECL_MMQ_CASE(GGML_TYPE_IQ2_XXS); -DECL_MMQ_CASE(GGML_TYPE_IQ2_XS); -DECL_MMQ_CASE(GGML_TYPE_IQ2_S); -DECL_MMQ_CASE(GGML_TYPE_IQ3_XXS); -DECL_MMQ_CASE(GGML_TYPE_IQ3_S); -DECL_MMQ_CASE(GGML_TYPE_IQ1_S); -DECL_MMQ_CASE(GGML_TYPE_IQ4_NL); -DECL_MMQ_CASE(GGML_TYPE_IQ4_XS); - diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s_id.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s_id.cu new file mode 100644 index 00000000..9c04a020 --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s_id.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq_id_common.cuh" + +DECL_MMQ_CASE(GGML_TYPE_IQ1_S); diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s_id.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s_id.cu new file mode 100644 index 00000000..6f54f24e --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s_id.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq_id_common.cuh" + +DECL_MMQ_CASE(GGML_TYPE_IQ2_S); diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs_id.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs_id.cu new file mode 100644 index 00000000..52958667 --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs_id.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq_id_common.cuh" + +DECL_MMQ_CASE(GGML_TYPE_IQ2_XS); diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs_id.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs_id.cu new file mode 100644 index 00000000..c735ab97 --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs_id.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq_id_common.cuh" + +DECL_MMQ_CASE(GGML_TYPE_IQ2_XXS); diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s_id.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s_id.cu new file mode 100644 index 00000000..5c501ed3 --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s_id.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq_id_common.cuh" + +DECL_MMQ_CASE(GGML_TYPE_IQ3_S); diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs_id.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs_id.cu new file mode 100644 index 00000000..387e6d27 --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs_id.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq_id_common.cuh" + +DECL_MMQ_CASE(GGML_TYPE_IQ3_XXS); diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl_id.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl_id.cu new file mode 100644 index 00000000..6ef74f1d --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl_id.cu @@ -0,0 +1,4 @@ +#include "../mmq_id_common.cuh" + +DECL_MMQ_CASE(GGML_TYPE_IQ4_NL); +DECL_MMQ_CASE(GGML_TYPE_MXFP4); diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs_id.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs_id.cu new file mode 100644 index 00000000..988f5da1 --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs_id.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq_id_common.cuh" + +DECL_MMQ_CASE(GGML_TYPE_IQ4_XS); diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k_id.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k_id.cu new file mode 100644 index 00000000..14d78d83 --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k_id.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq_id_common.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q2_K); diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k_id.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k_id.cu new file mode 100644 index 00000000..4262f49a --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k_id.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq_id_common.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q3_K); diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0_id.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0_id.cu new file mode 100644 index 00000000..4e747a8c --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0_id.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq_id_common.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q4_0); diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1_id.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1_id.cu new file mode 100644 index 00000000..a29a943e --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1_id.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq_id_common.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q4_1); diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k_id.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k_id.cu new file mode 100644 index 00000000..c16f7613 --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k_id.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq_id_common.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q4_K); diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0_id.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0_id.cu new file mode 100644 index 00000000..3afa037a --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0_id.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq_id_common.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q5_0); diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1_id.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1_id.cu new file mode 100644 index 00000000..1c161297 --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1_id.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq_id_common.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q5_1); diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k_id.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k_id.cu new file mode 100644 index 00000000..36c7a9fa --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k_id.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq_id_common.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q5_K); diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k_id.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k_id.cu new file mode 100644 index 00000000..2a02f965 --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k_id.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq_id_common.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q6_K); diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0_id.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0_id.cu new file mode 100644 index 00000000..3b126bed --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0_id.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq_id_common.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q8_0);