mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-13 07:20:15 +00:00
Fix bug in MMVQ kernel
This commit is contained in:
@@ -72,10 +72,13 @@ static __device__ void mul_mat_vec_q(
|
|||||||
|
|
||||||
constexpr vec_dot_q_cuda_t vec_dot_q_cuda = get_vec_dot_q_cuda(type);
|
constexpr vec_dot_q_cuda_t vec_dot_q_cuda = get_vec_dot_q_cuda(type);
|
||||||
|
|
||||||
|
//int64_t rows_per_cuda_block = ggml_cuda_info().devices[id].cc < CC_RDNA2 ?
|
||||||
|
// ncols_y < 4 ? 1 : 2 : 1;
|
||||||
|
|
||||||
#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) && (defined(RDNA2) || defined(RDNA3))
|
#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) && (defined(RDNA2) || defined(RDNA3))
|
||||||
constexpr int rows_per_cuda_block = 1;
|
constexpr int rows_per_cuda_block = 1;
|
||||||
#else
|
#else
|
||||||
constexpr int rows_per_cuda_block = ncols_y == 1 ? 1 : 2;
|
constexpr int rows_per_cuda_block = ncols_y < 4 ? 1 : 2;
|
||||||
#endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) && !defined(RDNA2) && !defined(RDNA3)
|
#endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) && !defined(RDNA2) && !defined(RDNA3)
|
||||||
|
|
||||||
const int tid = WARP_SIZE*threadIdx.y + threadIdx.x;
|
const int tid = WARP_SIZE*threadIdx.y + threadIdx.x;
|
||||||
|
|||||||
Reference in New Issue
Block a user