mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-24 00:19:19 +00:00
iqk_mul_mat: be able to handle any f16/f32 combination on AVX2
Only the f16 x f32 and f32 x f16 combinations are turned on for now.
This commit is contained in:
41
sgemm.cpp
41
sgemm.cpp
@@ -866,22 +866,41 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
||||
if (Ctype != GGML_TYPE_F32)
|
||||
return false;
|
||||
|
||||
if (task == GGML_TASK_TYPE_COMPUTE && k >= 256 && Atype == GGML_TYPE_F16) {
|
||||
#if defined __AVX2__ && defined __FMA__
|
||||
if (Btype == GGML_TYPE_F32) {
|
||||
if (iqk_mul_mat(m, n, k, Atype, A, B, (float *)C, ldc, ith, nth)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
//bool is_accepted_float_type = k >= 32 && Atype == GGML_TYPE_F16 && Btype == GGML_TYPE_F32;
|
||||
bool is_accepted_float_type = k >= 32 &&
|
||||
((Atype == GGML_TYPE_F16 && Btype == GGML_TYPE_F32) || (Atype == GGML_TYPE_F32 && Btype == GGML_TYPE_F16));
|
||||
#elif defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC && defined __ARM_FEATURE_FMA
|
||||
if (Btype == GGML_TYPE_F16) {
|
||||
if (iqk_mul_mat(m, n, k, Atype, A, B, (float *)C, ldc, ith, nth)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
bool is_accepted_float_type = k >= 32 && Atype == GGML_TYPE_F16 && Btype == GGML_TYPE_F16;
|
||||
#else
|
||||
bool is_accepted_float_type = false;
|
||||
#endif
|
||||
if (task == GGML_TASK_TYPE_INIT && is_accepted_float_type) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (task == GGML_TASK_TYPE_COMPUTE && is_accepted_float_type) {
|
||||
if (iqk_mul_mat(m, n, k, Atype, A, B, (float *)C, ldc, ith, nth)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// if (task == GGML_TASK_TYPE_COMPUTE && k >= 32 && Atype == GGML_TYPE_F16) {
|
||||
//#if defined __AVX2__ && defined __FMA__
|
||||
// if (Btype == GGML_TYPE_F32) {
|
||||
// if (iqk_mul_mat(m, n, k, Atype, A, B, (float *)C, ldc, ith, nth)) {
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
//#elif defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC && defined __ARM_FEATURE_FMA
|
||||
// if (Btype == GGML_TYPE_F16) {
|
||||
// if (iqk_mul_mat(m, n, k, Atype, A, B, (float *)C, ldc, ith, nth)) {
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
//#endif
|
||||
// }
|
||||
|
||||
switch (Atype) {
|
||||
|
||||
case GGML_TYPE_F32: {
|
||||
|
||||
Reference in New Issue
Block a user