From cb74202db39dc8cb81fdd06f8a445f8837e27853 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Tue, 13 Sep 2022 11:46:24 -0500 Subject: [PATCH] Fixed incorrect sizeof(type) in edge case macros. (#662) Details: - In bli_edge_case_macro_defs.h, the GEMM_UKR_SETUP_CT_PRE() and GEMMTRSM_UKR_SETUP_CT_PRE() macros previously declared their temporary ct microtiles as: PASTEMAC(ch,ctype) _ct[ BLIS_STACK_BUF_MAX_SIZE / sizeof( PASTEMAC(ch,type) ) ] \ __attribute__((aligned(alignment))); \ The problem here is that sizeof( PASTEMAC(ch,type) ) evaluates to things like sizeof( BLIS_DOUBLE ), not sizeof( double ), and since BLIS_DOUBLE is an enum constant, it typically has type int, which means the sizeof() expression evaluates to the wrong value. This was likely a benign bug, though, since BLIS does not support any computational datatypes whose size is smaller than sizeof( int ), which means the ct array would be *over*-allocated rather than under-allocated. Thanks to @moon-chilled for identifying and reporting this bug in #624. - CREDITS file update. 
--- CREDITS | 1 + frame/include/bli_edge_case_macro_defs.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CREDITS b/CREDITS index 49361c801..152de0a4b 100644 --- a/CREDITS +++ b/CREDITS @@ -68,6 +68,7 @@ but many others have contributed code and feedback, including Devin Matthews @devinamatthews (The University of Texas at Austin) Stefanos Mavros @smavros Mithun Mohan @MithunMohanKadavil (AMD) + @moon-chilled Ilknur Mustafazade @Runkli @nagsingh Bhaskar Nallani @BhaskarNallani (AMD) diff --git a/frame/include/bli_edge_case_macro_defs.h b/frame/include/bli_edge_case_macro_defs.h index 70d97d5d1..6fc4e46c8 100644 --- a/frame/include/bli_edge_case_macro_defs.h +++ b/frame/include/bli_edge_case_macro_defs.h @@ -47,7 +47,7 @@ PASTEMAC(ch,ctype)* restrict _c = c; \ const inc_t _rs_c = rs_c; \ const inc_t _cs_c = cs_c; \ - PASTEMAC(ch,ctype) _ct[ BLIS_STACK_BUF_MAX_SIZE / sizeof( PASTEMAC(ch,type) ) ] \ + PASTEMAC(ch,ctype) _ct[ BLIS_STACK_BUF_MAX_SIZE / sizeof( PASTEMAC(ch,ctype) ) ] \ __attribute__((aligned(alignment))); \ const inc_t _rs_ct = row_major ? nr : 1; \ const inc_t _cs_ct = row_major ? 1 : mr; @@ -137,7 +137,7 @@ PASTEMAC(ch,ctype)* restrict _c = c11; \ const inc_t _rs_c = rs_c; \ const inc_t _cs_c = cs_c; \ - PASTEMAC(ch,ctype) _ct[ BLIS_STACK_BUF_MAX_SIZE / sizeof( PASTEMAC(ch,type) ) ] \ + PASTEMAC(ch,ctype) _ct[ BLIS_STACK_BUF_MAX_SIZE / sizeof( PASTEMAC(ch,ctype) ) ] \ __attribute__((aligned(alignment))); \ const inc_t _rs_ct = row_major ? nr : 1; \ const inc_t _cs_ct = row_major ? 1 : mr;