mirror of
https://github.com/amd/blis.git
synced 2026-04-19 23:28:52 +00:00
Fix for n_fringe in AVX512 FP32 6x64 kernel (#42)
Details: - Fixed the problem decomposition for the n-fringe case of the 6x64 AVX512 FP32 kernel by correctly updating the pointers after each fringe kernel call. - AMD-Internal: SWLCSG-3556
This commit is contained in:
committed by
GitHub
parent
37efbd284e
commit
9e9441db47
@@ -134,6 +134,7 @@ LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_avx512_6x64m)
|
||||
}
|
||||
if( n_left >= 8)
|
||||
{
|
||||
dim_t nr_cur = n_left % 16;
|
||||
lpgemm_rowvar_f32f32f32of32_avx512_6xlt16m(m0, k0,
|
||||
ai, rs_a, cs_a, ps_a,
|
||||
bj, rs_b, cs_b,
|
||||
@@ -141,8 +142,10 @@ LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_avx512_6x64m)
|
||||
alpha, beta,
|
||||
n_left,
|
||||
post_ops_list, post_ops_attr );
|
||||
cij += nr_cur*cs_c; bj += nr_cur*cs_b; n_left -= nr_cur;
|
||||
post_ops_attr.post_op_c_j += nr_cur;
|
||||
}
|
||||
if( 8 > n_left )
|
||||
if( n_left > 0 )
|
||||
{
|
||||
lpgemm_rowvar_f32f32f32of32_6xlt8m(m0, k0,
|
||||
ai, rs_a, cs_a, ps_a,
|
||||
|
||||
Reference in New Issue
Block a user