From 22c6b5dc4c9cc21942f8ccc30891f9b4385a9504 Mon Sep 17 00:00:00 2001 From: Nicholai Tukanov Date: Tue, 30 Mar 2021 19:07:42 -0500 Subject: [PATCH] Fixed bug in power10 microkernel I/O. (#488) Details: - Fixed a bug in the POWER10 DGEMM kernel whereby the microkernel did not store the microtile result correctly due to incorrect index calculations. (The error was introduced when I reorganized the 'kernels/power10/3' directory.) --- kernels/power10/3/bli_dgemm_power10_mma.c | 32 +++++++++++------------ 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/kernels/power10/3/bli_dgemm_power10_mma.c b/kernels/power10/3/bli_dgemm_power10_mma.c index 83f1c1dc5..396824986 100644 --- a/kernels/power10/3/bli_dgemm_power10_mma.c +++ b/kernels/power10/3/bli_dgemm_power10_mma.c @@ -168,25 +168,25 @@ void bli_dgemm_power10_mma_8x8 // handle beta cases if (beta_ != 0.0) { - SAVE_ACC(dv4sf_t, &acc0, rs_c, 0 ); - SAVE_ACC(dv4sf_t, &acc1, rs_c, 4 ); - SAVE_ACC(dv4sf_t, &acc2, rs_c, 8 ); - SAVE_ACC(dv4sf_t, &acc3, rs_c, 12 ); - SAVE_ACC(dv4sf_t, &acc4, rs_c, 4*rs_c); - SAVE_ACC(dv4sf_t, &acc5, rs_c, 4+4*rs_c); - SAVE_ACC(dv4sf_t, &acc6, rs_c, 8+4*rs_c); - SAVE_ACC(dv4sf_t, &acc7, rs_c, 12+4*rs_c); + SAVE_ACC(dv4sf_t, &acc0, rs_c, 0 ); + SAVE_ACC(dv4sf_t, &acc1, rs_c, 2 ); + SAVE_ACC(dv4sf_t, &acc2, rs_c, 4 ); + SAVE_ACC(dv4sf_t, &acc3, rs_c, 6 ); + SAVE_ACC(dv4sf_t, &acc4, rs_c, 4*rs_c); + SAVE_ACC(dv4sf_t, &acc5, rs_c, 2+4*rs_c); + SAVE_ACC(dv4sf_t, &acc6, rs_c, 4+4*rs_c); + SAVE_ACC(dv4sf_t, &acc7, rs_c, 6+4*rs_c); } else { - SAVE_ACC_bz(dv4sf_t, &acc0, rs_c, 0 ); - SAVE_ACC_bz(dv4sf_t, &acc1, rs_c, 4 ); - SAVE_ACC_bz(dv4sf_t, &acc2, rs_c, 8 ); - SAVE_ACC_bz(dv4sf_t, &acc3, rs_c, 12 ); - SAVE_ACC_bz(dv4sf_t, &acc4, rs_c, 4*rs_c); - SAVE_ACC_bz(dv4sf_t, &acc5, rs_c, 4+4*rs_c); - SAVE_ACC_bz(dv4sf_t, &acc6, rs_c, 8+4*rs_c); - SAVE_ACC_bz(dv4sf_t, &acc7, rs_c, 12+4*rs_c); + SAVE_ACC_bz(dv4sf_t, &acc0, rs_c, 0 ); + SAVE_ACC_bz(dv4sf_t, &acc1, rs_c, 2 ); + 
SAVE_ACC_bz(dv4sf_t, &acc2, rs_c, 4 ); + SAVE_ACC_bz(dv4sf_t, &acc3, rs_c, 6 ); + SAVE_ACC_bz(dv4sf_t, &acc4, rs_c, 4*rs_c); + SAVE_ACC_bz(dv4sf_t, &acc5, rs_c, 2+4*rs_c); + SAVE_ACC_bz(dv4sf_t, &acc6, rs_c, 4+4*rs_c); + SAVE_ACC_bz(dv4sf_t, &acc7, rs_c, 6+4*rs_c); } }