From d186cfdf2e5800bc3e1aea3dcbc220f021187c05 Mon Sep 17 00:00:00 2001 From: bhaskarn Date: Mon, 10 Aug 2020 17:45:23 +0530 Subject: [PATCH] CPUPL-1074: Bug fix in the sgemmsup 1x16 kernel for the beta-zero case with column-stored C. The increment of the rcx register was missing; because of this, 4 values in the output were overwritten. Change-Id: Ia3028040dce3e615f1db5a331498d86faadcf916 --- kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_s6x16.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_s6x16.c b/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_s6x16.c index 6c9f8cabe..32d5b6584 100644 --- a/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_s6x16.c +++ b/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_s6x16.c @@ -2479,6 +2479,7 @@ void bli_sgemmsup_rv_zen_asm_1x16 vmovss(xmm1, mem(rcx, rsi, 1)) vmovss(xmm2, mem(rcx, rsi, 2)) vmovss(xmm14, mem(rcx, rax, 1)) + lea(mem(rcx, rsi, 4), rcx) // rcx += cs_c vextractf128(imm(0x0), ymm5, xmm0)//c0-c3 vshufps(imm(0x01), xmm0, xmm0,xmm1) vshufps(imm(0x02), xmm0, xmm0,xmm2)