diff --git a/frame/3/trmm/bli_trmm_ll_ker_var2.c b/frame/3/trmm/bli_trmm_ll_ker_var2.c index 01fc281ee..927357105 100644 --- a/frame/3/trmm/bli_trmm_ll_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ll_ker_var2.c @@ -223,14 +223,15 @@ void PASTEMAC(ch,varname)( \ \ /* If there is a zero region above where the diagonal of A intersects the left edge of the block, adjust the pointer to C and treat this case as - if the diagonal offset were zero. This skips over the region (in - increments of MR) that was not packed. (Note we skip in increments of - MR since that is how the region would have been skipped by packm.) */ \ + if the diagonal offset were zero. This skips over the region that was + not packed. (Note we assume the diagonal offset is a multiple of MR; + this assumption will hold as long as the cache blocksizes are each a + multiple of MR and NR.) */ \ if ( diagoffa < 0 ) \ { \ - i = ( -diagoffa / MR ) * MR; \ + i = -diagoffa; \ m = m - i; \ - diagoffa = -diagoffa % MR; \ + diagoffa = 0; \ c_cast = c_cast + (i )*rs_c; \ } \ \ diff --git a/frame/3/trmm/bli_trmm_ru_ker_var2.c b/frame/3/trmm/bli_trmm_ru_ker_var2.c index 57d112ce5..d6498f180 100644 --- a/frame/3/trmm/bli_trmm_ru_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ru_ker_var2.c @@ -224,14 +224,14 @@ void PASTEMAC(ch,varname)( \ /* If there is a zero region to the left of where the diagonal of B intersects the top edge of the panel, adjust the pointer to C and treat this case as if the diagonal offset were zero. This skips over - the region (in increments of NR) that was not packed. (Note we skip - in increments of NR since that is how the region would have been - skipped by packm.) */ \ + the region that was not packed. (Note we assume the diagonal offset + is a multiple of NR; this assumption will hold as long as the cache + blocksizes are each a multiple of MR and NR.) 
*/ \ if ( diagoffb > 0 ) \ { \ - j = ( diagoffb / NR ) * NR; \ + j = diagoffb; \ n = n - j; \ - diagoffb = diagoffb % NR; \ + diagoffb = 0; \ c_cast = c_cast + (j )*cs_c; \ } \ \ diff --git a/frame/3/trsm/bli_trsm_ll_ker_var2.c b/frame/3/trsm/bli_trsm_ll_ker_var2.c index 11b39cc59..bb0ed34db 100644 --- a/frame/3/trsm/bli_trsm_ll_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ll_ker_var2.c @@ -236,14 +236,15 @@ void PASTEMAC(ch,varname)( \ \ /* If there is a zero region above where the diagonal of A intersects the left edge of the block, adjust the pointer to C and treat this case as - if the diagonal offset were zero. This skips over the region (in - increments of MR) that was not packed. (Note we skip in increments of - MR since that is how the region would have been skipped by packm.) */ \ + if the diagonal offset were zero. This skips over the region that was + not packed. (Note we assume the diagonal offset is a multiple of MR; + this assumption will hold as long as the cache blocksizes are each a + multiple of MR and NR.) */ \ if ( diagoffa < 0 ) \ { \ - i = ( -diagoffa / MR ) * MR; \ + i = -diagoffa; \ m = m - i; \ - diagoffa = -diagoffa % MR; \ + diagoffa = 0; \ c_cast = c_cast + (i )*rs_c; \ } \ \ diff --git a/frame/3/trsm/bli_trsm_rl_ker_var2.c b/frame/3/trsm/bli_trsm_rl_ker_var2.c index ffa41aa9a..5d0288c40 100644 --- a/frame/3/trsm/bli_trsm_rl_ker_var2.c +++ b/frame/3/trsm/bli_trsm_rl_ker_var2.c @@ -333,7 +333,8 @@ void PASTEMAC(ch,varname)( \ \ n_cur = ( bli_is_not_edge_b( jb, n_iter, n_left ) ? NR : n_left ); \ \ - /* Compute various offsets into and lengths of parts of B. */ \ + /* Determine the offset to and length of the panel that was packed + so we can index into the corresponding location in A. 
*/ \ off_b11 = bli_max( -diagoffb_j, 0 ); \ k_b1121 = k - off_b11; \ k_b11 = NR; \ diff --git a/frame/3/trsm/bli_trsm_ru_ker_var2.c b/frame/3/trsm/bli_trsm_ru_ker_var2.c index 44fe387a6..9bac5c946 100644 --- a/frame/3/trsm/bli_trsm_ru_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ru_ker_var2.c @@ -245,14 +245,14 @@ void PASTEMAC(ch,varname)( \ /* If there is a zero region to the left of where the diagonal of B intersects the top edge of the panel, adjust the pointer to C and treat this case as if the diagonal offset were zero. This skips over - the region (in increments of NR) that was not packed. (Note we skip - in increments of NR since that is how the region would have been - skipped by packm.) */ \ + the region that was not packed. (Note we assume the diagonal offset + is a multiple of NR; this assumption will hold as long as the cache + blocksizes are each a multiple of MR and NR.) */ \ if ( diagoffb > 0 ) \ { \ - j = ( diagoffb / NR ) * NR; \ + j = diagoffb; \ n = n - j; \ - diagoffb = diagoffb % NR; \ + diagoffb = 0; \ c_cast = c_cast + (j )*cs_c; \ } \ \