From 9b0e715f29338a1a1d6445907d2445c35f011121 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Wed, 19 Mar 2014 15:47:54 -0500 Subject: [PATCH] Minor simplifications to trmm, trsm macro-kernels. Details: - Simplified some code that would have allowed the diagonal of a trmm or trsm triangular matrix to intersect the short end of a micro-panel. This is disallowed via higher-level constraints on cache blocksizes, so this code was never needed and only served to obfuscate. - Updated some comments in trmm, trsm macro-kernels. --- frame/3/trmm/bli_trmm_ll_ker_var2.c | 11 ++++++----- frame/3/trmm/bli_trmm_ru_ker_var2.c | 10 +++++----- frame/3/trsm/bli_trsm_ll_ker_var2.c | 11 ++++++----- frame/3/trsm/bli_trsm_rl_ker_var2.c | 3 ++- frame/3/trsm/bli_trsm_ru_ker_var2.c | 10 +++++----- 5 files changed, 24 insertions(+), 21 deletions(-) diff --git a/frame/3/trmm/bli_trmm_ll_ker_var2.c b/frame/3/trmm/bli_trmm_ll_ker_var2.c index 01fc281ee..927357105 100644 --- a/frame/3/trmm/bli_trmm_ll_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ll_ker_var2.c @@ -223,14 +223,15 @@ void PASTEMAC(ch,varname)( \ \ /* If there is a zero region above where the diagonal of A intersects the left edge of the block, adjust the pointer to C and treat this case as - if the diagonal offset were zero. This skips over the region (in - increments of MR) that was not packed. (Note we skip in increments of - MR since that is how the region would have been skipped by packm.) */ \ + if the diagonal offset were zero. This skips over the region that was + not packed. (Note we assume the diagonal offset is a multiple of MR; + this assumption will hold as long as the cache blocksizes are each a + multiple of MR and NR.) 
*/ \ if ( diagoffa < 0 ) \ { \ - i = ( -diagoffa / MR ) * MR; \ + i = -diagoffa; \ m = m - i; \ - diagoffa = -diagoffa % MR; \ + diagoffa = 0; \ c_cast = c_cast + (i )*rs_c; \ } \ \ diff --git a/frame/3/trmm/bli_trmm_ru_ker_var2.c b/frame/3/trmm/bli_trmm_ru_ker_var2.c index 57d112ce5..d6498f180 100644 --- a/frame/3/trmm/bli_trmm_ru_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ru_ker_var2.c @@ -224,14 +224,14 @@ void PASTEMAC(ch,varname)( \ /* If there is a zero region to the left of where the diagonal of B intersects the top edge of the panel, adjust the pointer to C and treat this case as if the diagonal offset were zero. This skips over - the region (in increments of NR) that was not packed. (Note we skip - in increments of NR since that is how the region would have been - skipped by packm.) */ \ + the region that was not packed. (Note we assume the diagonal offset + is a multiple of NR; this assumption will hold as long as the cache + blocksizes are each a multiple of MR and NR.) */ \ if ( diagoffb > 0 ) \ { \ - j = ( diagoffb / NR ) * NR; \ + j = diagoffb; \ n = n - j; \ - diagoffb = diagoffb % NR; \ + diagoffb = 0; \ c_cast = c_cast + (j )*cs_c; \ } \ \ diff --git a/frame/3/trsm/bli_trsm_ll_ker_var2.c b/frame/3/trsm/bli_trsm_ll_ker_var2.c index 11b39cc59..bb0ed34db 100644 --- a/frame/3/trsm/bli_trsm_ll_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ll_ker_var2.c @@ -236,14 +236,15 @@ void PASTEMAC(ch,varname)( \ \ /* If there is a zero region above where the diagonal of A intersects the left edge of the block, adjust the pointer to C and treat this case as - if the diagonal offset were zero. This skips over the region (in - increments of MR) that was not packed. (Note we skip in increments of - MR since that is how the region would have been skipped by packm.) */ \ + if the diagonal offset were zero. This skips over the region that was + not packed. 
(Note we assume the diagonal offset is a multiple of MR; + this assumption will hold as long as the cache blocksizes are each a + multiple of MR and NR.) */ \ if ( diagoffa < 0 ) \ { \ - i = ( -diagoffa / MR ) * MR; \ + i = -diagoffa; \ m = m - i; \ - diagoffa = -diagoffa % MR; \ + diagoffa = 0; \ c_cast = c_cast + (i )*rs_c; \ } \ \ diff --git a/frame/3/trsm/bli_trsm_rl_ker_var2.c b/frame/3/trsm/bli_trsm_rl_ker_var2.c index ffa41aa9a..5d0288c40 100644 --- a/frame/3/trsm/bli_trsm_rl_ker_var2.c +++ b/frame/3/trsm/bli_trsm_rl_ker_var2.c @@ -333,7 +333,8 @@ void PASTEMAC(ch,varname)( \ \ n_cur = ( bli_is_not_edge_b( jb, n_iter, n_left ) ? NR : n_left ); \ \ - /* Compute various offsets into and lengths of parts of B. */ \ + /* Determine the offset to and length of the panel that was packed + so we can index into the corresponding location in A. */ \ off_b11 = bli_max( -diagoffb_j, 0 ); \ k_b1121 = k - off_b11; \ k_b11 = NR; \ diff --git a/frame/3/trsm/bli_trsm_ru_ker_var2.c b/frame/3/trsm/bli_trsm_ru_ker_var2.c index 44fe387a6..9bac5c946 100644 --- a/frame/3/trsm/bli_trsm_ru_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ru_ker_var2.c @@ -245,14 +245,14 @@ void PASTEMAC(ch,varname)( \ /* If there is a zero region to the left of where the diagonal of B intersects the top edge of the panel, adjust the pointer to C and treat this case as if the diagonal offset were zero. This skips over - the region (in increments of NR) that was not packed. (Note we skip - in increments of NR since that is how the region would have been - skipped by packm.) */ \ + the region that was not packed. (Note we assume the diagonal offset + is a multiple of NR; this assumption will hold as long as the cache + blocksizes are each a multiple of MR and NR.) */ \ if ( diagoffb > 0 ) \ { \ - j = ( diagoffb / NR ) * NR; \ + j = diagoffb; \ n = n - j; \ - diagoffb = diagoffb % NR; \ + diagoffb = 0; \ c_cast = c_cast + (j )*cs_c; \ } \ \