Minor simplifications to trmm, trsm macro-kernels.

Details:
- Simplified some code that would have allowed the diagonal of a trmm
  or trsm triangular matrix to intersect the short end of a micro-panel.
  This is disallowed via higher-level constraints on cache blocksizes, so
  this code was never needed and only served to obfuscate.
- Updated some comments in trmm, trsm macro-kernels.
This commit is contained in:
Field G. Van Zee
2014-03-19 15:47:54 -05:00
parent a3902750b9
commit 9b0e715f29
5 changed files with 24 additions and 21 deletions

View File

@@ -223,14 +223,15 @@ void PASTEMAC(ch,varname)( \
\
/* If there is a zero region above where the diagonal of A intersects the
left edge of the block, adjust the pointer to C and treat this case as
if the diagonal offset were zero. This skips over the region (in
increments of MR) that was not packed. (Note we skip in increments of
MR since that is how the region would have been skipped by packm.) */ \
if the diagonal offset were zero. This skips over the region that was
not packed. (Note we assume the diagonal offset is a multiple of MR;
this assumption will hold as long as the cache blocksizes are each a
multiple of MR and NR.) */ \
if ( diagoffa < 0 ) \
{ \
i = ( -diagoffa / MR ) * MR; \
i = -diagoffa; \
m = m - i; \
diagoffa = -diagoffa % MR; \
diagoffa = 0; \
c_cast = c_cast + (i )*rs_c; \
} \
\

View File

@@ -224,14 +224,14 @@ void PASTEMAC(ch,varname)( \
/* If there is a zero region to the left of where the diagonal of B
intersects the top edge of the panel, adjust the pointer to C and
treat this case as if the diagonal offset were zero. This skips over
the region (in increments of NR) that was not packed. (Note we skip
in increments of NR since that is how the region would have been
skipped by packm.) */ \
the region that was not packed. (Note we assume the diagonal offset
is a multiple of NR; this assumption will hold as long as the cache
blocksizes are each a multiple of MR and NR.) */ \
if ( diagoffb > 0 ) \
{ \
j = ( diagoffb / NR ) * NR; \
j = diagoffb; \
n = n - j; \
diagoffb = diagoffb % NR; \
diagoffb = 0; \
c_cast = c_cast + (j )*cs_c; \
} \
\

View File

@@ -236,14 +236,15 @@ void PASTEMAC(ch,varname)( \
\
/* If there is a zero region above where the diagonal of A intersects the
left edge of the block, adjust the pointer to C and treat this case as
if the diagonal offset were zero. This skips over the region (in
increments of MR) that was not packed. (Note we skip in increments of
MR since that is how the region would have been skipped by packm.) */ \
if the diagonal offset were zero. This skips over the region that was
not packed. (Note we assume the diagonal offset is a multiple of MR;
this assumption will hold as long as the cache blocksizes are each a
multiple of MR and NR.) */ \
if ( diagoffa < 0 ) \
{ \
i = ( -diagoffa / MR ) * MR; \
i = -diagoffa; \
m = m - i; \
diagoffa = -diagoffa % MR; \
diagoffa = 0; \
c_cast = c_cast + (i )*rs_c; \
} \
\

View File

@@ -333,7 +333,8 @@ void PASTEMAC(ch,varname)( \
\
n_cur = ( bli_is_not_edge_b( jb, n_iter, n_left ) ? NR : n_left ); \
\
/* Compute various offsets into and lengths of parts of B. */ \
/* Determine the offset to and length of the panel that was packed
so we can index into the corresponding location in A. */ \
off_b11 = bli_max( -diagoffb_j, 0 ); \
k_b1121 = k - off_b11; \
k_b11 = NR; \

View File

@@ -245,14 +245,14 @@ void PASTEMAC(ch,varname)( \
/* If there is a zero region to the left of where the diagonal of B
intersects the top edge of the panel, adjust the pointer to C and
treat this case as if the diagonal offset were zero. This skips over
the region (in increments of NR) that was not packed. (Note we skip
in increments of NR since that is how the region would have been
skipped by packm.) */ \
the region that was not packed. (Note we assume the diagonal offset
is a multiple of NR; this assumption will hold as long as the cache
blocksizes are each a multiple of MR and NR.) */ \
if ( diagoffb > 0 ) \
{ \
j = ( diagoffb / NR ) * NR; \
j = diagoffb; \
n = n - j; \
diagoffb = diagoffb % NR; \
diagoffb = 0; \
c_cast = c_cast + (j )*cs_c; \
} \
\