mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Minor simplifications to trmm, trsm macro-kernels.
Details: - Simplified some code that would have allowed the diagonal of a trmm or trsm triangular matrix to intersect the short end of a micro-panel. This is disallowed via higher-level constraints on cache blocksizes, so this code was never needed and only served to obfuscate. - Updated some comments in trmm, trsm macro-kernels.
This commit is contained in:
@@ -223,14 +223,15 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
/* If there is a zero region above where the diagonal of A intersects the
|
||||
left edge of the block, adjust the pointer to C and treat this case as
|
||||
if the diagonal offset were zero. This skips over the region (in
|
||||
increments of MR) that was not packed. (Note we skip in increments of
|
||||
MR since that is how the region would have been skipped by packm.) */ \
|
||||
if the diagonal offset were zero. This skips over the region that was
|
||||
not packed. (Note we assume the diagonal offset is a multiple of MR;
|
||||
this assumption will hold as long as the cache blocksizes are each a
|
||||
multiple of MR and NR.) */ \
|
||||
if ( diagoffa < 0 ) \
|
||||
{ \
|
||||
i = ( -diagoffa / MR ) * MR; \
|
||||
i = -diagoffa; \
|
||||
m = m - i; \
|
||||
diagoffa = -diagoffa % MR; \
|
||||
diagoffa = 0; \
|
||||
c_cast = c_cast + (i )*rs_c; \
|
||||
} \
|
||||
\
|
||||
|
||||
@@ -224,14 +224,14 @@ void PASTEMAC(ch,varname)( \
|
||||
/* If there is a zero region to the left of where the diagonal of B
|
||||
intersects the top edge of the panel, adjust the pointer to C and
|
||||
treat this case as if the diagonal offset were zero. This skips over
|
||||
the region (in increments of NR) that was not packed. (Note we skip
|
||||
in increments of NR since that is how the region would have been
|
||||
skipped by packm.) */ \
|
||||
the region that was not packed. (Note we assume the diagonal offset
|
||||
is a multiple of NR; this assumption will hold as long as the cache
|
||||
blocksizes are each a multiple of MR and NR.) */ \
|
||||
if ( diagoffb > 0 ) \
|
||||
{ \
|
||||
j = ( diagoffb / NR ) * NR; \
|
||||
j = diagoffb; \
|
||||
n = n - j; \
|
||||
diagoffb = diagoffb % NR; \
|
||||
diagoffb = 0; \
|
||||
c_cast = c_cast + (j )*cs_c; \
|
||||
} \
|
||||
\
|
||||
|
||||
@@ -236,14 +236,15 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
/* If there is a zero region above where the diagonal of A intersects the
|
||||
left edge of the block, adjust the pointer to C and treat this case as
|
||||
if the diagonal offset were zero. This skips over the region (in
|
||||
increments of MR) that was not packed. (Note we skip in increments of
|
||||
MR since that is how the region would have been skipped by packm.) */ \
|
||||
if the diagonal offset were zero. This skips over the region that was
|
||||
not packed. (Note we assume the diagonal offset is a multiple of MR;
|
||||
this assumption will hold as long as the cache blocksizes are each a
|
||||
multiple of MR and NR.) */ \
|
||||
if ( diagoffa < 0 ) \
|
||||
{ \
|
||||
i = ( -diagoffa / MR ) * MR; \
|
||||
i = -diagoffa; \
|
||||
m = m - i; \
|
||||
diagoffa = -diagoffa % MR; \
|
||||
diagoffa = 0; \
|
||||
c_cast = c_cast + (i )*rs_c; \
|
||||
} \
|
||||
\
|
||||
|
||||
@@ -333,7 +333,8 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
n_cur = ( bli_is_not_edge_b( jb, n_iter, n_left ) ? NR : n_left ); \
|
||||
\
|
||||
/* Compute various offsets into and lengths of parts of B. */ \
|
||||
/* Determine the offset to and length of the panel that was packed
|
||||
so we can index into the corresponding location in A. */ \
|
||||
off_b11 = bli_max( -diagoffb_j, 0 ); \
|
||||
k_b1121 = k - off_b11; \
|
||||
k_b11 = NR; \
|
||||
|
||||
@@ -245,14 +245,14 @@ void PASTEMAC(ch,varname)( \
|
||||
/* If there is a zero region to the left of where the diagonal of B
|
||||
intersects the top edge of the panel, adjust the pointer to C and
|
||||
treat this case as if the diagonal offset were zero. This skips over
|
||||
the region (in increments of NR) that was not packed. (Note we skip
|
||||
in increments of NR since that is how the region would have been
|
||||
skipped by packm.) */ \
|
||||
the region that was not packed. (Note we assume the diagonal offset
|
||||
is a multiple of NR; this assumption will hold as long as the cache
|
||||
blocksizes are each a multiple of MR and NR.) */ \
|
||||
if ( diagoffb > 0 ) \
|
||||
{ \
|
||||
j = ( diagoffb / NR ) * NR; \
|
||||
j = diagoffb; \
|
||||
n = n - j; \
|
||||
diagoffb = diagoffb % NR; \
|
||||
diagoffb = 0; \
|
||||
c_cast = c_cast + (j )*cs_c; \
|
||||
} \
|
||||
\
|
||||
|
||||
Reference in New Issue
Block a user