mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Minor fixes to trsm consistent with the previous commit on trmm.
Details: - Removed use of bli_min() and bli_max() that were only being used to try to support situations where the diagonal would intersect the short end of some micro-panels, which is a situation that is disallowed at a higher level by various constraints on the register and cache blocksizes. This only affected trsm_ll and trsm_lu. - Use panel stride as passed into the macro-kernel rather than computing it via k and PACKMR/PACKNR. This affects all macro-kernels of trsm.
This commit is contained in:
@@ -243,7 +243,7 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( m_left ) ++m_iter; \
|
||||
\
|
||||
/* Determine some increments used to step through A, B, and C. */ \
|
||||
rstep_a = k * PACKMR; \
|
||||
rstep_a = ps_a; \
|
||||
\
|
||||
cstep_b = ps_b; \
|
||||
\
|
||||
@@ -294,7 +294,7 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
/* Compute various offsets into and lengths of parts of A. */ \
|
||||
off_a10 = 0; \
|
||||
k_a1011 = bli_min( k, diagoffa_i + MR ); \
|
||||
k_a1011 = diagoffa_i + MR; \
|
||||
k_a10 = k_a1011 - MR; \
|
||||
off_a11 = k_a10; \
|
||||
\
|
||||
|
||||
@@ -252,7 +252,7 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( m_left ) ++m_iter; \
|
||||
\
|
||||
/* Determine some increments used to step through A, B, and C. */ \
|
||||
rstep_a = k * PACKMR; \
|
||||
rstep_a = ps_a; \
|
||||
\
|
||||
cstep_b = ps_b; \
|
||||
\
|
||||
@@ -303,7 +303,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict a2; \
|
||||
\
|
||||
/* Compute various offsets into and lengths of parts of A. */ \
|
||||
off_a11 = bli_max( diagoffa_i, 0 ); \
|
||||
off_a11 = diagoffa_i; \
|
||||
k_a1112 = k - off_a11;; \
|
||||
k_a11 = MR; \
|
||||
k_a12 = k_a1112 - MR; \
|
||||
|
||||
@@ -267,7 +267,7 @@ void PASTEMAC(ch,varname)( \
|
||||
/* Determine some increments used to step through A, B, and C. */ \
|
||||
rstep_a = ps_a; \
|
||||
\
|
||||
cstep_b = k * PACKNR; \
|
||||
cstep_b = ps_b; \
|
||||
\
|
||||
rstep_c = rs_c * MR; \
|
||||
cstep_c = cs_c * NR; \
|
||||
|
||||
@@ -262,7 +262,7 @@ void PASTEMAC(ch,varname)( \
|
||||
/* Determine some increments used to step through A, B, and C. */ \
|
||||
rstep_a = ps_a; \
|
||||
\
|
||||
cstep_b = k * PACKNR; \
|
||||
cstep_b = ps_b; \
|
||||
\
|
||||
rstep_c = rs_c * MR; \
|
||||
cstep_c = cs_c * NR; \
|
||||
|
||||
Reference in New Issue
Block a user