Minor fixes to trsm, consistent with previous changes to trmm.

Details:
- Removed use of bli_min() and bli_max() that were only being used to
  try to support situations where the diagonal would intersect the
  short end of some micro-panels, which is a situation that is disallowed
  at a higher level by various constraints on the register and cache
  blocksizes. This only affected trsm_ll and trsm_lu.
- Use panel stride as passed into the macro-kernel rather than compute
  it via k and PACKMR/PACKNR. This affects all macro-kernels of trsm.
This commit is contained in:
Field G. Van Zee
2014-02-13 09:29:55 -06:00
parent 6260b0b5f8
commit bd3ab1ad4c
4 changed files with 6 additions and 6 deletions

View File

@@ -243,7 +243,7 @@ void PASTEMAC(ch,varname)( \
if ( m_left ) ++m_iter; \
\
/* Determine some increments used to step through A, B, and C. */ \
rstep_a = k * PACKMR; \
rstep_a = ps_a; \
\
cstep_b = ps_b; \
\
@@ -294,7 +294,7 @@ void PASTEMAC(ch,varname)( \
\
/* Compute various offsets into and lengths of parts of A. */ \
off_a10 = 0; \
k_a1011 = bli_min( k, diagoffa_i + MR ); \
k_a1011 = diagoffa_i + MR; \
k_a10 = k_a1011 - MR; \
off_a11 = k_a10; \
\

View File

@@ -252,7 +252,7 @@ void PASTEMAC(ch,varname)( \
if ( m_left ) ++m_iter; \
\
/* Determine some increments used to step through A, B, and C. */ \
rstep_a = k * PACKMR; \
rstep_a = ps_a; \
\
cstep_b = ps_b; \
\
@@ -303,7 +303,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict a2; \
\
/* Compute various offsets into and lengths of parts of A. */ \
off_a11 = bli_max( diagoffa_i, 0 ); \
off_a11 = diagoffa_i; \
k_a1112 = k - off_a11;; \
k_a11 = MR; \
k_a12 = k_a1112 - MR; \

View File

@@ -267,7 +267,7 @@ void PASTEMAC(ch,varname)( \
/* Determine some increments used to step through A, B, and C. */ \
rstep_a = ps_a; \
\
cstep_b = k * PACKNR; \
cstep_b = ps_b; \
\
rstep_c = rs_c * MR; \
cstep_c = cs_c * NR; \

View File

@@ -262,7 +262,7 @@ void PASTEMAC(ch,varname)( \
/* Determine some increments used to step through A, B, and C. */ \
rstep_a = ps_a; \
\
cstep_b = k * PACKNR; \
cstep_b = ps_b; \
\
rstep_c = rs_c * MR; \
cstep_c = cs_c * NR; \