dher2 API in BLIS make check fails on non-AVX2 platforms

- dher2 did not check whether the platform supports AVX.
  It called the AVX kernel regardless of platform
  support, which resulted in a core dump.

- Added an AVX-based platform check to both variants of dher2 to
  fix the issue.

AMD-Internal: [CPUPL-2043]
Change-Id: I1fd1dcc9336980bfb7ffa9376f491f107c889c0b
This commit is contained in:
Harsh Dave
2022-03-02 04:08:26 -06:00
committed by Dipal M Zambare
parent e12f45033d
commit d50d607995
2 changed files with 64 additions and 33 deletions

View File

@@ -249,9 +249,13 @@ void bli_dher2_unf_var1
PASTECH(d,axpy2v_ker_ft) kfp_2v;
/* Query the context for the kernel function pointer. */
kfp_2v = bli_cntx_get_l1f_ker_dt( dt, BLIS_AXPY2V_KER, cntx );
if ( cntx == NULL ) cntx = bli_gks_query_cntx();
kfp_2v = bli_cntx_get_l1f_ker_dt( dt, BLIS_AXPY2V_KER, cntx );
if( (incx == 1) && (incy == 1) && (rs_ct == 1))
if ( (bli_cpuid_is_avx_supported() == TRUE)
&& (incx == 1)
&& (incy == 1)
&& (rs_ct == 1))
{
for ( i = 0; i < m; )
{
@@ -265,29 +269,43 @@ void bli_dher2_unf_var1
if((n_behind >= 3))
{
bli_dher2_trans_zen_int_4(c10t, x0, y0, &alpha0, n_behind + 1, cs_ct);
bli_dher2_trans_zen_int_4(c10t, x0, y0,
&alpha0,
n_behind + 1,
cs_ct);
i+=4;
}
else
{
/* Apply conjx and/or conjy to chi1 and/or psi1. */
PASTEMAC(d,copycjs)( conjx, *chi1, conjx0_chi1 );
PASTEMAC(d,copycjs)( conjy, *psi1, conjy1_psi1 );
PASTEMAC(d,copycjs)( conj0, *psi1, conjy0_psi1 );
/* Apply conjx and/or conjy to chi1
* and/or psi1. */
PASTEMAC(d,copycjs)( conjx, *chi1,
conjx0_chi1 );
PASTEMAC(d,copycjs)( conjy, *psi1,
conjy1_psi1 );
PASTEMAC(d,copycjs)( conj0, *psi1,
conjy0_psi1 );
/* Compute scalars for vector subproblems. */
PASTEMAC(d,scal2s)( alpha0, conjx0_chi1, alpha0_chi1 );
PASTEMAC(d,scal2s)( alpha1, conjy1_psi1, alpha1_psi1 );
/* Compute scalars for vector
* subproblems. */
PASTEMAC(d,scal2s)( alpha0,
conjx0_chi1,
alpha0_chi1 );
PASTEMAC(d,scal2s)( alpha1,
conjy1_psi1,
alpha1_psi1 );
/* Compute alpha * chi1 * conj(psi1) after both chi1
* and psi1 have already been conjugated, if needed,
/* Compute alpha * chi1 * conj(psi1)
* after both chi1 and psi1 have
* already been conjugated, if needed
* by conjx and conjy.
*/
PASTEMAC(d,scal2s)( alpha0_chi1, conjy0_psi1,
alpha0_chi1_psi1 );
PASTEMAC(d,scal2s)( alpha0_chi1,
conjy0_psi1,
alpha0_chi1_psi1 );
/* c10t = c10t + alpha * chi1 * y0'; */
/* c10t = c10t + conj(alpha) * psi1 * x0'; */
/* c10t = c10t + alpha * chi1 * y0';*/
/* c10t = c10t + conj(alpha) * psi1 * x0';*/
kfp_2v
(
conj0,
@@ -301,10 +319,12 @@ void bli_dher2_unf_var1
cntx
);
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1)
+ conj(alpha) * psi1 * conj(chi1); */
PASTEMAC(d,adds)( alpha0_chi1_psi1, *gamma11 );
PASTEMAC(d,adds)( alpha0_chi1_psi1, *gamma11 );
/* gamma11 = gamma11 + alpha * chi1 *conj(psi1)
* + conj(alpha) * psi1 * conj(chi1);*/
PASTEMAC(d,adds)( alpha0_chi1_psi1,
*gamma11 );
PASTEMAC(d,adds)( alpha0_chi1_psi1,
*gamma11 );
i+=1;
}

View File

@@ -246,9 +246,13 @@ void bli_dher2_unf_var4
PASTECH(d,axpy2v_ker_ft) kfp_2v;
/* Query the context for the kernel function pointer. */
if ( cntx == NULL ) cntx = bli_gks_query_cntx();
kfp_2v = bli_cntx_get_l1f_ker_dt( dt, BLIS_AXPY2V_KER, cntx );
if((incx == 1) && (incy == 1) && (rs_ct == 1))
if ( (bli_cpuid_is_avx_supported() == TRUE)
&& (incx == 1)
&& (incy == 1)
&& (rs_ct == 1))
{
for ( i = 0; i < m; )
{
@@ -262,23 +266,28 @@ void bli_dher2_unf_var4
if((n_ahead >= 3))
{
bli_dher2_zen_int_4(gamma11, chi1, psi1, &alpha0, n_ahead + 1, cs_ct);
bli_dher2_zen_int_4(gamma11, chi1,
psi1, &alpha0,
n_ahead + 1, cs_ct);
i+= 4;
}
else
{
/* Compute scalars for vector subproblems. */
PASTEMAC(d,scal2s)( alpha0, *psi1, alpha0_psi1 );
PASTEMAC(d,scal2s)( alpha0, *chi1, alpha1_chi1 );
/* Compute scalars for vector
* subproblems. */
PASTEMAC(d,scal2s)( alpha0, *psi1,
alpha0_psi1 );
PASTEMAC(d,scal2s)( alpha0, *chi1,
alpha1_chi1 );
/* Compute alpha * chi1 * conj(psi1) after both chi1
* and psi1 have
already been conjugated, if needed, by conjx and
conjy. */
/* Compute alpha * chi1 * conj(psi1)
* after both chi1 and psi1 have
* already been conjugated, if needed,
* by conjx and conjy. */
PASTEMAC(d,scal2s)( alpha0_psi1, *chi1,
alpha0_chi1_psi1 );
alpha0_chi1_psi1 );
/* c21 = c21 + alpha * x2 * conj(psi1); */
/* c21 = c21 + alpha * x2 * conj(psi1)*/
/* c21 = c21 + conj(alpha) * y2 * conj(chi1); */
kfp_2v
@@ -295,8 +304,10 @@ void bli_dher2_unf_var4
);
PASTEMAC(d,adds)( alpha0_chi1_psi1, *gamma11 );
PASTEMAC(d,adds)( alpha0_chi1_psi1, *gamma11 );
PASTEMAC(d,adds)( alpha0_chi1_psi1,
*gamma11 );
PASTEMAC(d,adds)( alpha0_chi1_psi1,
*gamma11 );
i+=1;
}
}