mirror of
https://github.com/amd/blis.git
synced 2026-05-13 10:35:38 +00:00
dher2 API in BLIS make check fails on non-AVX2 platforms
- dher2 did not check whether the platform supports AVX; it called the AVX kernel regardless of platform support, which resulted in a core dump. - Added an AVX-based platform check to both variants of dher2 to fix the issue. AMD-Internal: [CPUPL-2043] Change-Id: I1fd1dcc9336980bfb7ffa9376f491f107c889c0b
This commit is contained in:
committed by
Dipal M Zambare
parent
e12f45033d
commit
d50d607995
@@ -249,9 +249,13 @@ void bli_dher2_unf_var1
|
||||
PASTECH(d,axpy2v_ker_ft) kfp_2v;
|
||||
|
||||
/* Query the context for the kernel function pointer. */
|
||||
kfp_2v = bli_cntx_get_l1f_ker_dt( dt, BLIS_AXPY2V_KER, cntx );
|
||||
if ( cntx == NULL ) cntx = bli_gks_query_cntx();
|
||||
kfp_2v = bli_cntx_get_l1f_ker_dt( dt, BLIS_AXPY2V_KER, cntx );
|
||||
|
||||
if( (incx == 1) && (incy == 1) && (rs_ct == 1))
|
||||
if ( (bli_cpuid_is_avx_supported() == TRUE)
|
||||
&& (incx == 1)
|
||||
&& (incy == 1)
|
||||
&& (rs_ct == 1))
|
||||
{
|
||||
for ( i = 0; i < m; )
|
||||
{
|
||||
@@ -265,29 +269,43 @@ void bli_dher2_unf_var1
|
||||
|
||||
if((n_behind >= 3))
|
||||
{
|
||||
bli_dher2_trans_zen_int_4(c10t, x0, y0, &alpha0, n_behind + 1, cs_ct);
|
||||
bli_dher2_trans_zen_int_4(c10t, x0, y0,
|
||||
&alpha0,
|
||||
n_behind + 1,
|
||||
cs_ct);
|
||||
i+=4;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Apply conjx and/or conjy to chi1 and/or psi1. */
|
||||
PASTEMAC(d,copycjs)( conjx, *chi1, conjx0_chi1 );
|
||||
PASTEMAC(d,copycjs)( conjy, *psi1, conjy1_psi1 );
|
||||
PASTEMAC(d,copycjs)( conj0, *psi1, conjy0_psi1 );
|
||||
/* Apply conjx and/or conjy to chi1
|
||||
* and/or psi1. */
|
||||
PASTEMAC(d,copycjs)( conjx, *chi1,
|
||||
conjx0_chi1 );
|
||||
PASTEMAC(d,copycjs)( conjy, *psi1,
|
||||
conjy1_psi1 );
|
||||
PASTEMAC(d,copycjs)( conj0, *psi1,
|
||||
conjy0_psi1 );
|
||||
|
||||
/* Compute scalars for vector subproblems. */
|
||||
PASTEMAC(d,scal2s)( alpha0, conjx0_chi1, alpha0_chi1 );
|
||||
PASTEMAC(d,scal2s)( alpha1, conjy1_psi1, alpha1_psi1 );
|
||||
/* Compute scalars for vector
|
||||
* subproblems. */
|
||||
PASTEMAC(d,scal2s)( alpha0,
|
||||
conjx0_chi1,
|
||||
alpha0_chi1 );
|
||||
PASTEMAC(d,scal2s)( alpha1,
|
||||
conjy1_psi1,
|
||||
alpha1_psi1 );
|
||||
|
||||
/* Compute alpha * chi1 * conj(psi1) after both chi1
|
||||
* and psi1 have already been conjugated, if needed,
|
||||
/* Compute alpha * chi1 * conj(psi1)
|
||||
* after both chi1 and psi1 have
|
||||
* already been conjugated, if needed
|
||||
* by conjx and conjy.
|
||||
*/
|
||||
PASTEMAC(d,scal2s)( alpha0_chi1, conjy0_psi1,
|
||||
alpha0_chi1_psi1 );
|
||||
PASTEMAC(d,scal2s)( alpha0_chi1,
|
||||
conjy0_psi1,
|
||||
alpha0_chi1_psi1 );
|
||||
|
||||
/* c10t = c10t + alpha * chi1 * y0'; */
|
||||
/* c10t = c10t + conj(alpha) * psi1 * x0'; */
|
||||
/* c10t = c10t + alpha * chi1 * y0';*/
|
||||
/* c10t = c10t + conj(alpha) * psi1 * x0';*/
|
||||
kfp_2v
|
||||
(
|
||||
conj0,
|
||||
@@ -301,10 +319,12 @@ void bli_dher2_unf_var1
|
||||
cntx
|
||||
);
|
||||
|
||||
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1)
|
||||
+ conj(alpha) * psi1 * conj(chi1); */
|
||||
PASTEMAC(d,adds)( alpha0_chi1_psi1, *gamma11 );
|
||||
PASTEMAC(d,adds)( alpha0_chi1_psi1, *gamma11 );
|
||||
/* gamma11 = gamma11 + alpha * chi1 *conj(psi1)
|
||||
* + conj(alpha) * psi1 * conj(chi1);*/
|
||||
PASTEMAC(d,adds)( alpha0_chi1_psi1,
|
||||
*gamma11 );
|
||||
PASTEMAC(d,adds)( alpha0_chi1_psi1,
|
||||
*gamma11 );
|
||||
|
||||
i+=1;
|
||||
}
|
||||
|
||||
@@ -246,9 +246,13 @@ void bli_dher2_unf_var4
|
||||
PASTECH(d,axpy2v_ker_ft) kfp_2v;
|
||||
|
||||
/* Query the context for the kernel function pointer. */
|
||||
if ( cntx == NULL ) cntx = bli_gks_query_cntx();
|
||||
kfp_2v = bli_cntx_get_l1f_ker_dt( dt, BLIS_AXPY2V_KER, cntx );
|
||||
|
||||
if((incx == 1) && (incy == 1) && (rs_ct == 1))
|
||||
if ( (bli_cpuid_is_avx_supported() == TRUE)
|
||||
&& (incx == 1)
|
||||
&& (incy == 1)
|
||||
&& (rs_ct == 1))
|
||||
{
|
||||
for ( i = 0; i < m; )
|
||||
{
|
||||
@@ -262,23 +266,28 @@ void bli_dher2_unf_var4
|
||||
|
||||
if((n_ahead >= 3))
|
||||
{
|
||||
bli_dher2_zen_int_4(gamma11, chi1, psi1, &alpha0, n_ahead + 1, cs_ct);
|
||||
bli_dher2_zen_int_4(gamma11, chi1,
|
||||
psi1, &alpha0,
|
||||
n_ahead + 1, cs_ct);
|
||||
i+= 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Compute scalars for vector subproblems. */
|
||||
PASTEMAC(d,scal2s)( alpha0, *psi1, alpha0_psi1 );
|
||||
PASTEMAC(d,scal2s)( alpha0, *chi1, alpha1_chi1 );
|
||||
/* Compute scalars for vector
|
||||
* subproblems. */
|
||||
PASTEMAC(d,scal2s)( alpha0, *psi1,
|
||||
alpha0_psi1 );
|
||||
PASTEMAC(d,scal2s)( alpha0, *chi1,
|
||||
alpha1_chi1 );
|
||||
|
||||
/* Compute alpha * chi1 * conj(psi1) after both chi1
|
||||
* and psi1 have
|
||||
already been conjugated, if needed, by conjx and
|
||||
conjy. */
|
||||
/* Compute alpha * chi1 * conj(psi1)
|
||||
* after both chi1 and psi1 have
|
||||
* already been conjugated, if needed,
|
||||
* by conjx and conjy. */
|
||||
PASTEMAC(d,scal2s)( alpha0_psi1, *chi1,
|
||||
alpha0_chi1_psi1 );
|
||||
alpha0_chi1_psi1 );
|
||||
|
||||
/* c21 = c21 + alpha * x2 * conj(psi1); */
|
||||
/* c21 = c21 + alpha * x2 * conj(psi1)*/
|
||||
/* c21 = c21 + conj(alpha) * y2 * conj(chi1); */
|
||||
|
||||
kfp_2v
|
||||
@@ -295,8 +304,10 @@ void bli_dher2_unf_var4
|
||||
);
|
||||
|
||||
|
||||
PASTEMAC(d,adds)( alpha0_chi1_psi1, *gamma11 );
|
||||
PASTEMAC(d,adds)( alpha0_chi1_psi1, *gamma11 );
|
||||
PASTEMAC(d,adds)( alpha0_chi1_psi1,
|
||||
*gamma11 );
|
||||
PASTEMAC(d,adds)( alpha0_chi1_psi1,
|
||||
*gamma11 );
|
||||
i+=1;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user