mirror of
https://github.com/amd/blis.git
synced 2026-05-12 10:05:38 +00:00
Fixed dynamic dispatch crash issue on non-zen architecture.
This commit fixes the issue for the gemm and copy APIs. The BLIS binary built with the dynamic dispatch feature was crashing on non-Zen CPUs (specifically, CPUs without AVX2 support). The crash was caused by unsupported instructions in the Zen-optimized kernels. The issue is fixed by calling only the reference kernels if the architecture detected at runtime is not zen, zen2, or zen3. AMD-Internal: [CPUPL-1930] Change-Id: Ief57cd457b87542aa1a7bad64dc36c01f0d1a366
This commit is contained in:
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -153,16 +153,37 @@ void scopy_
|
||||
incy0 = (inc_t)(*incy);
|
||||
}
|
||||
|
||||
// When dynamic dispatch is enabled (i.e. the library is built for the ‘amdzen’ configuration),
|
||||
// this function is invoked on all architectures, including ‘generic’.
|
||||
// Invoke architecture-specific kernels only if we are sure that we are running on zen,
|
||||
// zen2 or zen3; otherwise, fall back to the reference kernels (via framework and context).
|
||||
arch_t id = bli_arch_query_id();
|
||||
bool bamdzen = (id == BLIS_ARCH_ZEN3) || (id == BLIS_ARCH_ZEN2) || (id == BLIS_ARCH_ZEN);
|
||||
|
||||
/* Call BLIS kernel */
|
||||
bli_scopyv_zen_int
|
||||
(
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
NULL
|
||||
);
|
||||
if (bamdzen)
|
||||
{
|
||||
/* Call BLIS kernel */
|
||||
bli_scopyv_zen_int
|
||||
(
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
NULL
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
PASTEMAC2(s, copyv, BLIS_TAPI_EX_SUF)
|
||||
(
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
NULL,
|
||||
NULL
|
||||
);
|
||||
}
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1)
|
||||
/* Finalize BLIS. */
|
||||
@@ -232,16 +253,38 @@ void dcopy_
|
||||
incy0 = (inc_t)(*incy);
|
||||
}
|
||||
|
||||
// When dynamic dispatch is enabled (i.e. the library is built for the ‘amdzen’ configuration),
|
||||
// this function is invoked on all architectures, including ‘generic’.
|
||||
// Invoke architecture-specific kernels only if we are sure that we are running on zen,
|
||||
// zen2 or zen3; otherwise, fall back to the reference kernels (via framework and context).
|
||||
arch_t id = bli_arch_query_id();
|
||||
bool bamdzen = (id == BLIS_ARCH_ZEN3) || (id == BLIS_ARCH_ZEN2) || (id == BLIS_ARCH_ZEN);
|
||||
|
||||
if (bamdzen)
|
||||
{
|
||||
/* Call BLIS kernel */
|
||||
bli_dcopyv_zen_int
|
||||
(
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
NULL
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
PASTEMAC2(d, copyv, BLIS_TAPI_EX_SUF)
|
||||
(
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
NULL,
|
||||
NULL
|
||||
);
|
||||
}
|
||||
|
||||
/* Call BLIS kernel */
|
||||
bli_dcopyv_zen_int
|
||||
(
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
NULL
|
||||
);
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1)
|
||||
/* Finalize BLIS. */
|
||||
|
||||
@@ -362,7 +362,63 @@ void dgemm_
|
||||
const inc_t rs_c = 1;
|
||||
const inc_t cs_c = *ldc;
|
||||
|
||||
if((k0 == 1) && bli_is_notrans(blis_transa) && bli_is_notrans(blis_transb))
|
||||
// When dynamic dispatch is enabled (i.e. the library is built for the ‘amdzen’ configuration),
|
||||
// this function is invoked on all architectures, including ‘generic’.
|
||||
// Invoke architecture-specific kernels only if we are sure that we are running on zen,
|
||||
// zen2 or zen3; otherwise, fall back to the reference kernels (via framework and context).
|
||||
arch_t id = bli_arch_query_id();
|
||||
bool bamdzen = (id == BLIS_ARCH_ZEN3) || (id == BLIS_ARCH_ZEN2) || (id == BLIS_ARCH_ZEN);
|
||||
|
||||
if (!bamdzen)
|
||||
{
|
||||
// This code is duplicated below; however, we don't want to move it out of
|
||||
// this IF block, as doing so would affect performance on Zen architectures.
|
||||
// Also, this is a temporary fix which will be replaced later.
|
||||
const num_t dt = BLIS_DOUBLE;
|
||||
|
||||
obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1;
|
||||
obj_t ao = BLIS_OBJECT_INITIALIZER;
|
||||
obj_t bo = BLIS_OBJECT_INITIALIZER;
|
||||
obj_t betao = BLIS_OBJECT_INITIALIZER_1X1;
|
||||
obj_t co = BLIS_OBJECT_INITIALIZER;
|
||||
|
||||
dim_t m0_a, n0_a;
|
||||
dim_t m0_b, n0_b;
|
||||
|
||||
bli_set_dims_with_trans(blis_transa, m0, k0, &m0_a, &n0_a);
|
||||
bli_set_dims_with_trans(blis_transb, k0, n0, &m0_b, &n0_b);
|
||||
|
||||
bli_obj_init_finish_1x1(dt, (double *)alpha, &alphao);
|
||||
bli_obj_init_finish_1x1(dt, (double *)beta, &betao);
|
||||
|
||||
bli_obj_init_finish(dt, m0_a, n0_a, (double *)a, rs_a, cs_a, &ao);
|
||||
bli_obj_init_finish(dt, m0_b, n0_b, (double *)b, rs_b, cs_b, &bo);
|
||||
bli_obj_init_finish(dt, m0, n0, (double *)c, rs_c, cs_c, &co);
|
||||
|
||||
bli_obj_set_conjtrans(blis_transa, &ao);
|
||||
bli_obj_set_conjtrans(blis_transb, &bo);
|
||||
|
||||
// Will call parallelized dgemm code - sup & native
|
||||
PASTEMAC(gemm, BLIS_OAPI_EX_SUF)
|
||||
(
|
||||
&alphao,
|
||||
&ao,
|
||||
&bo,
|
||||
&betao,
|
||||
&co,
|
||||
NULL,
|
||||
NULL
|
||||
);
|
||||
|
||||
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k);
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
/* Finalize BLIS. */
|
||||
bli_finalize_auto();
|
||||
return;
|
||||
}
|
||||
|
||||
if((k0 == 1) && bli_is_notrans(blis_transa) && bli_is_notrans(blis_transb))
|
||||
{
|
||||
bli_dgemm_ref_k1_nn( m0, n0, k0,
|
||||
(double*)alpha,
|
||||
|
||||
Reference in New Issue
Block a user