mirror of
https://github.com/amd/blis.git
synced 2026-05-11 01:30:00 +00:00
Adding support for AOCL_ENABLE_INSTRUCTIONS for f32 LPGEMM API.
- Currently, lpgemm sets the context (block sizes and micro-kernels) based on the ISA of the machine it is executing on. However, this approach does not give the flexibility to select a different context at runtime. To enable runtime selection of the context, the context initialization is modified to read the AOCL_ENABLE_INSTRUCTIONS environment variable and set the context accordingly. As part of this commit, only f32 context selection is enabled. - Bug fixes in scale ops in f32 micro-kernels and in GEMV path selection. - Added vectorized f32 packing kernels for NR=16 (AVX2) and NR=64 (AVX512). These are only for the B matrix and help remove the dependency of the f32 lpgemm API on the BLIS packing framework. AMD Internal: [CPUPL-5959] Change-Id: I4b459aaf33c54423952f89905ba43cf119ce20f6
This commit is contained in:
@@ -31,8 +31,8 @@
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
#ifndef BLIS_GEMM_F32_PACKA
|
||||
#define BLIS_GEMM_F32_PACKA
|
||||
#ifndef BLIS_GEMM_F32_PACKAB
|
||||
#define BLIS_GEMM_F32_PACKAB
|
||||
|
||||
void packa_mr16_f32f32f32of32_col_major
|
||||
(
|
||||
@@ -45,6 +45,43 @@ void packa_mr16_f32f32f32of32_col_major
|
||||
dim_t* rs_p,
|
||||
dim_t* cs_p
|
||||
);
|
||||
#endif
|
||||
|
||||
/*
 * lpgemm_pack_f32: pointer type for an f32 packing routine.
 *
 * The parameter types match, in order, those of the
 * packb_nr64_f32f32f32of32 and packb_nr16_f32f32f32of32 declarations
 * in this header:
 *   float*        destination (packed) buffer
 *   const float*  source matrix
 *   const dim_t   rs_b
 *   const dim_t   cs_b
 *   const dim_t   NC
 *   const dim_t   KC
 *   dim_t*        rs_p
 *   dim_t*        cs_p
 *
 * NOTE(review): presumably used to pick a packing kernel at runtime
 * together with the selected context -- confirm at the use sites.
 */
typedef void (*lpgemm_pack_f32)
|
||||
(
|
||||
float*,
|
||||
const float*,
|
||||
const dim_t,
|
||||
const dim_t,
|
||||
const dim_t,
|
||||
const dim_t,
|
||||
dim_t*,
|
||||
dim_t*
|
||||
);
|
||||
|
||||
/*
 * packb_nr64_f32f32f32of32: f32 packing routine for the B matrix.
 *
 * NOTE(review): going by the name and the commit description, this is
 * presumably the NR=64 (AVX512) vectorized kernel -- confirm against
 * the implementation.
 *
 * Parameters (semantics below are assumptions; only the types are
 * visible in this header):
 *   pack_b_buffer  destination buffer written by the routine.
 *   b              source matrix, read-only.
 *   rs_b, cs_b     presumably the row and column strides of b.
 *   NC, KC         presumably the extents of the block of b to pack.
 *   rs_p, cs_p     output pointers; presumably receive the row and
 *                  column strides of the packed buffer -- TODO confirm.
 */
void packb_nr64_f32f32f32of32
|
||||
(
|
||||
float* pack_b_buffer,
|
||||
const float* b,
|
||||
const dim_t rs_b,
|
||||
const dim_t cs_b,
|
||||
const dim_t NC,
|
||||
const dim_t KC,
|
||||
dim_t* rs_p,
|
||||
dim_t* cs_p
|
||||
);
|
||||
|
||||
/*
 * packb_nr16_f32f32f32of32: f32 packing routine for the B matrix.
 *
 * NOTE(review): going by the name and the commit description, this is
 * presumably the NR=16 (AVX2) vectorized kernel -- confirm against
 * the implementation.
 *
 * Parameters (semantics below are assumptions; only the types are
 * visible in this header):
 *   pack_b_buffer  destination buffer written by the routine.
 *   b              source matrix, read-only.
 *   rs_b, cs_b     presumably the row and column strides of b.
 *   NC, KC         presumably the extents of the block of b to pack.
 *   rs_p, cs_p     output pointers; presumably receive the row and
 *                  column strides of the packed buffer -- TODO confirm.
 */
void packb_nr16_f32f32f32of32
|
||||
(
|
||||
float* pack_b_buffer,
|
||||
const float* b,
|
||||
const dim_t rs_b,
|
||||
const dim_t cs_b,
|
||||
const dim_t NC,
|
||||
const dim_t KC,
|
||||
dim_t* rs_p,
|
||||
dim_t* cs_p
|
||||
);
|
||||
|
||||
#endif //BLIS_GEMM_F32_PACKAB
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user