Fixed configuration issues in AOCL_GEMM addon (#4)

* Fixed configuration issues in AOCL_GEMM addon

Description:

Fixed aocl_gemm addon initialization of kernels and block sizes
for machines that support only AVX512 but not
AVX512_VNNI/VNNI_BF16.

Aligned the NC and KC blocking variables between ZEN and ZEN4.

AMD-Internal: [SWLCSG-3527]
This commit is contained in:
Bhaskar, Nallani
2025-05-13 17:19:19 +05:30
committed by GitHub
parent 121d81df16
commit 42a0d74ced
2 changed files with 36 additions and 5 deletions

View File

@@ -49,15 +49,15 @@
#define LPGEMM_BLKSZ_MAP_ZEN \
XMACRO(U8S8S32OS32, 144, 1024, 2048, 6, 64, 4, 24, 4*64, 64) \
XMACRO(F32F32F32OF32, 144, 8160, 512, 6, 16, 1, 6, 16, 1) \
XMACRO(BF16BF16F32OF32, 144, 1024, 2048, 6, 64, 0, 0, 2*64, 64/2) \
XMACRO(F32F32F32OF32, 144, 8064, 512, 6, 16, 1, 6, 16, 1) \
XMACRO(BF16BF16F32OF32, 144, 1024, 4096, 6, 64, 0, 0, 2*64, 64/2) \
XMACRO(BF16S4F32OF32, 144, 1024, 4096, 6, 64, 0, 0, 2*64, 64/2) \
XMACRO(S8S8S32OS32, 144, 1024, 2048, 6, 64, 4, 24, 4*64, 64) \
XMACRO(U8S4S32OS32, 144, 1024, 2048, 6, 64, 4, 24, 4*64, 64) \
XMACRO(BF16S4F32OF32, 144, 1024, 2048, 6, 64, 0, 0, 2*64, 64/2) \
XMACRO(F32OBF16, 144, 1024, 2048, 6, 64, 0, 0, 2*64, 64/2) \
XMACRO(F32OBF16, 144, 1024, 4096, 6, 64, 0, 0, 2*64, 64/2) \
#define LPGEMM_BLKSZ_UPD_MAP_ZEN4_TO_ZEN \
XMACRO(F32F32F32OF32, 144, 8160, 512, 6, 64, 1, 6, 64, 1) \
XMACRO(F32F32F32OF32, 144, 8064, 512, 6, 64, 1, 6, 64, 1) \
// The STMACRO follows the format MT, NT, KT which are SUP switch thresholds.
// ID = One of the AOCL_OPERATION_TYPE enum.

View File

@@ -138,6 +138,12 @@ static void _lpgemm_util_cntx_init_func_map()
{
#ifdef BLIS_KERNELS_ZEN4
LPGEMM_UTIL_KERN_FUNC_MAP_AVX512_VNNI
#endif
}
else if ( bli_cpuid_is_avx512_supported() == TRUE )
{
#ifdef BLIS_KERNELS_ZEN4
LPGEMM_UTIL_KERN_FUNC_MAP_AVX512
#endif
}
else if ( bli_cpuid_is_avx2fma3_supported() == TRUE )
@@ -264,6 +270,22 @@ static void _lpgemm_cntx_init_func_map()
LPGEMM_PACKB_FUNC_UPD_MAP_AVX512_VNNI_TO_AVX2;
}
#endif
}
else if ( bli_cpuid_is_avx512_supported() == TRUE )
{
#ifdef BLIS_KERNELS_ZEN4
LPGEMM_KERN_FUNC_MAP_AVX512
LPGEMM_PACKA_FUNC_MAP_AVX512
LPGEMM_PACKB_FUNC_MAP_AVX512
LPGEMM_PACKBMXP_FUNC_MAP_AVX512
if ( global_lpgemm_enable_arch == BLIS_ARCH_ZEN3 )
{
LPGEMM_KERN_FUNC_UPD_MAP_AVX512_VNNI_TO_AVX2
LPGEMM_PACKA_FUNC_UPD_MAP_AVX512_VNNI_TO_AVX2;
LPGEMM_PACKB_FUNC_UPD_MAP_AVX512_VNNI_TO_AVX2;
}
#endif
}
else if ( bli_cpuid_is_avx2fma3_supported() == TRUE )
{
@@ -352,6 +374,15 @@ static void _lpgemm_cntx_init_blksz_map()
LPGEMM_BLKSZ_UPD_MAP_ZEN4_TO_ZEN
}
}
else if ( bli_cpuid_is_avx512_supported() == TRUE )
{
LPGEMM_BLKSZ_MAP_ZEN4
if ( global_lpgemm_enable_arch == BLIS_ARCH_ZEN3 )
{
LPGEMM_BLKSZ_UPD_MAP_ZEN4_TO_ZEN
}
}
else if ( bli_cpuid_is_avx2fma3_supported() == TRUE )
{
LPGEMM_BLKSZ_MAP_ZEN