mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Added AVX512 DTRSM small RLNN/RUTN variant kernels
- 8x8 kernels are used for DTRSM SMALL. - Implemented fringe cases with the following block sizes: 8x8, 8x4, 8x3, 8x2, 8x1; 4x8, 4x4, 4x3, 4x2, 4x1; 3x8, 3x4, 3x3, 3x2, 3x1; 2x8, 2x4, 2x3, 2x2, 2x1; 1x8, 1x4, 1x3, 1x2, 1x1. AMD-Internal: [CPUPL-2745] Change-Id: Ifb8cfba6958e1c89ddbfa18893127ab6d44cc367
This commit is contained in:
@@ -956,9 +956,6 @@ void dtrsm_blis_impl
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t id = bli_arch_query_id();
|
||||
#if defined(BLIS_KERNELS_ZEN4)
|
||||
bool uplo, transa;
|
||||
#endif
|
||||
switch(id)
|
||||
{
|
||||
case BLIS_ARCH_ZEN4:
|
||||
@@ -969,11 +966,8 @@ void dtrsm_blis_impl
|
||||
// for n < 200 avx2 kernels are performing better, but if
|
||||
// n is a multiple of 8 then there will be no fringe case for avx512,
|
||||
// in such cases avx512 kernels will perform better.
|
||||
uplo = bli_obj_is_upper(&ao);
|
||||
transa = bli_obj_has_trans(&ao);
|
||||
if(( ((blis_side == BLIS_RIGHT) && (uplo == true) && (transa == false)) ||
|
||||
((blis_side == BLIS_RIGHT) && (uplo == false) && (transa == true))) &&
|
||||
((n0 > 400) && (m0 > 50)))
|
||||
if( (blis_side == BLIS_RIGHT) &&
|
||||
((n0 > 300) && (m0 > 50)))
|
||||
{
|
||||
status = bli_trsm_small_AVX512(
|
||||
blis_side,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user