mirror of
https://github.com/amd/blis.git
synced 2026-05-12 01:59:59 +00:00
Made framework changes to initialize specific cache block sizes for TRSM.
Details: -This commit addresses the performance optimization(single-thread and multi-thread) for DTRSM on zen2. -This new optimization employs different MC, KC & NC values for TRSM than what is being used in other Level-3 routines like DGEMM. -Changed TRSM framework code to choose these blocksizes for TRSM on zen family configurations. -Added a new field called "trsm_blkszs" to cntx structure in order to store TRSM specific block sizes. -Implemented routines to initialize, set and query the TRSM-specific block sizes. -Defined a new macro "AOCL_BLIS_ZEN" in configure script. This macro is automatically defined for zen family architectures. It enables us to choose different cache block sizes for TRSM instead of common level-3 block sizes. Change-Id: Id8557b1c962a316b1edecca9cd582675eaf35fe6 Signed-off-by: Meghana Vankadari <meghana.vankadari@amd.com> AMD-Internal: [CPUPL-656]
This commit is contained in:
committed by
dzambare
parent
6b5c68b9ed
commit
a03a0f8f70
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -168,6 +168,20 @@ void bli_cntx_init_zen( cntx_t* cntx )
|
||||
cntx
|
||||
);
|
||||
|
||||
// Update the context with the current architecture's register and cache
|
||||
// blocksizes for level-3 TRSM execution.
|
||||
bli_cntx_set_trsm_blkszs
|
||||
(
|
||||
5,
|
||||
// level-3
|
||||
BLIS_NC, &blkszs[ BLIS_NC ],
|
||||
BLIS_KC, &blkszs[ BLIS_KC ],
|
||||
BLIS_MC, &blkszs[ BLIS_MC ],
|
||||
BLIS_NR, &blkszs[ BLIS_NR ],
|
||||
BLIS_MR, &blkszs[ BLIS_MR ],
|
||||
cntx
|
||||
);
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
// Initialize sup thresholds with architecture-appropriate values.
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2016, Hewlett Packard Enterprise Development LP
|
||||
Copyright (C) 2019, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -735,6 +735,8 @@ BLIS_EXPORT_BLIS void bli_cntx_clear( cntx_t* cntx );
|
||||
|
||||
BLIS_EXPORT_BLIS void bli_cntx_set_blkszs( ind_t method, dim_t n_bs, ... );
|
||||
|
||||
BLIS_EXPORT_BLIS void bli_cntx_set_trsm_blkszs( dim_t n_bs, ... );
|
||||
|
||||
BLIS_EXPORT_BLIS void bli_cntx_set_ind_blkszs( ind_t method, dim_t n_bs, ... );
|
||||
|
||||
BLIS_EXPORT_BLIS void bli_cntx_set_l3_nat_ukrs( dim_t n_ukrs, ... );
|
||||
|
||||
Reference in New Issue
Block a user