From a03a0f8f706e8929e47eee981031562dea9a1ca8 Mon Sep 17 00:00:00 2001 From: Meghana Vankadari Date: Wed, 12 Feb 2020 12:32:36 +0530 Subject: [PATCH] Made framework changes to initialize specific cache block sizes for TRSM. Details: -This commit addresses the performance optimization(single-thread and multi-thread) for DTRSM on zen2. -This new optimization employs different MC, KC & NC values for TRSM than what is being used in other Level-3 routines like DGEMM. -Changed TRSM framework code to choose these blocksizes for TRSM on zen family configurations. -Added a new field called "trsm_blkszs" to cntx structure in order to store TRSM specific block sizes. -Implemented routines to initialize, set and query the TRSM-specific block sizes. -Defined a new macro "AOCL_BLIS_ZEN" in configure script. This macro is automatically defined for zen family architectures. It enables us to choose different cache block sizes for TRSM instead of common level-3 block sizes. Change-Id: Id8557b1c962a316b1edecca9cd582675eaf35fe6 Signed-off-by: Meghana Vankadari AMD-Internal: [CPUPL-656] --- config/zen/bli_cntx_init_zen.c | 16 +++++++++++++++- frame/base/bli_cntx.h | 4 +++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/config/zen/bli_cntx_init_zen.c b/config/zen/bli_cntx_init_zen.c index 368f0e8f7..efdc9a83d 100644 --- a/config/zen/bli_cntx_init_zen.c +++ b/config/zen/bli_cntx_init_zen.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + Copyright (C) 2020, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -168,6 +168,20 @@ void bli_cntx_init_zen( cntx_t* cntx ) cntx ); + // Update the context with the current architecture's register and cache + // blocksizes for level-3 TRSM execution. + bli_cntx_set_trsm_blkszs + ( + 5, + // level-3 + BLIS_NC, &blkszs[ BLIS_NC ], + BLIS_KC, &blkszs[ BLIS_KC ], + BLIS_MC, &blkszs[ BLIS_MC ], + BLIS_NR, &blkszs[ BLIS_NR ], + BLIS_MR, &blkszs[ BLIS_MR ], + cntx + ); + // ------------------------------------------------------------------------- // Initialize sup thresholds with architecture-appropriate values. diff --git a/frame/base/bli_cntx.h b/frame/base/bli_cntx.h index 998658d3b..41921cf24 100644 --- a/frame/base/bli_cntx.h +++ b/frame/base/bli_cntx.h @@ -6,7 +6,7 @@ Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP - Copyright (C) 2019, Advanced Micro Devices, Inc. + Copyright (C) 2020, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -735,6 +735,8 @@ BLIS_EXPORT_BLIS void bli_cntx_clear( cntx_t* cntx ); BLIS_EXPORT_BLIS void bli_cntx_set_blkszs( ind_t method, dim_t n_bs, ... ); +BLIS_EXPORT_BLIS void bli_cntx_set_trsm_blkszs( dim_t n_bs, ... ); + BLIS_EXPORT_BLIS void bli_cntx_set_ind_blkszs( ind_t method, dim_t n_bs, ... ); BLIS_EXPORT_BLIS void bli_cntx_set_l3_nat_ukrs( dim_t n_ukrs, ... );