diff --git a/config/zen2/bli_cntx_init_zen2.c b/config/zen2/bli_cntx_init_zen2.c index 25eae4866..452e2426f 100644 --- a/config/zen2/bli_cntx_init_zen2.c +++ b/config/zen2/bli_cntx_init_zen2.c @@ -38,7 +38,7 @@ void bli_cntx_init_zen2( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; - + blksz_t thresh[ BLIS_NUM_THRESH ]; // Set default kernel blocksizes and functions. bli_cntx_init_zen2_ref( cntx ); @@ -135,5 +135,61 @@ void bli_cntx_init_zen2( cntx_t* cntx ) BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF, cntx ); +// ------------------------------------------------------------------------- + + // Initialize sup thresholds with architecture-appropriate values. + // s d c z + bli_blksz_init_easy( &thresh[ BLIS_MT ], -1, 256, -1, -1 ); + bli_blksz_init_easy( &thresh[ BLIS_NT ], -1, 100, -1, -1 ); + bli_blksz_init_easy( &thresh[ BLIS_KT ], -1, 120, -1, -1 ); + + // Initialize the context with the sup thresholds. + bli_cntx_set_l3_sup_thresh + ( + 3, + BLIS_MT, &thresh[ BLIS_MT ], + BLIS_NT, &thresh[ BLIS_NT ], + BLIS_KT, &thresh[ BLIS_KT ], + cntx + ); + + // Update the context with optimized small/unpacked gemm kernels. + bli_cntx_set_l3_sup_kers + ( + 8, + //BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_r_haswell_ref, + BLIS_RRR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE, + BLIS_RRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8m, TRUE, + BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE, + BLIS_RCC, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, + BLIS_CRR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE, + BLIS_CRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8n, TRUE, + BLIS_CCR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, + BLIS_CCC, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, + cntx + ); + + // Initialize level-3 sup blocksize objects with architecture-specific + // values. + // s d c z + bli_blksz_init ( &blkszs[ BLIS_MR ], -1, 6, -1, -1, + -1, 9, -1, -1 ); + bli_blksz_init_easy( &blkszs[ BLIS_NR ], -1, 8, -1, -1 ); + bli_blksz_init_easy( &blkszs[ BLIS_MC ], -1, 72, -1, -1 ); + bli_blksz_init_easy( &blkszs[ BLIS_KC ], -1, 256, -1, -1 ); + bli_blksz_init_easy( &blkszs[ BLIS_NC ], -1, 4080, -1, -1 ); + + // Update the context with the current architecture's register and cache + // blocksizes for small/unpacked level-3 problems. + bli_cntx_set_l3_sup_blkszs + ( + 5, + BLIS_NC, &blkszs[ BLIS_NC ], + BLIS_KC, &blkszs[ BLIS_KC ], + BLIS_MC, &blkszs[ BLIS_MC ], + BLIS_NR, &blkszs[ BLIS_NR ], + BLIS_MR, &blkszs[ BLIS_MR ], + cntx + ); }