diff --git a/config/zen/bli_cntx_init_zen.c b/config/zen/bli_cntx_init_zen.c index 81d86d106..368f0e8f7 100644 --- a/config/zen/bli_cntx_init_zen.c +++ b/config/zen/bli_cntx_init_zen.c @@ -197,7 +197,7 @@ void bli_cntx_init_zen( cntx_t* cntx ) // Update the context with optimized small/unpacked gemm kernels. bli_cntx_set_l3_sup_kers ( - 8, + 14, //BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_r_haswell_ref, BLIS_RRR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE, BLIS_RRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8m, TRUE, @@ -207,18 +207,24 @@ void bli_cntx_init_zen( cntx_t* cntx ) BLIS_CRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8n, TRUE, BLIS_CCR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, BLIS_CCC, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, + BLIS_RRR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16m, TRUE, + BLIS_RCR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16m, TRUE, + BLIS_RCC, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16n, TRUE, + BLIS_CRR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16m, TRUE, + BLIS_CCR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16n, TRUE, + BLIS_CCC, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16n, TRUE, cntx ); // Initialize level-3 sup blocksize objects with architecture-specific // values. // s d c z - bli_blksz_init ( &blkszs[ BLIS_MR ], -1, 6, -1, -1, - -1, 9, -1, -1 ); - bli_blksz_init_easy( &blkszs[ BLIS_NR ], -1, 8, -1, -1 ); - bli_blksz_init_easy( &blkszs[ BLIS_MC ], -1, 72, -1, -1 ); - bli_blksz_init_easy( &blkszs[ BLIS_KC ], -1, 256, -1, -1 ); - bli_blksz_init_easy( &blkszs[ BLIS_NC ], -1, 4080, -1, -1 ); + bli_blksz_init ( &blkszs[ BLIS_MR ], 6, 6, -1, -1, + 9, 9, -1, -1 ); + bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, -1, -1 ); + bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, -1, -1 ); + bli_blksz_init_easy( &blkszs[ BLIS_KC ], 512, 256, -1, -1 ); + bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes for small/unpacked level-3 problems.