Fixed early-return checks in reorder functions for f32 & int8 APIs.

Details:
- In the reorder functions, the validity of strides was being checked
  assuming that the matrix to be reordered is always row-major. Modified
  the code to take stor_order into consideration when checking the
  validity of strides.
- This does not directly impact the functionality of GEMM as we don't
  support GEMM on col-major matrices where A and/or B matrices are
  reordered before GEMM computation. But this change makes sense when
  reordering is viewed as an independent functionality irrespective of
  what the reordered buffers will be used for.

Change-Id: If2cc4a353bca2f998ad557d6f128881bc9963330
This commit is contained in:
Meghana Vankadari
2025-04-15 09:39:43 +00:00
parent f76f37cc11
commit 1ff96343f1
4 changed files with 96 additions and 36 deletions

View File

@@ -305,7 +305,7 @@ AOCL_GEMM_REORDER(bfloat16, bf16bf16f32of32)
if( ( order == 'r') || ( order == 'R' ) )
{
if( ( bli_is_notrans( blis_trans ) && ( ldb < n ) ) ||
( bli_is_trans( blis_trans ) && ( ldb < k ) ) )
( bli_is_trans( blis_trans ) && ( ldb < k ) ) )
{
return; // Error.
}
@@ -318,7 +318,7 @@ AOCL_GEMM_REORDER(bfloat16, bf16bf16f32of32)
else if ( ( order == 'c' ) || ( order == 'C' ) )
{
if( ( bli_is_notrans( blis_trans ) && ( ldb < k ) ) ||
( bli_is_trans( blis_trans ) && ( ldb < n ) ) )
( bli_is_trans( blis_trans ) && ( ldb < n ) ) )
{
return; // Error.
}

View File

@@ -95,23 +95,38 @@ AOCL_GEMM_REORDER(float,f32f32f32of32)
bli_param_map_netlib_to_blis_trans(trans, &blis_trans);
if ( ( input_buf_addr == NULL ) || ( reorder_buf_addr == NULL ) ||
( k <= 0 ) || ( n <= 0 ) || ( bli_is_notrans( blis_trans ) && ( ldb < n ) ) ||
( bli_is_trans( blis_trans ) && ( ldb < k ) ) )
( k <= 0 ) || ( n <= 0 ) )
{
return; // Error.
}
// Only supports row major packing now.
inc_t rs_b, cs_b;
if ((order == 'r') || (order == 'R'))
if( ( order == 'r') || ( order == 'R' ) )
{
rs_b = bli_is_notrans(blis_trans) ? ldb : 1;
cs_b = bli_is_notrans(blis_trans) ? 1 : ldb;
if( ( bli_is_notrans( blis_trans ) && ( ldb < n ) ) ||
( bli_is_trans( blis_trans ) && ( ldb < k ) ) )
{
return; // Error.
}
else
{
rs_b = bli_is_notrans( blis_trans ) ? ldb : 1;
cs_b = bli_is_notrans( blis_trans ) ? 1 : ldb;
}
}
else if ((order == 'c') || (order == 'C'))
else if ( ( order == 'c' ) || ( order == 'C' ) )
{
rs_b = bli_is_notrans(blis_trans) ? 1 : ldb;
cs_b = bli_is_notrans(blis_trans) ? ldb : 1;
if( ( bli_is_notrans( blis_trans ) && ( ldb < k ) ) ||
( bli_is_trans( blis_trans ) && ( ldb < n ) ) )
{
return; // Error.
}
else
{
rs_b = bli_is_notrans( blis_trans ) ? 1 : ldb;
cs_b = bli_is_notrans( blis_trans ) ? ldb : 1;
}
}
else
{
@@ -125,7 +140,7 @@ AOCL_GEMM_REORDER(float,f32f32f32of32)
"cannot perform f32f32f32 gemm.", __FILE__, __LINE__ );
return; // Error.
}
/* Initialize BLIS. */
bli_init_auto();
@@ -158,7 +173,7 @@ AOCL_GEMM_REORDER(float,f32f32f32of32)
n_threads = ( n_threads > 0 ) ? n_threads : 1;
#ifdef BLIS_KERNELS_ZEN4
//When n == 1, B marix becomes a vector.
//When n == 1, B marix becomes a vector.
//Reordering is avoided so that LPGEMV can process it efficiently.
if( ( n == 1 ) && ( lpgemm_get_enabled_arch() != BLIS_ARCH_ZEN3 ) )
{

View File

@@ -193,22 +193,37 @@ AOCL_GEMM_REORDER(int8_t,s8s8s32os32)
bli_param_map_netlib_to_blis_trans(trans, &blis_trans);
if ((input_buf_addr == NULL) || (reorder_buf_addr == NULL) ||
(k <= 0) || (n <= 0) || (bli_is_notrans(blis_trans) && (ldb < n)) ||
(bli_is_trans(blis_trans) && (ldb < k)) )
(k <= 0) || (n <= 0) )
{
return; // Error.
}
inc_t rs_b, cs_b;
if ((order == 'r') || (order == 'R'))
if( ( order == 'r') || ( order == 'R' ) )
{
rs_b = bli_is_notrans(blis_trans) ? ldb : 1;
cs_b = bli_is_notrans(blis_trans) ? 1 : ldb;
if( ( bli_is_notrans( blis_trans ) && ( ldb < n ) ) ||
( bli_is_trans( blis_trans ) && ( ldb < k ) ) )
{
return; // Error.
}
else
{
rs_b = bli_is_notrans( blis_trans ) ? ldb : 1;
cs_b = bli_is_notrans( blis_trans ) ? 1 : ldb;
}
}
else if ((order == 'c') || (order == 'C'))
else if ( ( order == 'c' ) || ( order == 'C' ) )
{
rs_b = bli_is_notrans(blis_trans) ? 1 : ldb;
cs_b = bli_is_notrans(blis_trans) ? ldb : 1;
if( ( bli_is_notrans( blis_trans ) && ( ldb < k ) ) ||
( bli_is_trans( blis_trans ) && ( ldb < n ) ) )
{
return; // Error.
}
else
{
rs_b = bli_is_notrans( blis_trans ) ? 1 : ldb;
cs_b = bli_is_notrans( blis_trans ) ? ldb : 1;
}
}
else
{
@@ -284,22 +299,37 @@ AOCL_GEMM_REORDER_SYM_QUANT(int8_t,s8s8s32os32_sym_quant)
bli_param_map_netlib_to_blis_trans(trans, &blis_trans);
if ((input_buf_addr == NULL) || (reorder_buf_addr == NULL) ||
(k <= 0) || (n <= 0) || (bli_is_notrans(blis_trans) && (ldb < n)) ||
(bli_is_trans(blis_trans) && (ldb < k)) )
(k <= 0) || (n <= 0) )
{
return; // Error.
}
inc_t rs_b, cs_b;
if ((order == 'r') || (order == 'R'))
if( ( order == 'r') || ( order == 'R' ) )
{
rs_b = bli_is_notrans(blis_trans) ? ldb : 1;
cs_b = bli_is_notrans(blis_trans) ? 1 : ldb;
if( ( bli_is_notrans( blis_trans ) && ( ldb < n ) ) ||
( bli_is_trans( blis_trans ) && ( ldb < k ) ) )
{
return; // Error.
}
else
{
rs_b = bli_is_notrans( blis_trans ) ? ldb : 1;
cs_b = bli_is_notrans( blis_trans ) ? 1 : ldb;
}
}
else if ((order == 'c') || (order == 'C'))
else if ( ( order == 'c' ) || ( order == 'C' ) )
{
rs_b = bli_is_notrans(blis_trans) ? 1 : ldb;
cs_b = bli_is_notrans(blis_trans) ? ldb : 1;
if( ( bli_is_notrans( blis_trans ) && ( ldb < k ) ) ||
( bli_is_trans( blis_trans ) && ( ldb < n ) ) )
{
return; // Error.
}
else
{
rs_b = bli_is_notrans( blis_trans ) ? 1 : ldb;
cs_b = bli_is_notrans( blis_trans ) ? ldb : 1;
}
}
else
{

View File

@@ -111,22 +111,37 @@ AOCL_GEMM_REORDER(int8_t,u8s8s32os32)
bli_param_map_netlib_to_blis_trans(trans, &blis_trans);
if ( ( input_buf_addr == NULL ) || ( reorder_buf_addr == NULL ) ||
( k <= 0 ) || ( n <= 0 ) || ( bli_is_notrans( blis_trans ) && ( ldb < n ) ) ||
( bli_is_trans( blis_trans ) && ( ldb < k ) ) )
( k <= 0 ) || ( n <= 0 ) )
{
return; // Error.
}
inc_t rs_b, cs_b;
if ((order == 'r') || (order == 'R'))
if( ( order == 'r') || ( order == 'R' ) )
{
rs_b = bli_is_notrans(blis_trans) ? ldb : 1;
cs_b = bli_is_notrans(blis_trans) ? 1 : ldb;
if( ( bli_is_notrans( blis_trans ) && ( ldb < n ) ) ||
( bli_is_trans( blis_trans ) && ( ldb < k ) ) )
{
return; // Error.
}
else
{
rs_b = bli_is_notrans( blis_trans ) ? ldb : 1;
cs_b = bli_is_notrans( blis_trans ) ? 1 : ldb;
}
}
else if ((order == 'c') || (order == 'C'))
else if ( ( order == 'c' ) || ( order == 'C' ) )
{
rs_b = bli_is_notrans(blis_trans) ? 1 : ldb;
cs_b = bli_is_notrans(blis_trans) ? ldb : 1;
if( ( bli_is_notrans( blis_trans ) && ( ldb < k ) ) ||
( bli_is_trans( blis_trans ) && ( ldb < n ) ) )
{
return; // Error.
}
else
{
rs_b = bli_is_notrans( blis_trans ) ? 1 : ldb;
cs_b = bli_is_notrans( blis_trans ) ? ldb : 1;
}
}
else
{