DGEMV BugFixes and code cleanup (#134)

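- Modified the gemv (matrix-vector multiply) reference kernels to handle transpose flags better: the no-transpose branch is now selected with `bli_does_notrans` (transa = N or C) instead of `bli_is_notrans`.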
- Modified the Zen4 kernel implementation to handle transpose flags and non-unit vector stride (incy != 1) better: the 32x8 kernel is now used whenever incy != 1 or transa is not BLIS_NO_TRANSPOSE.
These changes refine the kernel selection logic and move the definition of the `size` variable inside the preprocessor guards (AOCL_DYNAMIC / BLIS_ENABLE_OPENMP).
This commit is contained in:
Sharma, Shubham
2025-08-14 12:54:06 +05:30
committed by GitHub
parent 9a7bacb30c
commit 3a14417ce1
2 changed files with 11 additions and 4 deletions

View File

@@ -175,7 +175,7 @@ void bli_dgemv_zen_ref
// If alpha == 0, return.
if ( bli_deq0( *alpha ) ) return;
if ( bli_is_notrans( transa ) ) // BLIS_NO_TRANSPOSE
if ( bli_does_notrans( transa ) ) // transa = N or C
{
if ( incy == 1 )
{
@@ -292,7 +292,7 @@ void bli_sgemv_zen_ref
// If alpha == 0, return.
if ( bli_seq0( *alpha ) ) return;
if ( bli_is_notrans( transa ) ) // BLIS_NO_TRANSPOSE
if ( bli_does_notrans( transa ) ) // transa = N or C
{
if ( incy == 1 )
{

View File

@@ -1465,9 +1465,10 @@ void bli_dgemv_n_zen4_int (
double*,
inc_t, cntx_t* ) = NULL;
dim_t size = m * n;
// If AOCL_DYNAMIC is enabled, call ST kernels for small sizes.
#if (defined(AOCL_DYNAMIC) || (defined(BLIS_ENABLE_OPENMP)))
dim_t size = m * n;
#endif
#ifdef AOCL_DYNAMIC
if ( size < 95000 )
{
@@ -1514,6 +1515,12 @@ void bli_dgemv_n_zen4_int (
#endif
}
// Use 32x8 kernel when transa = "C" or "H",
// or when incy != 1; this kernel uses packing to handle non-unit stride y
if ( incy != 1 || transa != BLIS_NO_TRANSPOSE)
{
ker_ft = bli_dgemv_n_zen4_32x8_int_st;
}
ker_ft
(
transa,