Added Support for General Stride in DGEMV

- Updated the bli_dgemv_zen_ref( ... ) kernel to support general stride.
- Since the latest dgemv kernels don't support general stride, added
  checks to invoke bli_dgemv_zen_ref( ... ) when the A matrix has
  general stride, i.e. when both rs_a != 1 and cs_a != 1.
- Thanks to Vignesh Balasubramanian <vignesh.balasubramanian@amd.com>
  for finding this issue.

AMD-Internal: [CPUPL-6492]
Change-Id: Ia987ce7674cb26cb32eea4a6e9bd6623f2027328
This commit is contained in:
Arnav Sharma
2025-02-25 11:21:27 +05:30
committed by Arnav Sharma
parent e6ca01c1ba
commit b4c1026ec2
4 changed files with 52 additions and 6 deletions

View File

@@ -208,6 +208,8 @@ void bli_dgemv_unf_var1
cntx_t* cntx
)
{
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_3)
dim_t i;
dim_t f;
dim_t m0 = m, n0 = n;
@@ -233,6 +235,26 @@ void bli_dgemv_unf_var1
// n0 = n;
//
// Invoking the reference kernel to handle general stride.
if ( ( rs_a != 1 ) && ( cs_a != 1 ) )
{
bli_dgemv_zen_ref
(
transa,
m,
n,
alpha,
a, rs_a, cs_a,
x, incx,
beta,
y, incy,
NULL
);
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3)
return;
}
bli_set_dims_incs_with_trans(transa,
m, n, rs_a, cs_a,
&n0, &m0, &lda, &inca);

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -303,6 +303,8 @@ void bli_dgemv_unf_var2
y, incy,
cntx
);
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3)
return;
#endif
case BLIS_ARCH_ZEN:
@@ -320,6 +322,8 @@ void bli_dgemv_unf_var2
y, incy,
cntx
);
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3)
return;
default:

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2020 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -184,7 +184,7 @@ void bli_dgemv_zen_ref
double rho = (*alpha) * (*x0);
for ( dim_t j = 0; j < m0; ++j )
{
*(y0 + j) += rho * (*(a0 + j));
*(y0 + j) += rho * (*(a0 + j*inca));
}
x0 += incx;
a0 += lda;
@@ -197,7 +197,7 @@ void bli_dgemv_zen_ref
double rho = (*alpha) * (*x0);
for ( dim_t j = 0; j < m0; ++j )
{
*(y0 + j*incy) += rho * (*(a0 + j));
*(y0 + j*incy) += rho * (*(a0 + j*inca));
}
x0 += incx;
a0 += lda;
@@ -213,7 +213,7 @@ void bli_dgemv_zen_ref
double rho = 0.0;
for ( dim_t j = 0; j < m0; ++j )
{
rho += (*(a0 + j)) * (*(x0 + j));
rho += (*(a0 + j*inca)) * (*(x0 + j));
}
(*y0) += (*alpha) * rho;
y0 += incy;
@@ -227,7 +227,7 @@ void bli_dgemv_zen_ref
double rho = 0.0;
for ( dim_t j = 0; j < m0; ++j )
{
rho += (*(a0 + j)) * (*(x0 + j*incx));
rho += (*(a0 + j*inca)) * (*(x0 + j*incx));
}
(*y0) += (*alpha) * rho;
y0 += incy;

View File

@@ -104,6 +104,26 @@ void bli_dgemv_n_avx512
{
AOCL_DTL_TRACE_ENTRY( AOCL_DTL_LEVEL_TRACE_4 );
// Invoking the reference kernel to handle general stride.
if ( ( rs_a != 1 ) && ( cs_a != 1 ) )
{
bli_dgemv_zen_ref
(
transa,
m,
n,
alpha,
a, rs_a, cs_a,
x, incx,
beta,
y, incy,
NULL
);
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
return;
}
dim_t m0, n0;
inc_t rs_at, cs_at;
conj_t conja;