mirror of
https://github.com/amd/blis.git
synced 2026-04-20 07:38:53 +00:00
Added Support for General Stride in DGEMV
- Updated the bli_dgemv_zen_ref( ... ) kernel to support general stride.
- Since the latest dgemv kernels don't support general stride, added checks to invoke bli_dgemv_zen_ref( ... ) when the A matrix has a general stride (i.e., neither rs_a nor cs_a is 1).
- Thanks to Vignesh Balasubramanian <vignesh.balasubramanian@amd.com> for finding this issue.

AMD-Internal: [CPUPL-6492]
Change-Id: Ia987ce7674cb26cb32eea4a6e9bd6623f2027328
This commit is contained in:
committed by
Arnav Sharma
parent
e6ca01c1ba
commit
b4c1026ec2
@@ -208,6 +208,8 @@ void bli_dgemv_unf_var1
|
||||
cntx_t* cntx
|
||||
)
|
||||
{
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_3)
|
||||
|
||||
dim_t i;
|
||||
dim_t f;
|
||||
dim_t m0 = m, n0 = n;
|
||||
@@ -233,6 +235,26 @@ void bli_dgemv_unf_var1
|
||||
// n0 = n;
|
||||
//
|
||||
|
||||
// Invoking the reference kernel to handle general stride.
|
||||
if ( ( rs_a != 1 ) && ( cs_a != 1 ) )
|
||||
{
|
||||
bli_dgemv_zen_ref
|
||||
(
|
||||
transa,
|
||||
m,
|
||||
n,
|
||||
alpha,
|
||||
a, rs_a, cs_a,
|
||||
x, incx,
|
||||
beta,
|
||||
y, incy,
|
||||
NULL
|
||||
);
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3)
|
||||
return;
|
||||
}
|
||||
|
||||
bli_set_dims_incs_with_trans(transa,
|
||||
m, n, rs_a, cs_a,
|
||||
&n0, &m0, &lda, &inca);
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020 - 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -303,6 +303,8 @@ void bli_dgemv_unf_var2
|
||||
y, incy,
|
||||
cntx
|
||||
);
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3)
|
||||
return;
|
||||
#endif
|
||||
case BLIS_ARCH_ZEN:
|
||||
@@ -320,6 +322,8 @@ void bli_dgemv_unf_var2
|
||||
y, incy,
|
||||
cntx
|
||||
);
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3)
|
||||
return;
|
||||
|
||||
default:
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2020 - 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -184,7 +184,7 @@ void bli_dgemv_zen_ref
|
||||
double rho = (*alpha) * (*x0);
|
||||
for ( dim_t j = 0; j < m0; ++j )
|
||||
{
|
||||
*(y0 + j) += rho * (*(a0 + j));
|
||||
*(y0 + j) += rho * (*(a0 + j*inca));
|
||||
}
|
||||
x0 += incx;
|
||||
a0 += lda;
|
||||
@@ -197,7 +197,7 @@ void bli_dgemv_zen_ref
|
||||
double rho = (*alpha) * (*x0);
|
||||
for ( dim_t j = 0; j < m0; ++j )
|
||||
{
|
||||
*(y0 + j*incy) += rho * (*(a0 + j));
|
||||
*(y0 + j*incy) += rho * (*(a0 + j*inca));
|
||||
}
|
||||
x0 += incx;
|
||||
a0 += lda;
|
||||
@@ -213,7 +213,7 @@ void bli_dgemv_zen_ref
|
||||
double rho = 0.0;
|
||||
for ( dim_t j = 0; j < m0; ++j )
|
||||
{
|
||||
rho += (*(a0 + j)) * (*(x0 + j));
|
||||
rho += (*(a0 + j*inca)) * (*(x0 + j));
|
||||
}
|
||||
(*y0) += (*alpha) * rho;
|
||||
y0 += incy;
|
||||
@@ -227,7 +227,7 @@ void bli_dgemv_zen_ref
|
||||
double rho = 0.0;
|
||||
for ( dim_t j = 0; j < m0; ++j )
|
||||
{
|
||||
rho += (*(a0 + j)) * (*(x0 + j*incx));
|
||||
rho += (*(a0 + j*inca)) * (*(x0 + j*incx));
|
||||
}
|
||||
(*y0) += (*alpha) * rho;
|
||||
y0 += incy;
|
||||
|
||||
@@ -104,6 +104,26 @@ void bli_dgemv_n_avx512
|
||||
{
|
||||
AOCL_DTL_TRACE_ENTRY( AOCL_DTL_LEVEL_TRACE_4 );
|
||||
|
||||
// Invoking the reference kernel to handle general stride.
|
||||
if ( ( rs_a != 1 ) && ( cs_a != 1 ) )
|
||||
{
|
||||
bli_dgemv_zen_ref
|
||||
(
|
||||
transa,
|
||||
m,
|
||||
n,
|
||||
alpha,
|
||||
a, rs_a, cs_a,
|
||||
x, incx,
|
||||
beta,
|
||||
y, incy,
|
||||
NULL
|
||||
);
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
|
||||
return;
|
||||
}
|
||||
|
||||
dim_t m0, n0;
|
||||
inc_t rs_at, cs_at;
|
||||
conj_t conja;
|
||||
|
||||
Reference in New Issue
Block a user