mirror of
https://github.com/amd/blis.git
synced 2026-04-19 23:28:52 +00:00
DGEMV BugFixes and code cleanup (#134)
- Modified the gemv (matrix-vector multiply) reference implementation for better handling of transpose flags. - Modified the Zen4 kernel implementations for better handling of transpose flags and the vector stride (incy). - The changes refine the kernel selection logic and move a variable definition inside macro guards.
This commit is contained in:
@@ -175,7 +175,7 @@ void bli_dgemv_zen_ref
|
||||
// If alpha == 0, return.
|
||||
if ( bli_deq0( *alpha ) ) return;
|
||||
|
||||
if ( bli_is_notrans( transa ) ) // BLIS_NO_TRANSPOSE
|
||||
if ( bli_does_notrans( transa ) ) // transa = N or C
|
||||
{
|
||||
if ( incy == 1 )
|
||||
{
|
||||
@@ -292,7 +292,7 @@ void bli_sgemv_zen_ref
|
||||
// If alpha == 0, return.
|
||||
if ( bli_seq0( *alpha ) ) return;
|
||||
|
||||
if ( bli_is_notrans( transa ) ) // BLIS_NO_TRANSPOSE
|
||||
if ( bli_does_notrans( transa ) ) // transa = N or C
|
||||
{
|
||||
if ( incy == 1 )
|
||||
{
|
||||
|
||||
@@ -1465,9 +1465,10 @@ void bli_dgemv_n_zen4_int (
|
||||
double*,
|
||||
inc_t, cntx_t* ) = NULL;
|
||||
|
||||
dim_t size = m * n;
|
||||
|
||||
// If AOCL_DYNAMIC is enabled, call ST kernels for small sizes.
|
||||
#if (defined(AOCL_DYNAMIC) || (defined(BLIS_ENABLE_OPENMP)))
|
||||
dim_t size = m * n;
|
||||
#endif
|
||||
#ifdef AOCL_DYNAMIC
|
||||
if ( size < 95000 )
|
||||
{
|
||||
@@ -1514,6 +1515,12 @@ void bli_dgemv_n_zen4_int (
|
||||
#endif
|
||||
}
|
||||
|
||||
// Use 32x8 kernel when transa = "C" or "H"
|
||||
// or if incy != 1; this kernel uses packing to handle non-unit stride y
|
||||
if ( incy != 1 || transa != BLIS_NO_TRANSPOSE)
|
||||
{
|
||||
ker_ft = bli_dgemv_n_zen4_32x8_int_st;
|
||||
}
|
||||
ker_ft
|
||||
(
|
||||
transa,
|
||||
|
||||
Reference in New Issue
Block a user