mirror of
https://github.com/amd/blis.git
synced 2026-05-12 10:05:38 +00:00
Optimized daxpy2v implementation
- Optimized the axpy2v implementation for the double datatype by handling rows in multiples of 4 and storing the final computed result at the end of the computation, which avoids unnecessary stores and improves performance. - Optimal use and reuse of vector registers for faster computation. AMD-Internal: [CPUPL-1973] Change-Id: I7b8ef94d0f67c1c666fdce26e9b2b7291365d2e9
This commit is contained in:
committed by
Dipal M Zambare
parent
43c16d8e08
commit
718c6bc024
@@ -8,5 +8,8 @@ target_sources("${PROJECT_NAME}"
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bli_axpyf_zen_int_4.c
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bli_axpyf_zen_int_6.c
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bli_axpy2v_zen_int.c
|
||||
<<<<<<< HEAD
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bli_dotxaxpyf_zen_int_8.c
|
||||
=======
|
||||
>>>>>>> 8b5b2707... Optimized daxpy2v implementation
|
||||
)
|
||||
|
||||
@@ -186,6 +186,7 @@ void bli_daxpy2v_zen_int
|
||||
);
|
||||
}
|
||||
}
|
||||
<<<<<<< HEAD
|
||||
|
||||
/**
|
||||
* zaxpy2v kernel performs axpy2v operation.
|
||||
@@ -718,4 +719,6 @@ void bli_zaxpy2v_zen_int
|
||||
}
|
||||
}
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
|
||||
}
|
||||
}
|
||||
=======
|
||||
>>>>>>> 8b5b2707... Optimized daxpy2v implementation
|
||||
|
||||
@@ -32,6 +32,19 @@
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
<<<<<<< HEAD
|
||||
=======
|
||||
// hemv helper function
|
||||
void bli_pre_hemv_8x8(double *a, double *x,
|
||||
double *y, double *alpha,
|
||||
dim_t cs_a, dim_t rs_a);
|
||||
|
||||
void bli_post_hemv_8x8(double *a, double *x,
|
||||
double *y, double *alpha,
|
||||
dim_t cs_a, dim_t rs_a);
|
||||
|
||||
|
||||
>>>>>>> 8b5b2707... Optimized daxpy2v implementation
|
||||
// -- level-1m --
|
||||
PACKM_KER_PROT(double, d, packm_8xk_gen_zen)
|
||||
PACKM_KER_PROT(double, d, packm_6xk_gen_zen)
|
||||
@@ -129,7 +142,10 @@ AXPYF_KER_PROT( dcomplex, z, axpyf_zen_int_5 )
|
||||
AXPYF_KER_PROT( dcomplex, z, axpyf_zen_int_4 )
|
||||
// axpy2v (intrinsics)
|
||||
AXPY2V_KER_PROT(double, d, axpy2v_zen_int )
|
||||
<<<<<<< HEAD
|
||||
AXPY2V_KER_PROT(dcomplex, z, axpy2v_zen_int )
|
||||
=======
|
||||
>>>>>>> 8b5b2707... Optimized daxpy2v implementation
|
||||
|
||||
// dotxf (intrinsics)
|
||||
DOTXF_KER_PROT( float, s, dotxf_zen_int_8 )
|
||||
|
||||
Reference in New Issue
Block a user