mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Vectorized and parallelized zdscal routine
- Implemented an optimized intrinsic kernel for zdscalv for the cases where AVX2 is supported.
- Also added multithreaded support for the same kernel.
- The optimal number of threads is calculated on the basis of the input size.

AMD-Internal: [CPUPL-2602]
Change-Id: I4d05c3b1cc365a7770703286a89c6dce3875c067
This commit is contained in:
committed by
Arnav Sharma
parent
9c292b79e2
commit
90f915d3a9
@@ -159,8 +159,8 @@ GENTFUNCR2( dcomplex, double, z, d, blasname, blisname )
|
||||
\
|
||||
GENTFUNCSCAL( scomplex, scomplex, c, , blasname, blisname ) \
|
||||
GENTFUNCSCAL( dcomplex, dcomplex, z, , blasname, blisname ) \
|
||||
GENTFUNCSCAL( scomplex, float, c, s, blasname, blisname ) \
|
||||
GENTFUNCSCAL( dcomplex, double, z, d, blasname, blisname )
|
||||
GENTFUNCSCAL( scomplex, float, c, s, blasname, blisname )
|
||||
// GENTFUNCSCAL( dcomplex, double, z, d, blasname, blisname )
|
||||
|
||||
|
||||
#define INSERT_GENTFUNCSCAL_BLAS( blasname, blisname ) \
|
||||
|
||||
Reference in New Issue
Block a user