mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Add an option to change the complex return type.
ifort apparently does not return complex numbers in registers as C/C++ (and gfortran) do, but instead passes a "hidden" first parameter that receives the return value. The option --complex-return=gnu|intel has been added; if it is not specified, configure guesses the convention from the Fortran compiler named in FC when FC is set, and defaults to gnu otherwise. This option affects the signatures of cdotc, cdotu, zdotc, and zdotu, so a single library cannot be used with both GNU and Intel Fortran compilers. Fixes #433.
This commit is contained in:
@@ -165,5 +165,11 @@
|
||||
#define BLIS_DISABLE_SHARED
|
||||
#endif
|
||||
|
||||
// configure substitutes @complex_return_intel@ with 1 when the "intel"
// convention is selected (complex results returned via a hidden argument) and
// with 0 for "gnu" (returned in registers). Both macros below are defined
// without a value, so test them with #ifdef/#ifndef rather than #if.
#if @complex_return_intel@
|
||||
#define BLIS_ENABLE_COMPLEX_RETURN_INTEL
|
||||
#else
|
||||
#define BLIS_DISABLE_COMPLEX_RETURN_INTEL
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
59
configure
vendored
59
configure
vendored
@@ -299,6 +299,15 @@ print_usage()
|
||||
echo " when debugging certain configuration issues, and/or as"
|
||||
echo " a sanity check to make sure these lists are constituted"
|
||||
echo " as expected."
|
||||
echo " "
|
||||
echo " --complex-return=gnu|intel"
|
||||
echo " "
|
||||
echo " Specify the way in which complex numbers are returned"
|
||||
echo " from Fortran functions, either \"gnu\" (return in"
|
||||
echo " registers) or \"intel\" (return via hidden argument)."
|
||||
echo " If not specified and the environment variable FC is set,"
|
||||
echo " attempt to determine the return type from the compiler."
|
||||
echo " Otherwise, the default is \"gnu\"."
|
||||
echo " "
|
||||
echo " -q, --quiet Suppress informational output. By default, configure"
|
||||
echo " is verbose. (NOTE: -q is not yet implemented)"
|
||||
@@ -309,6 +318,7 @@ print_usage()
|
||||
echo " "
|
||||
echo " CC Specifies the C compiler to use."
|
||||
echo " CXX Specifies the C++ compiler to use (sandbox only)."
|
||||
echo " FC Specifies the Fortran compiler to use (only to determine --complex-return)."
|
||||
echo " RANLIB Specifies the ranlib executable to use."
|
||||
echo " AR Specifies the archiver to use."
|
||||
echo " CFLAGS Specifies additional compiler flags to use (prepended)."
|
||||
@@ -1954,6 +1964,7 @@ main()
|
||||
enable_sup_handling='yes'
|
||||
enable_memkind='' # The default memkind value is determined later on.
|
||||
force_version='no'
|
||||
complex_return='default'
|
||||
|
||||
# The sandbox flag and name.
|
||||
sandbox_flag=''
|
||||
@@ -2142,6 +2153,9 @@ main()
|
||||
show-config-list)
|
||||
show_config_list=1
|
||||
;;
|
||||
complex-return=*)
|
||||
complex_return=${OPTARG#*=}
|
||||
;;
|
||||
*)
|
||||
print_usage
|
||||
;;
|
||||
@@ -2996,7 +3010,49 @@ main()
|
||||
|
||||
enable_sandbox_01=0
|
||||
fi
|
||||
|
||||
# Check the method used for returning complex numbers. If the user did not
# pass --complex-return, guess the convention from the Fortran compiler named
# in FC; with no FC, or with a compiler that is not recognized, use "gnu".
if [ "x${complex_return}" = "xdefault" ]; then
	if [ -n "${FC}" ]; then
		# Determine the complex return type from the given Fortran compiler.

		# Query the full vendor version string output. This includes the
		# version number along with (potentially) a bunch of other textual
		# clutter.
		# NOTE: Only stdout is captured here; stderr is discarded. Merging
		# the two streams may be preferable, but this works for now.
		vendor_string="$(${FC} --version 2>/dev/null)"

		# Isolate the compiler's simple name. grep -o prints every match on
		# its own line, so read just the first line and keep its first word;
		# that leaves at most one token however often the name appears.
		# grep -E is used in place of the obsolescent egrep.
		fc_vendor=$(echo "${vendor_string}" | grep -oE 'ifort|GNU' | { read first rest ; echo "${first}" ; })

		if [ "x${fc_vendor}" = "xifort" ]; then
			complex_return='intel'
		elif [ "x${fc_vendor}" = "xGNU" ]; then
			complex_return='gnu'
		else
			# Unknown compiler: warn, then fall back to the default.
			echo "${script_name}: unable to determine Fortran compiler vendor!"
			complex_return='gnu'
		fi
	else
		complex_return='gnu'
	fi
fi
|
||||
|
||||
# Translate the selected convention into the 0/1 flag that is substituted for
# @complex_return_intel@ in the generated config header. Any value other than
# "gnu" or "intel" is a fatal configuration error.
case "${complex_return}" in
	gnu)
		complex_return_intel01='0'
		;;
	intel)
		complex_return_intel01='1'
		;;
	*)
		echo "${script_name}: unknown complex return type \"${complex_return}\"! Cannot continue."
		echo "${script_name}: *** Acceptable values are \"gnu\" and \"intel\"."
		exit 1
		;;
esac
|
||||
|
||||
echo "${script_name}: configuring complex return type as \"${complex_return}\"."
|
||||
|
||||
# Variables that may contain forward slashes, such as paths, need extra
|
||||
# escaping when used in sed commands. We insert those extra escape
|
||||
@@ -3165,7 +3221,8 @@ main()
|
||||
| sed -e "s/@enable_memkind@/${enable_memkind_01}/g" \
|
||||
| sed -e "s/@enable_pragma_omp_simd@/${enable_pragma_omp_simd_01}/g" \
|
||||
| sed -e "s/@enable_sandbox@/${enable_sandbox_01}/g" \
|
||||
| sed -e "s/@enable_shared@/${enable_shared_01}/g" \
|
||||
| sed -e "s/@enable_shared@/${enable_shared_01}/g" \
|
||||
| sed -e "s/@complex_return_intel@/${complex_return_intel01}/g" \
|
||||
> "${bli_config_h_out_path}"
|
||||
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS
|
||||
|
||||
//
|
||||
// Define BLAS-to-BLIS interfaces.
|
||||
@@ -48,45 +49,103 @@ ftype PASTEF772(ch,blasname,chc) \
|
||||
const ftype* y, const f77_int* incy \
|
||||
) \
|
||||
{ \
|
||||
dim_t n0; \
|
||||
ftype* x0; \
|
||||
ftype* y0; \
|
||||
inc_t incx0; \
|
||||
inc_t incy0; \
|
||||
ftype rho; \
|
||||
dim_t n0; \
|
||||
ftype* x0; \
|
||||
ftype* y0; \
|
||||
inc_t incx0; \
|
||||
inc_t incy0; \
|
||||
ftype rho; \
|
||||
\
|
||||
/* Initialize BLIS. */ \
|
||||
bli_init_auto(); \
|
||||
/* Initialize BLIS. */ \
|
||||
bli_init_auto(); \
|
||||
\
|
||||
/* Convert/typecast negative values of n to zero. */ \
|
||||
bli_convert_blas_dim1( *n, n0 ); \
|
||||
/* Convert/typecast negative values of n to zero. */ \
|
||||
bli_convert_blas_dim1( *n, n0 ); \
|
||||
\
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */ \
|
||||
bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
|
||||
bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */ \
|
||||
bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
|
||||
bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
|
||||
\
|
||||
/* Call BLIS interface. */ \
|
||||
PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \
|
||||
( \
|
||||
blis_conjx, \
|
||||
BLIS_NO_CONJUGATE, \
|
||||
n0, \
|
||||
x0, incx0, \
|
||||
y0, incy0, \
|
||||
&rho, \
|
||||
NULL, \
|
||||
NULL \
|
||||
); \
|
||||
/* Call BLIS interface. */ \
|
||||
PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \
|
||||
( \
|
||||
blis_conjx, \
|
||||
BLIS_NO_CONJUGATE, \
|
||||
n0, \
|
||||
x0, incx0, \
|
||||
y0, incy0, \
|
||||
&rho, \
|
||||
NULL, \
|
||||
NULL \
|
||||
); \
|
||||
\
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
\
|
||||
return rho; \
|
||||
return rho; \
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS
|
||||
INSERT_GENTFUNCDOT_BLAS( dot, dotv )
|
||||
INSERT_GENTFUNCDOTR_BLAS( dot, dotv )
|
||||
|
||||
#if BLIS_DISABLE_COMPLEX_RETURN_INTEL
|
||||
|
||||
INSERT_GENTFUNCDOTC_BLAS( dot, dotv )
|
||||
|
||||
#else
|
||||
|
||||
// "intel" complex return convention: the generated function returns void and
// writes its result through a hidden leading pointer argument (rhop) instead
// of returning it by value.
#undef  GENTFUNCDOT
#define GENTFUNCDOT( ftype, ch, chc, blis_conjx, blasname, blisname ) \
\
void PASTEF772(ch,blasname,chc) \
     ( \
       ftype*         rhop, \
       const f77_int* n, \
       const ftype*   x, const f77_int* incx, \
       const ftype*   y, const f77_int* incy  \
     ) \
{ \
	dim_t  n_elem; \
	ftype* x_first; \
	ftype* y_first; \
	inc_t  x_inc; \
	inc_t  y_inc; \
	ftype  dot_val; \
\
	/* Bring BLIS up before doing any work. */ \
	bli_init_auto(); \
\
	/* A negative n from the caller is converted/typecast to zero. */ \
	bli_convert_blas_dim1( *n, n_elem ); \
\
	/* For negative input increments, adjust the vector pointers so that */ \
	/* positive increments can be used instead. */ \
	bli_convert_blas_incv( n_elem, (ftype*)x, *incx, x_first, x_inc ); \
	bli_convert_blas_incv( n_elem, (ftype*)y, *incy, y_first, y_inc ); \
\
	/* Hand the converted operands to the BLIS typed interface. */ \
	PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \
	( \
	  blis_conjx, \
	  BLIS_NO_CONJUGATE, \
	  n_elem, \
	  x_first, x_inc, \
	  y_first, y_inc, \
	  &dot_val, \
	  NULL, \
	  NULL  \
	); \
\
	/* Tear BLIS back down. */ \
	bli_finalize_auto(); \
\
	/* Deliver the result through the hidden argument. */ \
	*rhop = dot_val; \
}
|
||||
|
||||
INSERT_GENTFUNCDOTC_BLAS( dot, dotv )
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// -- "Black sheep" dot product function definitions --
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
|
||||
*/
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS
|
||||
|
||||
//
|
||||
// Prototype BLAS-to-BLIS interfaces.
|
||||
@@ -46,8 +47,29 @@ BLIS_EXPORT_BLAS ftype PASTEF772(ch,blasname,chc) \
|
||||
const ftype* y, const f77_int* incy \
|
||||
);
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS
|
||||
INSERT_GENTPROTDOT_BLAS( dot )
|
||||
INSERT_GENTPROTDOTR_BLAS( dot )
|
||||
|
||||
#if BLIS_DISABLE_COMPLEX_RETURN_INTEL
|
||||
|
||||
INSERT_GENTPROTDOTC_BLAS( dot )
|
||||
|
||||
#else
|
||||
|
||||
// For the "intel" complex return type, use a hidden parameter to return the
// result: the prototype returns void and takes the result pointer (rhop) as
// its first argument, ahead of the n, x/incx and y/incy arguments.
|
||||
#undef GENTPROTDOT
|
||||
#define GENTPROTDOT( ftype, ch, chc, blasname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLAS void PASTEF772(ch,blasname,chc) \
|
||||
( \
|
||||
ftype* rhop, \
|
||||
const f77_int* n, \
|
||||
const ftype* x, const f77_int* incx, \
|
||||
const ftype* y, const f77_int* incy \
|
||||
);
|
||||
|
||||
INSERT_GENTPROTDOTC_BLAS( dot )
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// -- "Black sheep" dot product function prototypes --
|
||||
@@ -66,4 +88,5 @@ BLIS_EXPORT_BLAS double PASTEF77(d,sdot)
|
||||
const float* x, const f77_int* incx,
|
||||
const float* y, const f77_int* incy
|
||||
);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -74,17 +74,33 @@ GENTFUNCCO( scomplex, float, c, s, blasname, blisname ) \
|
||||
GENTFUNCCO( dcomplex, double, z, d, blasname, blisname )
|
||||
|
||||
|
||||
// -- Basic one-operand macro with conjugation (real funcs only, used only for dot, ger) --
|
||||
|
||||
|
||||
// Instantiates GENTFUNCDOT once per real type: float (ch = s) and double
// (ch = d), each with an empty chc argument and BLIS_NO_CONJUGATE.
#define INSERT_GENTFUNCDOTR_BLAS( blasname, blisname ) \
|
||||
\
|
||||
GENTFUNCDOT( float, s, , BLIS_NO_CONJUGATE, blasname, blisname ) \
|
||||
GENTFUNCDOT( double, d, , BLIS_NO_CONJUGATE, blasname, blisname )
|
||||
|
||||
|
||||
// -- Basic one-operand macro with conjugation (complex funcs only, used only for dot, ger) --
|
||||
|
||||
|
||||
// Instantiates GENTFUNCDOT for the complex types scomplex (ch = c) and
// dcomplex (ch = z), each twice: chc = c with BLIS_CONJUGATE and chc = u with
// BLIS_NO_CONJUGATE.
#define INSERT_GENTFUNCDOTC_BLAS( blasname, blisname ) \
|
||||
\
|
||||
GENTFUNCDOT( scomplex, c, c, BLIS_CONJUGATE, blasname, blisname ) \
|
||||
GENTFUNCDOT( scomplex, c, u, BLIS_NO_CONJUGATE, blasname, blisname ) \
|
||||
GENTFUNCDOT( dcomplex, z, c, BLIS_CONJUGATE, blasname, blisname ) \
|
||||
GENTFUNCDOT( dcomplex, z, u, BLIS_NO_CONJUGATE, blasname, blisname )
|
||||
|
||||
|
||||
// -- Basic one-operand macro with conjugation (used only for dot, ger) --
|
||||
|
||||
|
||||
#define INSERT_GENTFUNCDOT_BLAS( blasname, blisname ) \
|
||||
\
|
||||
GENTFUNCDOT( float, s, , BLIS_NO_CONJUGATE, blasname, blisname ) \
|
||||
GENTFUNCDOT( double, d, , BLIS_NO_CONJUGATE, blasname, blisname ) \
|
||||
GENTFUNCDOT( scomplex, c, c, BLIS_CONJUGATE, blasname, blisname ) \
|
||||
GENTFUNCDOT( scomplex, c, u, BLIS_NO_CONJUGATE, blasname, blisname ) \
|
||||
GENTFUNCDOT( dcomplex, z, c, BLIS_CONJUGATE, blasname, blisname ) \
|
||||
GENTFUNCDOT( dcomplex, z, u, BLIS_NO_CONJUGATE, blasname, blisname )
|
||||
INSERT_GENTFUNCDOTR_BLAS( blasname, blisname ) \
|
||||
INSERT_GENTFUNCDOTC_BLAS( blasname, blisname )
|
||||
|
||||
|
||||
// -- Basic one-operand macro with real projection --
|
||||
|
||||
@@ -74,17 +74,33 @@ GENTPROTCO( scomplex, float, c, s, blasname ) \
|
||||
GENTPROTCO( dcomplex, double, z, d, blasname )
|
||||
|
||||
|
||||
// -- Basic one-operand macro with conjugation (real funcs only, used only for dot, ger) --
|
||||
|
||||
|
||||
// Instantiates GENTPROTDOT once per real type: float (ch = s) and double
// (ch = d), each with an empty chc argument.
#define INSERT_GENTPROTDOTR_BLAS( blasname ) \
|
||||
\
|
||||
GENTPROTDOT( float, s, , blasname ) \
|
||||
GENTPROTDOT( double, d, , blasname )
|
||||
|
||||
|
||||
// -- Basic one-operand macro with conjugation (complex funcs only, used only for dot, ger) --
|
||||
|
||||
|
||||
// Instantiates GENTPROTDOT for the complex types scomplex (ch = c) and
// dcomplex (ch = z), each twice: once with chc = c and once with chc = u.
#define INSERT_GENTPROTDOTC_BLAS( blasname ) \
|
||||
\
|
||||
GENTPROTDOT( scomplex, c, c, blasname ) \
|
||||
GENTPROTDOT( scomplex, c, u, blasname ) \
|
||||
GENTPROTDOT( dcomplex, z, c, blasname ) \
|
||||
GENTPROTDOT( dcomplex, z, u, blasname )
|
||||
|
||||
|
||||
// -- Basic one-operand macro with conjugation (used only for dot, ger) --
|
||||
|
||||
|
||||
#define INSERT_GENTPROTDOT_BLAS( blasname ) \
|
||||
\
|
||||
GENTPROTDOT( float, s, , blasname ) \
|
||||
GENTPROTDOT( double, d, , blasname ) \
|
||||
GENTPROTDOT( scomplex, c, c, blasname ) \
|
||||
GENTPROTDOT( scomplex, c, u, blasname ) \
|
||||
GENTPROTDOT( dcomplex, z, c, blasname ) \
|
||||
GENTPROTDOT( dcomplex, z, u, blasname )
|
||||
INSERT_GENTPROTDOTR_BLAS( blasname ) \
|
||||
INSERT_GENTPROTDOTC_BLAS( blasname )
|
||||
|
||||
|
||||
// -- Basic one-operand macro with real projection --
|
||||
|
||||
Reference in New Issue
Block a user