Add an option to change the complex return type.

ifort apparently does not return complex numbers in registers, as C/C++ compilers (and gfortran) do, but instead passes a "hidden" first parameter that receives the return value. The option --complex-return=gnu|intel has been added; if it is not specified, configure guesses the convention from FC when that variable is set, and otherwise defaults to gnu. This option affects the signatures of cdotc, cdotu, zdotc, and zdotu, so a single library cannot be used with both GNU and Intel Fortran compilers. Fixes #433.
This commit is contained in:
Devin Matthews
2020-08-06 14:03:55 -05:00
parent 6e522e5823
commit 7fdc0fc893
6 changed files with 223 additions and 46 deletions

View File

@@ -165,5 +165,11 @@
#define BLIS_DISABLE_SHARED
#endif
// Select the convention used to return complex values from the Fortran-
// callable dot products. The placeholder in the condition below is replaced
// by configure with 1 ("intel": hidden first argument) or 0 ("gnu": return
// by value). Exactly one of the two macros is defined, and each is defined
// with no value, so consumers must test them with #ifdef/#ifndef rather
// than with #if.
#if @complex_return_intel@
#define BLIS_ENABLE_COMPLEX_RETURN_INTEL
#else
#define BLIS_DISABLE_COMPLEX_RETURN_INTEL
#endif
#endif

59
configure vendored
View File

@@ -299,6 +299,15 @@ print_usage()
echo " when debugging certain configuration issues, and/or as"
echo " a sanity check to make sure these lists are constituted"
echo " as expected."
echo " "
echo " --complex-return=gnu|intel"
echo " "
echo " Specify the way in which complex numbers are returned"
echo " from Fortran functions, either \"gnu\" (return in"
echo " registers) or \"intel\" (return via hidden argument)."
echo " If not specified and the environment variable FC is set,"
echo " attempt to determine the return type from the compiler."
echo " Otherwise, the default is \"gnu\"."
echo " "
echo " -q, --quiet Suppress informational output. By default, configure"
echo " is verbose. (NOTE: -q is not yet implemented)"
@@ -309,6 +318,7 @@ print_usage()
echo " "
echo " CC Specifies the C compiler to use."
echo " CXX Specifies the C++ compiler to use (sandbox only)."
echo " FC Specifies the Fortran compiler to use (only to determine --complex-return)."
echo " RANLIB Specifies the ranlib executable to use."
echo " AR Specifies the archiver to use."
echo " CFLAGS Specifies additional compiler flags to use (prepended)."
@@ -1954,6 +1964,7 @@ main()
enable_sup_handling='yes'
enable_memkind='' # The default memkind value is determined later on.
force_version='no'
complex_return='default'
# The sandbox flag and name.
sandbox_flag=''
@@ -2142,6 +2153,9 @@ main()
show-config-list)
show_config_list=1
;;
complex-return=*)
complex_return=${OPTARG#*=}
;;
*)
print_usage
;;
@@ -2996,7 +3010,49 @@ main()
enable_sandbox_01=0
fi
# Check the method used for returning complex numbers.
#
# Inputs:
#   complex_return  'default' unless --complex-return=gnu|intel was given.
#   FC              Optional; the Fortran compiler command, consulted only
#                   when complex_return is still 'default'.
# Outputs:
#   complex_return          Resolved to 'gnu' or 'intel'.
#   complex_return_intel01  '0' for gnu, '1' for intel.
# Any other value of complex_return aborts configure with exit status 1.
if [ "x${complex_return}" = "xdefault" ]; then

	if [ -n "${FC}" ]; then

		# Determine the complex return type from the given Fortran compiler.

		# Query the full vendor version string output. This includes the
		# compiler name along with (potentially) a bunch of other textual
		# clutter.
		# NOTE: This maybe should use merged stdout/stderr rather than only
		# stdout. But it works for now.
		# ${FC} is intentionally left unquoted so that a value containing
		# extra arguments is split into separate words.
		vendor_string="$(${FC} --version 2>/dev/null)"

		# Isolate the compiler "vendor" (ie: the compiler's simple name)
		# from the version output. 'grep -E' is used rather than 'egrep',
		# which is obsolescent and warns on recent GNU grep releases.
		# The last part ({ read first rest ; echo "$first" ; }) keeps only
		# the first word of the first match, since a vendor name may occur
		# more than once in the output.
		fc_vendor=$(echo "${vendor_string}" | grep -E -o 'ifort|GNU' | { read first rest ; echo "$first" ; })

		if [ "x${fc_vendor}" = "xifort" ]; then
			complex_return='intel'
		elif [ "x${fc_vendor}" = "xGNU" ]; then
			complex_return='gnu'
		else
			# Unrecognized compiler: warn, then fall back to the default.
			echo "${script_name}: unable to determine Fortran compiler vendor!"
			complex_return='gnu'
		fi
	else
		# No Fortran compiler was provided, so assume the GNU convention.
		complex_return='gnu'
	fi
fi

# Translate the resolved convention into the 0/1 value substituted into the
# generated configuration header.
if [ "x${complex_return}" = "xgnu" ]; then
	complex_return_intel01='0'
elif [ "x${complex_return}" = "xintel" ]; then
	complex_return_intel01='1'
else
	echo "${script_name}: unknown complex return type \"${complex_return}\"! Cannot continue."
	echo "${script_name}: *** Acceptable values are \"gnu\" and \"intel\"."
	exit 1
fi

echo "${script_name}: configuring complex return type as \"${complex_return}\"."
# Variables that may contain forward slashes, such as paths, need extra
# escaping when used in sed commands. We insert those extra escape
@@ -3165,7 +3221,8 @@ main()
| sed -e "s/@enable_memkind@/${enable_memkind_01}/g" \
| sed -e "s/@enable_pragma_omp_simd@/${enable_pragma_omp_simd_01}/g" \
| sed -e "s/@enable_sandbox@/${enable_sandbox_01}/g" \
| sed -e "s/@enable_shared@/${enable_shared_01}/g" \
| sed -e "s/@enable_shared@/${enable_shared_01}/g" \
| sed -e "s/@complex_return_intel@/${complex_return_intel01}/g" \
> "${bli_config_h_out_path}"

View File

@@ -34,6 +34,7 @@
#include "blis.h"
#ifdef BLIS_ENABLE_BLAS
//
// Define BLAS-to-BLIS interfaces.
@@ -48,45 +49,103 @@ ftype PASTEF772(ch,blasname,chc) \
const ftype* y, const f77_int* incy \
) \
{ \
dim_t n0; \
ftype* x0; \
ftype* y0; \
inc_t incx0; \
inc_t incy0; \
ftype rho; \
dim_t n0; \
ftype* x0; \
ftype* y0; \
inc_t incx0; \
inc_t incy0; \
ftype rho; \
\
/* Initialize BLIS. */ \
bli_init_auto(); \
/* Initialize BLIS. */ \
bli_init_auto(); \
\
/* Convert/typecast negative values of n to zero. */ \
bli_convert_blas_dim1( *n, n0 ); \
/* Convert/typecast negative values of n to zero. */ \
bli_convert_blas_dim1( *n, n0 ); \
\
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */ \
bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */ \
bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
\
/* Call BLIS interface. */ \
PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \
( \
blis_conjx, \
BLIS_NO_CONJUGATE, \
n0, \
x0, incx0, \
y0, incy0, \
&rho, \
NULL, \
NULL \
); \
/* Call BLIS interface. */ \
PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \
( \
blis_conjx, \
BLIS_NO_CONJUGATE, \
n0, \
x0, incx0, \
y0, incy0, \
&rho, \
NULL, \
NULL \
); \
\
/* Finalize BLIS. */ \
bli_finalize_auto(); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
\
return rho; \
return rho; \
}
#ifdef BLIS_ENABLE_BLAS
INSERT_GENTFUNCDOT_BLAS( dot, dotv )
INSERT_GENTFUNCDOTR_BLAS( dot, dotv )
// BLIS_DISABLE_COMPLEX_RETURN_INTEL is defined with no value, so its presence
// must be tested with #ifdef. Using "#if" on a macro that expands to nothing
// leaves the directive without an expression, which is a preprocessing error
// in the "gnu" configuration.
#ifdef BLIS_DISABLE_COMPLEX_RETURN_INTEL
// "gnu" complex return type: the complex dot products return their result by
// value, using the GENTFUNCDOT definition already in effect.
INSERT_GENTFUNCDOTC_BLAS( dot, dotv )
#else
// For the "intel" complex return type, use a hidden parameter to return the result
//
// Redefine GENTFUNCDOT so that each generated function returns void and
// stores the dot product through its first argument.
//   rhop        - output; receives the computed dot product.
//   n           - pointer to the vector length; bli_convert_blas_dim1()
//                 converts negative values to zero.
//   x, incx     - first vector and its increment.
//   y, incy     - second vector and its increment.
// blis_conjx is passed as the conjugation applied to x; y is always passed
// with BLIS_NO_CONJUGATE.
#undef GENTFUNCDOT
#define GENTFUNCDOT( ftype, ch, chc, blis_conjx, blasname, blisname ) \
\
void PASTEF772(ch,blasname,chc) \
( \
ftype* rhop, \
const f77_int* n, \
const ftype* x, const f77_int* incx, \
const ftype* y, const f77_int* incy \
) \
{ \
dim_t n0; \
ftype* x0; \
ftype* y0; \
inc_t incx0; \
inc_t incy0; \
ftype rho; \
\
/* Initialize BLIS. */ \
bli_init_auto(); \
\
/* Convert/typecast negative values of n to zero. */ \
bli_convert_blas_dim1( *n, n0 ); \
\
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */ \
bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
\
/* Call BLIS interface. */ \
PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \
( \
blis_conjx, \
BLIS_NO_CONJUGATE, \
n0, \
x0, incx0, \
y0, incy0, \
&rho, \
NULL, \
NULL \
); \
\
/* Finalize BLIS. */ \
bli_finalize_auto(); \
\
/* Hand the result back through the hidden return argument. */ \
*rhop = rho; \
}
INSERT_GENTFUNCDOTC_BLAS( dot, dotv )
#endif
// -- "Black sheep" dot product function definitions --

View File

@@ -32,6 +32,7 @@
*/
#ifdef BLIS_ENABLE_BLAS
//
// Prototype BLAS-to-BLIS interfaces.
@@ -46,8 +47,29 @@ BLIS_EXPORT_BLAS ftype PASTEF772(ch,blasname,chc) \
const ftype* y, const f77_int* incy \
);
#ifdef BLIS_ENABLE_BLAS
INSERT_GENTPROTDOT_BLAS( dot )
INSERT_GENTPROTDOTR_BLAS( dot )
// BLIS_DISABLE_COMPLEX_RETURN_INTEL is defined with no value, so its presence
// must be tested with #ifdef. Using "#if" on a macro that expands to nothing
// leaves the directive without an expression, which is a preprocessing error
// in the "gnu" configuration.
#ifdef BLIS_DISABLE_COMPLEX_RETURN_INTEL
// "gnu" complex return type: the complex dot products are prototyped as
// returning their result by value, using the GENTPROTDOT already in effect.
INSERT_GENTPROTDOTC_BLAS( dot )
#else
// For the "intel" complex return type, use a hidden parameter to return the result
//
// Redefine GENTPROTDOT so that each prototype returns void and takes the
// result location (rhop) as its first argument, ahead of n, x/incx and
// y/incy.
#undef GENTPROTDOT
#define GENTPROTDOT( ftype, ch, chc, blasname ) \
\
BLIS_EXPORT_BLAS void PASTEF772(ch,blasname,chc) \
( \
ftype* rhop, \
const f77_int* n, \
const ftype* x, const f77_int* incx, \
const ftype* y, const f77_int* incy \
);
INSERT_GENTPROTDOTC_BLAS( dot )
#endif
// -- "Black sheep" dot product function prototypes --
@@ -66,4 +88,5 @@ BLIS_EXPORT_BLAS double PASTEF77(d,sdot)
const float* x, const f77_int* incx,
const float* y, const f77_int* incy
);
#endif

View File

@@ -74,17 +74,33 @@ GENTFUNCCO( scomplex, float, c, s, blasname, blisname ) \
GENTFUNCCO( dcomplex, double, z, d, blasname, blisname )
// -- Basic one-operand macro with conjugation (real funcs only, used only for dot, ger) --
//
// Instantiates GENTFUNCDOT once per real type: float (prefix s) and double
// (prefix d). The third argument (the name suffix) is left empty and the
// conjugation argument is always BLIS_NO_CONJUGATE. The caller must define
// GENTFUNCDOT before invoking this macro.
#define INSERT_GENTFUNCDOTR_BLAS( blasname, blisname ) \
\
GENTFUNCDOT( float, s, , BLIS_NO_CONJUGATE, blasname, blisname ) \
GENTFUNCDOT( double, d, , BLIS_NO_CONJUGATE, blasname, blisname )
// -- Basic one-operand macro with conjugation (complex funcs only, used only for dot, ger) --
//
// Instantiates GENTFUNCDOT four times: for scomplex (prefix c) and dcomplex
// (prefix z), each with suffix c paired with BLIS_CONJUGATE and suffix u
// paired with BLIS_NO_CONJUGATE. The caller must define GENTFUNCDOT before
// invoking this macro.
#define INSERT_GENTFUNCDOTC_BLAS( blasname, blisname ) \
\
GENTFUNCDOT( scomplex, c, c, BLIS_CONJUGATE, blasname, blisname ) \
GENTFUNCDOT( scomplex, c, u, BLIS_NO_CONJUGATE, blasname, blisname ) \
GENTFUNCDOT( dcomplex, z, c, BLIS_CONJUGATE, blasname, blisname ) \
GENTFUNCDOT( dcomplex, z, u, BLIS_NO_CONJUGATE, blasname, blisname )
// -- Basic one-operand macro with conjugation (used only for dot, ger) --
//
// Instantiates GENTFUNCDOT for every supported type by composing the
// real-only and complex-only insertion macros. Each instantiation must be
// emitted exactly once: listing the six GENTFUNCDOT invocations here in
// addition to the two composite macros would generate every function twice.
#define INSERT_GENTFUNCDOT_BLAS( blasname, blisname ) \
\
INSERT_GENTFUNCDOTR_BLAS( blasname, blisname ) \
INSERT_GENTFUNCDOTC_BLAS( blasname, blisname )
// -- Basic one-operand macro with real projection --

View File

@@ -74,17 +74,33 @@ GENTPROTCO( scomplex, float, c, s, blasname ) \
GENTPROTCO( dcomplex, double, z, d, blasname )
// -- Basic one-operand macro with conjugation (real funcs only, used only for dot, ger) --
//
// Instantiates GENTPROTDOT once per real type: float (prefix s) and double
// (prefix d), with the name-suffix argument left empty. The caller must
// define GENTPROTDOT before invoking this macro.
#define INSERT_GENTPROTDOTR_BLAS( blasname ) \
\
GENTPROTDOT( float, s, , blasname ) \
GENTPROTDOT( double, d, , blasname )
// -- Basic one-operand macro with conjugation (complex funcs only, used only for dot, ger) --
//
// Instantiates GENTPROTDOT four times: for scomplex (prefix c) and dcomplex
// (prefix z), each with name suffixes c and u. The caller must define
// GENTPROTDOT before invoking this macro.
#define INSERT_GENTPROTDOTC_BLAS( blasname ) \
\
GENTPROTDOT( scomplex, c, c, blasname ) \
GENTPROTDOT( scomplex, c, u, blasname ) \
GENTPROTDOT( dcomplex, z, c, blasname ) \
GENTPROTDOT( dcomplex, z, u, blasname )
// -- Basic one-operand macro with conjugation (used only for dot, ger) --
//
// Instantiates GENTPROTDOT for every supported type by composing the
// real-only and complex-only insertion macros. Each prototype is emitted
// exactly once: listing the six GENTPROTDOT invocations here in addition to
// the two composite macros would declare every function twice.
#define INSERT_GENTPROTDOT_BLAS( blasname ) \
\
INSERT_GENTPROTDOTR_BLAS( blasname ) \
INSERT_GENTPROTDOTC_BLAS( blasname )
// -- Basic one-operand macro with real projection --