Add a --complex-return=gnu|intel option to configure and use it to select
how the BLAS compatibility layer returns the results of the complex dot
routines generated by INSERT_GENTFUNCDOTC_BLAS( dot, dotv ): by value
("gnu") or through a hidden leading pointer argument ("intel").

The config header defines exactly one of BLIS_ENABLE_COMPLEX_RETURN_INTEL
or BLIS_DISABLE_COMPLEX_RETURN_INTEL, and defines it with no value, so
consumers must test these macros with #ifdef rather than #if.

diff --git a/build/bli_config.h.in b/build/bli_config.h.in
index 1bb2ef28b..7d910a2c2 100644
--- a/build/bli_config.h.in
+++ b/build/bli_config.h.in
@@ -165,5 +165,11 @@
 #define BLIS_DISABLE_SHARED
 #endif
 
+#if @complex_return_intel@
+#define BLIS_ENABLE_COMPLEX_RETURN_INTEL
+#else
+#define BLIS_DISABLE_COMPLEX_RETURN_INTEL
+#endif
+
 
 #endif
diff --git a/configure b/configure
index a80e8cbd8..2ccbede56 100755
--- a/configure
+++ b/configure
@@ -299,6 +299,15 @@ print_usage()
 	echo " when debugging certain configuration issues, and/or as"
 	echo " a sanity check to make sure these lists are constituted"
 	echo " as expected."
+    echo " "
+    echo " --complex-return=gnu|intel"
+    echo " "
+    echo " Specify the way in which complex numbers are returned"
+    echo " from Fortran functions, either \"gnu\" (return in"
+    echo " registers) or \"intel\" (return via hidden argument)."
+    echo " If not specified and the environment variable FC is set,"
+    echo " attempt to determine the return type from the compiler."
+    echo " Otherwise, the default is \"gnu\"."
 	echo " "
 	echo " -q, --quiet Suppress informational output. By default, configure"
 	echo " is verbose. (NOTE: -q is not yet implemented)"
@@ -309,6 +318,7 @@ print_usage()
 	echo " "
 	echo " CC Specifies the C compiler to use."
 	echo " CXX Specifies the C++ compiler to use (sandbox only)."
+    echo " FC Specifies the Fortran compiler to use (only to determine --complex-return)."
 	echo " RANLIB Specifies the ranlib executable to use."
 	echo " AR Specifies the archiver to use."
 	echo " CFLAGS Specifies additional compiler flags to use (prepended)."
@@ -1954,6 +1964,7 @@ main()
 	enable_sup_handling='yes'
 	enable_memkind='' # The default memkind value is determined later on.
 	force_version='no'
+    complex_return='default'
 
 	# The sandbox flag and name.
 	sandbox_flag=''
@@ -2142,6 +2153,9 @@ main()
 					show-config-list)
 						show_config_list=1
 						;;
+                    complex-return=*)
+                        complex_return=${OPTARG#*=}
+                        ;;
 					*)
 						print_usage
 						;;
@@ -2996,7 +3010,49 @@ main()
 		enable_sandbox_01=0
 	fi
 
+
+    # Check the method used for returning complex numbers
+    if [ "x${complex_return}" = "xdefault" ]; then
+        if [ -n "${FC}" ]; then
+            # Determine the complex return type from the given Fortran compiler
+            # Query the full vendor version string output. This includes the
+            # version number along with (potentially) a bunch of other textual
+            # clutter.
+            # NOTE: This maybe should use merged stdout/stderr rather than only
+            # stdout. But it works for now.
+            vendor_string="$(${FC} --version 2>/dev/null)"
+
+            # Isolate the compiler "vendor" (ie: the compiler's simple name)
+            # from the version string.
+            # The last part ({ read first rest ; echo $first ; }) is a workaround
+            # to OS X's egrep only returning the first match.
+            fc_vendor=$(echo "${vendor_string}" | egrep -o 'ifort|GNU' | { read first rest ; echo $first ; })
+
+            if [ "x${fc_vendor}" = "xifort" ]; then
+                complex_return='intel'
+            elif [ "x${fc_vendor}" = "xGNU" ]; then
+                complex_return='gnu'
+            else
+                echo "${script_name}: unable to determine Fortran compiler vendor!"
+                complex_return='gnu'
+            fi
+        else
+            complex_return='gnu'
+        fi
+    fi
+
+    if [ "x${complex_return}" = "xgnu" ]; then
+        complex_return_intel01='0'
+    elif [ "x${complex_return}" = "xintel" ]; then
+        complex_return_intel01='1'
+    else
+        echo "${script_name}: unknown complex return type \"${complex_return}\"! Cannot continue."
+        echo "${script_name}: *** Acceptable values are \"gnu\" and \"intel\"."
+        exit 1
+    fi
+
+    echo "${script_name}: configuring complex return type as \"${complex_return}\"."
 
 
 	# Variables that may contain forward slashes, such as paths, need extra
 	# escaping when used in sed commands. We insert those extra escape
@@ -3165,7 +3221,8 @@ main()
 		| sed -e "s/@enable_memkind@/${enable_memkind_01}/g" \
 		| sed -e "s/@enable_pragma_omp_simd@/${enable_pragma_omp_simd_01}/g" \
 		| sed -e "s/@enable_sandbox@/${enable_sandbox_01}/g" \
-		| sed -e "s/@enable_shared@/${enable_shared_01}/g" \
+        | sed -e "s/@enable_shared@/${enable_shared_01}/g" \
+        | sed -e "s/@complex_return_intel@/${complex_return_intel01}/g" \
 		> "${bli_config_h_out_path}"
 
 
diff --git a/frame/compat/bla_dot.c b/frame/compat/bla_dot.c
index 500a15020..3226d34a8 100644
--- a/frame/compat/bla_dot.c
+++ b/frame/compat/bla_dot.c
@@ -34,6 +34,7 @@
 
 #include "blis.h"
 
+#ifdef BLIS_ENABLE_BLAS
 
 //
 // Define BLAS-to-BLIS interfaces.
@@ -48,45 +49,103 @@ ftype PASTEF772(ch,blasname,chc) \
        const ftype* y, const f77_int* incy \
      ) \
 { \
-	dim_t n0; \
-	ftype* x0; \
-	ftype* y0; \
-	inc_t incx0; \
-	inc_t incy0; \
-	ftype rho; \
+    dim_t n0; \
+    ftype* x0; \
+    ftype* y0; \
+    inc_t incx0; \
+    inc_t incy0; \
+    ftype rho; \
 \
-	/* Initialize BLIS. */ \
-	bli_init_auto(); \
+    /* Initialize BLIS. */ \
+    bli_init_auto(); \
 \
-	/* Convert/typecast negative values of n to zero. */ \
-	bli_convert_blas_dim1( *n, n0 ); \
+    /* Convert/typecast negative values of n to zero. */ \
+    bli_convert_blas_dim1( *n, n0 ); \
 \
-	/* If the input increments are negative, adjust the pointers so we can
-	   use positive increments instead. */ \
-	bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
-	bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
+    /* If the input increments are negative, adjust the pointers so we can
+       use positive increments instead. */ \
+    bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
+    bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
 \
-	/* Call BLIS interface. */ \
-	PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \
-	( \
-	  blis_conjx, \
-	  BLIS_NO_CONJUGATE, \
-	  n0, \
-	  x0, incx0, \
-	  y0, incy0, \
-	  &rho, \
-	  NULL, \
-	  NULL \
-	); \
+    /* Call BLIS interface. */ \
+    PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \
+    ( \
+      blis_conjx, \
+      BLIS_NO_CONJUGATE, \
+      n0, \
+      x0, incx0, \
+      y0, incy0, \
+      &rho, \
+      NULL, \
+      NULL \
+    ); \
 \
-	/* Finalize BLIS. */ \
-	bli_finalize_auto(); \
+    /* Finalize BLIS. */ \
+    bli_finalize_auto(); \
 \
-	return rho; \
+    return rho; \
 }
 
-#ifdef BLIS_ENABLE_BLAS
-INSERT_GENTFUNCDOT_BLAS( dot, dotv )
+INSERT_GENTFUNCDOTR_BLAS( dot, dotv )
+
+#ifdef BLIS_DISABLE_COMPLEX_RETURN_INTEL
+// The config header defines this macro with no value; test it with #ifdef.
+INSERT_GENTFUNCDOTC_BLAS( dot, dotv )
+
+#else
+
+// For the "intel" complex return type, use a hidden parameter to return the result
+#undef GENTFUNCDOT
+#define GENTFUNCDOT( ftype, ch, chc, blis_conjx, blasname, blisname ) \
+\
+void PASTEF772(ch,blasname,chc) \
+     ( \
+       ftype* rhop, \
+       const f77_int* n, \
+       const ftype* x, const f77_int* incx, \
+       const ftype* y, const f77_int* incy \
+     ) \
+{ \
+    dim_t n0; \
+    ftype* x0; \
+    ftype* y0; \
+    inc_t incx0; \
+    inc_t incy0; \
+    ftype rho; \
+\
+    /* Initialize BLIS. */ \
+    bli_init_auto(); \
+\
+    /* Convert/typecast negative values of n to zero. */ \
+    bli_convert_blas_dim1( *n, n0 ); \
+\
+    /* If the input increments are negative, adjust the pointers so we can
+       use positive increments instead. */ \
+    bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
+    bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
+\
+    /* Call BLIS interface. */ \
+    PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \
+    ( \
+      blis_conjx, \
+      BLIS_NO_CONJUGATE, \
+      n0, \
+      x0, incx0, \
+      y0, incy0, \
+      &rho, \
+      NULL, \
+      NULL \
+    ); \
+\
+    /* Finalize BLIS. */ \
+    bli_finalize_auto(); \
+\
+    *rhop = rho; \
+}
+
+INSERT_GENTFUNCDOTC_BLAS( dot, dotv )
+
+#endif
 
 
 // -- "Black sheep" dot product function definitions --
diff --git a/frame/compat/bla_dot.h b/frame/compat/bla_dot.h
index 373e1a7b7..003de2453 100644
--- a/frame/compat/bla_dot.h
+++ b/frame/compat/bla_dot.h
@@ -32,6 +32,7 @@
 
 */
 
+#ifdef BLIS_ENABLE_BLAS
 
 //
 // Prototype BLAS-to-BLIS interfaces.
@@ -46,8 +47,29 @@ BLIS_EXPORT_BLAS ftype PASTEF772(ch,blasname,chc) \
        const ftype* y, const f77_int* incy \
      );
 
-#ifdef BLIS_ENABLE_BLAS
-INSERT_GENTPROTDOT_BLAS( dot )
+INSERT_GENTPROTDOTR_BLAS( dot )
+
+#ifdef BLIS_DISABLE_COMPLEX_RETURN_INTEL
+// The config header defines this macro with no value; test it with #ifdef.
+INSERT_GENTPROTDOTC_BLAS( dot )
+
+#else
+
+// For the "intel" complex return type, use a hidden parameter to return the result
+#undef GENTPROTDOT
+#define GENTPROTDOT( ftype, ch, chc, blasname ) \
+\
+BLIS_EXPORT_BLAS void PASTEF772(ch,blasname,chc) \
+     ( \
+       ftype* rhop, \
+       const f77_int* n, \
+       const ftype* x, const f77_int* incx, \
+       const ftype* y, const f77_int* incy \
+     );
+
+INSERT_GENTPROTDOTC_BLAS( dot )
+
+#endif
 
 
 // -- "Black sheep" dot product function prototypes --
@@ -66,4 +88,5 @@ BLIS_EXPORT_BLAS double PASTEF77(d,sdot)
        const float* x, const f77_int* incx,
        const float* y, const f77_int* incy
      );
+
 #endif
diff --git a/frame/include/bli_gentfunc_macro_defs.h b/frame/include/bli_gentfunc_macro_defs.h
index 82c40ecf1..011ebcdfb 100644
--- a/frame/include/bli_gentfunc_macro_defs.h
+++ b/frame/include/bli_gentfunc_macro_defs.h
@@ -74,17 +74,33 @@ GENTFUNCCO( scomplex, float, c, s, blasname, blisname ) \
 GENTFUNCCO( dcomplex, double, z, d, blasname, blisname )
 
 
+// -- Basic one-operand macro with conjugation (real funcs only, used only for dot, ger) --
+
+
+#define INSERT_GENTFUNCDOTR_BLAS( blasname, blisname ) \
+\
+GENTFUNCDOT( float, s, , BLIS_NO_CONJUGATE, blasname, blisname ) \
+GENTFUNCDOT( double, d, , BLIS_NO_CONJUGATE, blasname, blisname )
+
+
+// -- Basic one-operand macro with conjugation (complex funcs only, used only for dot, ger) --
+
+
+#define INSERT_GENTFUNCDOTC_BLAS( blasname, blisname ) \
+\
+GENTFUNCDOT( scomplex, c, c, BLIS_CONJUGATE, blasname, blisname ) \
+GENTFUNCDOT( scomplex, c, u, BLIS_NO_CONJUGATE, blasname, blisname ) \
+GENTFUNCDOT( dcomplex, z, c, BLIS_CONJUGATE, blasname, blisname ) \
+GENTFUNCDOT( dcomplex, z, u, BLIS_NO_CONJUGATE, blasname, blisname )
+
+
 // -- Basic one-operand macro with conjugation (used only for dot, ger) --
 
 
 #define INSERT_GENTFUNCDOT_BLAS( blasname, blisname ) \
 \
-GENTFUNCDOT( float, s, , BLIS_NO_CONJUGATE, blasname, blisname ) \
-GENTFUNCDOT( double, d, , BLIS_NO_CONJUGATE, blasname, blisname ) \
-GENTFUNCDOT( scomplex, c, c, BLIS_CONJUGATE, blasname, blisname ) \
-GENTFUNCDOT( scomplex, c, u, BLIS_NO_CONJUGATE, blasname, blisname ) \
-GENTFUNCDOT( dcomplex, z, c, BLIS_CONJUGATE, blasname, blisname ) \
-GENTFUNCDOT( dcomplex, z, u, BLIS_NO_CONJUGATE, blasname, blisname )
+INSERT_GENTFUNCDOTR_BLAS( blasname, blisname ) \
+INSERT_GENTFUNCDOTC_BLAS( blasname, blisname )
 
 
 // -- Basic one-operand macro with real projection --
diff --git a/frame/include/bli_gentprot_macro_defs.h b/frame/include/bli_gentprot_macro_defs.h
index f6aa70946..3db9cdc48 100644
--- a/frame/include/bli_gentprot_macro_defs.h
+++ b/frame/include/bli_gentprot_macro_defs.h
@@ -74,17 +74,33 @@ GENTPROTCO( scomplex, float, c, s, blasname ) \
 GENTPROTCO( dcomplex, double, z, d, blasname )
 
 
+// -- Basic one-operand macro with conjugation (real funcs only, used only for dot, ger) --
+
+
+#define INSERT_GENTPROTDOTR_BLAS( blasname ) \
+\
+GENTPROTDOT( float, s, , blasname ) \
+GENTPROTDOT( double, d, , blasname )
+
+
+// -- Basic one-operand macro with conjugation (complex funcs only, used only for dot, ger) --
+
+
+#define INSERT_GENTPROTDOTC_BLAS( blasname ) \
+\
+GENTPROTDOT( scomplex, c, c, blasname ) \
+GENTPROTDOT( scomplex, c, u, blasname ) \
+GENTPROTDOT( dcomplex, z, c, blasname ) \
+GENTPROTDOT( dcomplex, z, u, blasname )
+
+
 // -- Basic one-operand macro with conjugation (used only for dot, ger) --
 
 
 #define INSERT_GENTPROTDOT_BLAS( blasname ) \
 \
-GENTPROTDOT( float, s, , blasname ) \
-GENTPROTDOT( double, d, , blasname ) \
-GENTPROTDOT( scomplex, c, c, blasname ) \
-GENTPROTDOT( scomplex, c, u, blasname ) \
-GENTPROTDOT( dcomplex, z, c, blasname ) \
-GENTPROTDOT( dcomplex, z, u, blasname )
+INSERT_GENTPROTDOTR_BLAS( blasname ) \
+INSERT_GENTPROTDOTC_BLAS( blasname )
 
 
 // -- Basic one-operand macro with real projection --