Modified Function definition for BLAS and CBLAS interfaces of DOTV and SWAPV APIs

Details: - The kernel is called directly from the API call to avoid framework overhead for single and double precision. - Currently these changes apply only to the zen2 configuration; they will be enabled for the zen family of processors in the future. - These changes improve the performance of the BLAS and CBLAS interfaces of these APIs; they do not affect the BLIS-specific APIs. Change-Id: I1eb7ca470ced82c3cfa8b22f2b53000d42fef96c Signed-off-by: Meghana Vankadari <Meghana.Vankadari@amd.com> AMD-Internal: [CPUPL-847,CPUPL-816]
2026-05-12 10:05:38 +00:00 · 2020-04-30 17:09:39 +05:30
parent 4ad5b1a5e6
commit 28bb28b79f
7 changed files with 642 additions and 27 deletions
--- a/frame/compat/bla_dot.c
+++ b/frame/compat/bla_dot.c
@@ -5,6 +5,7 @@
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
+   Copyright (C) 2020, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
@@ -86,8 +87,170 @@ ftype PASTEF772(ch,blasname,chc) \
 }

 #ifdef BLIS_ENABLE_BLAS
-INSERT_GENTFUNCDOT_BLAS( dot, dotv )
+#ifdef BLIS_CONFIG_ZEN2

+float sdot_
+     (
+       const f77_int* n,
+       const float*   x, const f77_int* incx,
+       const float*   y, const f77_int* incy
+     )
+{
+    dim_t  n0;
+    float* x0;
+    float* y0;
+    inc_t  incx0;
+    inc_t  incy0;
+    float  rho;
+    /* BLAS sdot_: calls bli_sdotv_zen_int10 directly and returns the rho it writes. */
+    /* BLIS auto-initialization is left disabled on this direct-kernel path. */
+//  bli_init_auto();
+
+    /* Clamp n: negative values become zero, otherwise cast to dim_t. */
+    if ( *n < 0 ) n0 = ( dim_t )0;
+    else              n0 = ( dim_t )(*n);
+
+	/* If an input increment is negative, move the pointer to the far end of
+	   the vector; the negative increment itself is passed on unchanged. */ 
+
+    if ( *incx < 0 )
+    {
+        /* The semantics of negative stride in BLAS are that the vector
+        operand be traversed in reverse order. (Another way to think
+        of this is that negative strides effectively reverse the order
+        of the vector, but without any explicit data movements.) This
+        is also how BLIS interprets negative strides. The difference
+        is that with BLAS, the caller *always* passes in the 0th (i.e.,
+        top-most or left-most) element of the vector, even when the
+        stride is negative. By contrast, in BLIS, negative strides are
+        used *relative* to the vector address as it is given. Thus, in
+        BLIS, if this backwards traversal is desired, the caller *must*
+        pass in the address to the (n-1)th (i.e., the bottom-most or
+        right-most) element along with a negative stride. */
+        /* NOTE(review): if n0 == 0 the offset below is negative (assuming dim_t is signed); confirm the kernel never dereferences x0 in that case. */
+        x0    = ((float*)x) + (n0-1)*(-*incx);
+        incx0 = ( inc_t )(*incx);
+
+    }
+    else
+    {
+        x0    = ((float*)x);
+        incx0 = ( inc_t )(*incx);
+    }
+
+    if ( *incy < 0 )
+    {
+        y0    = ((float*)y) + (n0-1)*(-*incy);
+        incy0 = ( inc_t )(*incy);
+
+    }
+    else
+    {
+        y0    = ((float*)y);
+        incy0 = ( inc_t )(*incy);
+    }
+
+	/* Call the kernel directly: both conjugation arguments are BLIS_NO_CONJUGATE, result lands in rho. */
+	bli_sdotv_zen_int10
+	(
+	  BLIS_NO_CONJUGATE,
+	  BLIS_NO_CONJUGATE,
+	  n0,
+	  x0, incx0,
+	  y0, incy0,
+	  &rho,
+	  NULL
+	);
+
+	/* BLIS auto-finalization is likewise left disabled. */
+//	bli_finalize_auto();
+
+	return rho;
+}
+
+double ddot_
+     (
+       const f77_int* n,
+       const double*   x, const f77_int* incx,
+       const double*   y, const f77_int* incy
+     )
+{
+	dim_t  n0;
+	double* x0;
+	double* y0;
+	inc_t  incx0;
+	inc_t  incy0;
+	double  rho;
+	/* BLAS ddot_: calls bli_ddotv_zen_int10 directly and returns the rho it writes. */
+	/* BLIS auto-initialization is left disabled on this direct-kernel path. */
+//	bli_init_auto();
+
+	/* Clamp n: negative values become zero, otherwise cast to dim_t. */
+    if ( *n < 0 ) n0 = ( dim_t )0;
+    else              n0 = ( dim_t )(*n);
+
+	/* If an input increment is negative, move the pointer to the far end of
+	   the vector; the negative increment itself is passed on unchanged. */ 
+
+    if ( *incx < 0 )
+    {
+        /* The semantics of negative stride in BLAS are that the vector
+        operand be traversed in reverse order. (Another way to think
+        of this is that negative strides effectively reverse the order
+        of the vector, but without any explicit data movements.) This
+        is also how BLIS interprets negative strides. The difference
+        is that with BLAS, the caller *always* passes in the 0th (i.e.,
+        top-most or left-most) element of the vector, even when the
+        stride is negative. By contrast, in BLIS, negative strides are
+        used *relative* to the vector address as it is given. Thus, in
+        BLIS, if this backwards traversal is desired, the caller *must*
+        pass in the address to the (n-1)th (i.e., the bottom-most or
+        right-most) element along with a negative stride. */
+        /* NOTE(review): if n0 == 0 the offset below is negative (assuming dim_t is signed); confirm the kernel never dereferences x0 in that case. */
+        x0    = ((double*)x) + (n0-1)*(-*incx);
+        incx0 = ( inc_t )(*incx);
+
+    }
+    else
+    {
+        x0    = ((double*)x);
+        incx0 = ( inc_t )(*incx);
+    }
+
+    if ( *incy < 0 )
+    {
+        y0    = ((double*)y) + (n0-1)*(-*incy);
+        incy0 = ( inc_t )(*incy);
+
+    }
+    else
+    {
+        y0    = ((double*)y);
+        incy0 = ( inc_t )(*incy);
+    }
+
+	/* Call the kernel directly: both conjugation arguments are BLIS_NO_CONJUGATE, result lands in rho. */
+	bli_ddotv_zen_int10
+	(
+	  BLIS_NO_CONJUGATE,
+	  BLIS_NO_CONJUGATE,
+	  n0,
+	  x0, incx0,
+	  y0, incy0,
+	  &rho,
+	  NULL
+	);
+
+	/* BLIS auto-finalization is likewise left disabled. */
+//	bli_finalize_auto();
+
+	return rho;
+}
+
+INSERT_GENTFUNCDOT_BLAS_ZEN2( dot, dotv )
+#else
+INSERT_GENTFUNCDOT_BLAS( dot, dotv )
+#endif

 // -- "Black sheep" dot product function definitions --

--- a/frame/compat/bla_swap.c
+++ b/frame/compat/bla_swap.c
@@ -5,6 +5,7 @@
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
+   Copyright (C) 2020, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
@@ -48,38 +49,189 @@ void PASTEF77(ch,blasname) \
       ftype*   y, const f77_int* incy  \
     ) \
 { \
-	dim_t  n0; \
-	ftype* x0; \
-	ftype* y0; \
-	inc_t  incx0; \
-	inc_t  incy0; \
+    dim_t  n0; \
+    ftype* x0; \
+    ftype* y0; \
+    inc_t  incx0; \
+    inc_t  incy0; \
 \
-	/* Initialize BLIS. */ \
-	bli_init_auto(); \
+    /* Initialize BLIS. */ \
+    bli_init_auto(); \
 \
-	/* Convert/typecast negative values of n to zero. */ \
-	bli_convert_blas_dim1( *n, n0 ); \
+    /* Convert/typecast negative values of n to zero. */ \
+    bli_convert_blas_dim1( *n, n0 ); \
 \
-	/* If the input increments are negative, adjust the pointers so we can
-	   use positive increments instead. */ \
-	bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
-	bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
+    /* If the input increments are negative, adjust the pointers so we can
+       use positive increments instead. */ \
+    bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
+    bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
 \
-	/* Call BLIS interface. */ \
-	PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \
-	( \
-	  n0, \
-	  x0, incx0, \
-	  y0, incy0, \
-	  NULL, \
-	  NULL  \
-	); \
+    /* Call BLIS interface. */ \
+    PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \
+    ( \
+      n0, \
+      x0, incx0, \
+      y0, incy0, \
+      NULL, \
+      NULL  \
+    ); \
 \
-	/* Finalize BLIS. */ \
-	bli_finalize_auto(); \
+    /* Finalize BLIS. */ \
+    bli_finalize_auto(); \
 }

 #ifdef BLIS_ENABLE_BLAS
+#ifdef BLIS_CONFIG_ZEN2
+
+void sswap_
+     (
+       const f77_int* n,
+       float*   x, const f77_int* incx,
+       float*   y, const f77_int* incy
+     )
+{
+    dim_t  n0;
+    float* x0;
+    float* y0;
+    inc_t  incx0;
+    inc_t  incy0;
+    /* BLAS sswap_: calls bli_sswapv_zen_int8 directly on the (possibly offset) x and y pointers. */
+    /* BLIS auto-initialization is left disabled on this direct-kernel path. */
+//  bli_init_auto();
+
+    /* Clamp n: negative values become zero, otherwise cast to dim_t. */
+    if ( *n < 0 ) n0 = ( dim_t )0;
+    else              n0 = ( dim_t )(*n);
+
+    /* If an input increment is negative, move the pointer to the far end of
+       the vector; the negative increment itself is passed on unchanged. */
+    if ( *incx < 0 )
+    {
+        /* The semantics of negative stride in BLAS are that the vector
+        operand be traversed in reverse order. (Another way to think
+        of this is that negative strides effectively reverse the order
+        of the vector, but without any explicit data movements.) This
+        is also how BLIS interprets negative strides. The difference
+        is that with BLAS, the caller *always* passes in the 0th (i.e.,
+        top-most or left-most) element of the vector, even when the
+        stride is negative. By contrast, in BLIS, negative strides are
+        used *relative* to the vector address as it is given. Thus, in
+        BLIS, if this backwards traversal is desired, the caller *must*
+        pass in the address to the (n-1)th (i.e., the bottom-most or
+        right-most) element along with a negative stride. */
+        /* NOTE(review): if n0 == 0 the offset below is negative (assuming dim_t is signed); confirm the kernel never dereferences x0 in that case. */
+        x0    = (x) + (n0-1)*(-*incx);
+        incx0 = ( inc_t )(*incx);
+
+    }
+    else
+    {
+        x0    = (x);
+        incx0 = ( inc_t )(*incx);
+    }
+
+    if ( *incy < 0 )
+    {
+        y0    = (y) + (n0-1)*(-*incy);
+        incy0 = ( inc_t )(*incy);
+
+    }
+    else
+    {
+        y0    = (y);
+        incy0 = ( inc_t )(*incy);
+    }
+
+
+    /* Call the kernel directly with the adjusted pointers and the original increments. */
+    bli_sswapv_zen_int8
+    (
+        n0,
+        x0, incx0,
+        y0, incy0,
+        NULL
+    );
+
+    /* BLIS auto-finalization is likewise left disabled. */
+//    bli_finalize_auto();
+}
+
+void dswap_
+     (
+       const f77_int* n,
+       double*   x, const f77_int* incx,
+       double*   y, const f77_int* incy
+     )
+{
+    dim_t  n0;
+    double* x0;
+    double* y0;
+    inc_t  incx0;
+    inc_t  incy0;
+    /* BLAS dswap_: calls bli_dswapv_zen_int8 directly on the (possibly offset) x and y pointers. */
+    /* BLIS auto-initialization is left disabled on this direct-kernel path. */
+//  bli_init_auto();
+
+    /* Clamp n: negative values become zero, otherwise cast to dim_t. */
+    if ( *n < 0 ) n0 = ( dim_t )0;
+    else              n0 = ( dim_t )(*n);
+
+    /* If an input increment is negative, move the pointer to the far end of
+       the vector; the negative increment itself is passed on unchanged. */
+    if ( *incx < 0 )
+    {
+        /* The semantics of negative stride in BLAS are that the vector
+        operand be traversed in reverse order. (Another way to think
+        of this is that negative strides effectively reverse the order
+        of the vector, but without any explicit data movements.) This
+        is also how BLIS interprets negative strides. The difference
+        is that with BLAS, the caller *always* passes in the 0th (i.e.,
+        top-most or left-most) element of the vector, even when the
+        stride is negative. By contrast, in BLIS, negative strides are
+        used *relative* to the vector address as it is given. Thus, in
+        BLIS, if this backwards traversal is desired, the caller *must*
+        pass in the address to the (n-1)th (i.e., the bottom-most or
+        right-most) element along with a negative stride. */
+        /* NOTE(review): if n0 == 0 the offset below is negative (assuming dim_t is signed); confirm the kernel never dereferences x0 in that case. */
+        x0    = (x) + (n0-1)*(-*incx);
+        incx0 = ( inc_t )(*incx);
+
+    }
+    else
+    {
+        x0    = (x);
+        incx0 = ( inc_t )(*incx);
+    }
+
+    if ( *incy < 0 )
+    {
+        y0    = (y) + (n0-1)*(-*incy);
+        incy0 = ( inc_t )(*incy);
+
+    }
+    else
+    {
+        y0    = (y);
+        incy0 = ( inc_t )(*incy);
+    }
+
+
+    /* Call the kernel directly with the adjusted pointers and the original increments. */
+    bli_dswapv_zen_int8
+    (
+        n0,
+        x0, incx0,
+        y0, incy0,
+        NULL
+    );
+
+    /* BLIS auto-finalization is likewise left disabled. */
+//    bli_finalize_auto();
+}
+
+INSERT_GENTFUNC_BLAS_ZEN2( swap, swapv )
+
+#else
 INSERT_GENTFUNC_BLAS( swap, swapv )
 #endif
-
+#endif
--- a/frame/compat/cblas/src/cblas_ddot.c
+++ b/frame/compat/cblas/src/cblas_ddot.c
@@ -7,6 +7,8 @@
 * It calls the fortran wrapper before calling ddot.
 *
 * Written by Keita Teranishi.  2/11/1998
+ * 
+ * Copyright (C) 2020, Advanced Micro Devices, Inc.
 *
 */
 #include "cblas.h"
@@ -22,7 +24,80 @@ double cblas_ddot( f77_int N, const double *X,
   #define F77_incX incX
   #define F77_incY incY
 #endif
+#ifdef BLIS_CONFIG_ZEN2
+        dim_t  n0;
+        double* x0;
+        double* y0;
+        inc_t  incx0;
+        inc_t  incy0;
+
+        /* Initialize BLIS. */
+//      bli_init_auto();
+
+        /* Convert/typecast negative values of n to zero. */
+    if ( F77_N < 0 ) n0 = ( dim_t )0;
+    else              n0 = ( dim_t )(F77_N);
+
+        /* If an input increment is negative, move the pointer to the far end of
+           the vector; the negative increment itself is passed on unchanged. */
+
+    if ( F77_incX < 0 )
+    {
+        /* The semantics of negative stride in BLAS are that the vector
+        operand be traversed in reverse order. (Another way to think
+        of this is that negative strides effectively reverse the order
+        of the vector, but without any explicit data movements.) This
+        is also how BLIS interprets negative strides. The difference
+        is that with BLAS, the caller *always* passes in the 0th (i.e.,
+        top-most or left-most) element of the vector, even when the
+        stride is negative. By contrast, in BLIS, negative strides are
+        used *relative* to the vector address as it is given. Thus, in
+        BLIS, if this backwards traversal is desired, the caller *must*
+        pass in the address to the (n-1)th (i.e., the bottom-most or
+        right-most) element along with a negative stride. */
+
+        x0    = ((double*)X) + (n0-1)*(-F77_incX);
+        incx0 = ( inc_t )(F77_incX);
+
+    }
+    else
+    {
+        x0    = ((double*)X);
+        incx0 = ( inc_t )(F77_incX);
+    }
+
+    if ( F77_incY < 0 )
+    {
+        y0    = ((double*)Y) + (n0-1)*(-F77_incY);
+        incy0 = ( inc_t )(F77_incY);
+
+    }
+    else
+    {
+        y0    = ((double*)Y);
+        incy0 = ( inc_t )(F77_incY);
+    }
+
+        /* Call BLIS kernel. */
+        bli_ddotv_zen_int10
+        (
+          BLIS_NO_CONJUGATE,
+          BLIS_NO_CONJUGATE,
+          n0,
+          x0, incx0,
+          y0, incy0,
+          &dot,
+          NULL
+        );
+
+        /* Finalize BLIS. */
+//      bli_finalize_auto();
+
+        return dot;
+
+#else
   F77_ddot_sub( &F77_N, X, &F77_incX, Y, &F77_incY, &dot);
   return dot;
+#endif
 }   
 #endif
--- a/frame/compat/cblas/src/cblas_dswap.c
+++ b/frame/compat/cblas/src/cblas_dswap.c
@@ -7,6 +7,8 @@
 *
 * Written by Keita Teranishi.  2/11/1998
 *
+ * Copyright (C) 2020, Advanced Micro Devices, Inc.
+ *
 */
 #include "cblas.h"
 #include "cblas_f77.h"
@@ -20,6 +22,74 @@ void cblas_dswap( f77_int N, double *X, f77_int incX, double *Y,
   #define F77_incX incX
   #define F77_incY incY
 #endif
+
+#ifdef BLIS_CONFIG_ZEN2
+    dim_t  n0;
+    double* x0;
+    double* y0;
+    inc_t  incx0;
+    inc_t  incy0;
+
+    /* Initialize BLIS. */
+//  bli_init_auto();
+
+    /* Convert/typecast negative values of n to zero. */
+    if ( F77_N < 0 ) n0 = ( dim_t )0;
+    else              n0 = ( dim_t )(F77_N);
+
+    /* If an input increment is negative, move the pointer to the far end of
+       the vector; the negative increment itself is passed on unchanged. */
+    if ( F77_incX < 0 )
+    {
+        /* The semantics of negative stride in BLAS are that the vector
+        operand be traversed in reverse order. (Another way to think
+        of this is that negative strides effectively reverse the order
+        of the vector, but without any explicit data movements.) This
+        is also how BLIS interprets negative strides. The difference
+        is that with BLAS, the caller *always* passes in the 0th (i.e.,
+        top-most or left-most) element of the vector, even when the
+        stride is negative. By contrast, in BLIS, negative strides are
+        used *relative* to the vector address as it is given. Thus, in
+        BLIS, if this backwards traversal is desired, the caller *must*
+        pass in the address to the (n-1)th (i.e., the bottom-most or
+        right-most) element along with a negative stride. */
+
+        x0    = (X) + (n0-1)*(-F77_incX);
+        incx0 = ( inc_t )(F77_incX);
+
+    }
+    else
+    {
+        x0    = (X);
+        incx0 = ( inc_t )(F77_incX);
+    }
+
+    if ( F77_incY < 0 )
+    {
+        y0    = (Y) + (n0-1)*(-F77_incY);
+        incy0 = ( inc_t )(F77_incY);
+
+    }
+    else
+    {
+        y0    = (Y);
+        incy0 = ( inc_t )(F77_incY);
+    }
+
+
+    /* Call BLIS kernel */
+    bli_dswapv_zen_int8
+    (
+        n0,
+        x0, incx0,
+        y0, incy0,
+        NULL
+    );
+
+    /* Finalize BLIS. */
+//    bli_finalize_auto();
+#else
   F77_dswap( &F77_N, X, &F77_incX, Y, &F77_incY);
+#endif
 }
 #endif
--- a/frame/compat/cblas/src/cblas_sdot.c
+++ b/frame/compat/cblas/src/cblas_sdot.c
@@ -8,6 +8,8 @@
 *
 * Written by Keita Teranishi.  2/11/1998
 *
+ * Copyright (C) 2020, Advanced Micro Devices, Inc.
+ *
 */
 #include "cblas.h"
 #include "cblas_f77.h"
@@ -22,7 +24,79 @@ float cblas_sdot( f77_int N, const float *X,
   #define F77_incX incX
   #define F77_incY incY
 #endif
+#ifdef BLIS_CONFIG_ZEN2
+        dim_t  n0;
+        float* x0;
+        float* y0;
+        inc_t  incx0;
+        inc_t  incy0;
+
+        /* Initialize BLIS. */
+//      bli_init_auto();
+
+        /* Convert/typecast negative values of n to zero. */
+    if ( F77_N < 0 ) n0 = ( dim_t )0;
+    else              n0 = ( dim_t )(F77_N);
+
+        /* If an input increment is negative, move the pointer to the far end of
+           the vector; the negative increment itself is passed on unchanged. */
+
+    if ( F77_incX < 0 )
+    {
+        /* The semantics of negative stride in BLAS are that the vector
+        operand be traversed in reverse order. (Another way to think
+        of this is that negative strides effectively reverse the order
+        of the vector, but without any explicit data movements.) This
+        is also how BLIS interprets negative strides. The difference
+        is that with BLAS, the caller *always* passes in the 0th (i.e.,
+        top-most or left-most) element of the vector, even when the
+        stride is negative. By contrast, in BLIS, negative strides are
+        used *relative* to the vector address as it is given. Thus, in
+        BLIS, if this backwards traversal is desired, the caller *must*
+        pass in the address to the (n-1)th (i.e., the bottom-most or
+        right-most) element along with a negative stride. */
+
+        x0    = ((float*)X) + (n0-1)*(-F77_incX);
+        incx0 = ( inc_t )(F77_incX);
+
+    }
+    else
+    {
+        x0    = ((float*)X);
+        incx0 = ( inc_t )(F77_incX);
+    }
+
+    if ( F77_incY < 0 )
+    {
+        y0    = ((float*)Y) + (n0-1)*(-F77_incY);
+        incy0 = ( inc_t )(F77_incY);
+
+    }
+    else
+    {
+        y0    = ((float*)Y);
+        incy0 = ( inc_t )(F77_incY);
+    }
+
+        /* Call BLIS kernel. */
+        bli_sdotv_zen_int10
+        (
+          BLIS_NO_CONJUGATE,
+          BLIS_NO_CONJUGATE,
+          n0,
+          x0, incx0,
+          y0, incy0,
+          &dot,
+          NULL
+        );
+
+        /* Finalize BLIS. */
+//      bli_finalize_auto();
+
+        return dot;
+#else
   F77_sdot_sub( &F77_N, X, &F77_incX, Y, &F77_incY, &dot);
   return dot;
+#endif
 }   
 #endif
--- a/frame/compat/cblas/src/cblas_sswap.c
+++ b/frame/compat/cblas/src/cblas_sswap.c
@@ -7,7 +7,9 @@
 *
 * Written by Keita Teranishi.  2/11/1998
 *
- */
+ * Copyright (C) 2020, Advanced Micro Devices, Inc.
+ *
+*/
 #include "cblas.h"
 #include "cblas_f77.h"
 void cblas_sswap( f77_int N, float *X, f77_int incX, float *Y,
@@ -20,6 +22,76 @@ void cblas_sswap( f77_int N, float *X, f77_int incX, float *Y,
   #define F77_incX incX
   #define F77_incY incY
 #endif
+
+#ifdef BLIS_CONFIG_ZEN2
+
+    dim_t  n0;
+    float* x0;
+    float* y0;
+    inc_t  incx0;
+    inc_t  incy0;
+
+    /* Initialize BLIS. */
+//  bli_init_auto();
+
+    /* Convert/typecast negative values of n to zero. */
+    if ( F77_N < 0 ) n0 = ( dim_t )0;
+    else              n0 = ( dim_t )(F77_N);
+
+    /* If an input increment is negative, move the pointer to the far end of
+       the vector; the negative increment itself is passed on unchanged. */
+    if ( F77_incX < 0 )
+    {
+        /* The semantics of negative stride in BLAS are that the vector
+        operand be traversed in reverse order. (Another way to think
+        of this is that negative strides effectively reverse the order
+        of the vector, but without any explicit data movements.) This
+        is also how BLIS interprets negative strides. The difference
+        is that with BLAS, the caller *always* passes in the 0th (i.e.,
+        top-most or left-most) element of the vector, even when the
+        stride is negative. By contrast, in BLIS, negative strides are
+        used *relative* to the vector address as it is given. Thus, in
+        BLIS, if this backwards traversal is desired, the caller *must*
+        pass in the address to the (n-1)th (i.e., the bottom-most or
+        right-most) element along with a negative stride. */
+
+        x0    = (X) + (n0-1)*(-F77_incX);
+        incx0 = ( inc_t )(F77_incX);
+
+    }
+    else
+    {
+        x0    = (X);
+        incx0 = ( inc_t )(F77_incX);
+    }
+
+    if ( F77_incY < 0 )
+    {
+        y0    = (Y) + (n0-1)*(-F77_incY);
+        incy0 = ( inc_t )(F77_incY);
+
+    }
+    else
+    {
+        y0    = (Y);
+        incy0 = ( inc_t )(F77_incY);
+    }
+
+
+    /* Call BLIS kernel */
+    bli_sswapv_zen_int8
+    (
+        n0,
+        x0, incx0,
+        y0, incy0,
+        NULL
+    );
+
+    /* Finalize BLIS. */
+//    bli_finalize_auto();
+
+#else
   F77_sswap( &F77_N, X, &F77_incX, Y, &F77_incY);
+#endif
 }
 #endif
--- a/frame/include/bli_gentfunc_macro_defs.h
+++ b/frame/include/bli_gentfunc_macro_defs.h
@@ -56,10 +56,12 @@ GENTFUNC( double,   d, blasname, blisname ) \
 GENTFUNC( scomplex, c, blasname, blisname ) \
 GENTFUNC( dcomplex, z, blasname, blisname )

+
 #define INSERT_GENTFUNC_BLAS_ZEN2( blasname, blisname ) \
/* Complex-only instantiations (c and z); bla_swap.c uses this under BLIS_CONFIG_ZEN2, where sswap_ and dswap_ are written out by hand. */ \
 GENTFUNC( scomplex, c, blasname, blisname ) \
 GENTFUNC( dcomplex, z, blasname, blisname )
+
 // -- Basic one-operand macro with real domain only --


@@ -80,6 +82,13 @@ GENTFUNCCO( dcomplex, double, z, d, blasname, blisname )

 // -- Basic one-operand macro with conjugation (used only for dot, ger) --

+#define INSERT_GENTFUNCDOT_BLAS_ZEN2( blasname, blisname ) \
+/* Complex-only instantiations (c and z, with BLIS_CONJUGATE and BLIS_NO_CONJUGATE); bla_dot.c uses this under BLIS_CONFIG_ZEN2, where sdot_ and ddot_ are written out by hand. */ \
+GENTFUNCDOT( scomplex, c, c, BLIS_CONJUGATE,    blasname, blisname ) \
+GENTFUNCDOT( scomplex, c, u, BLIS_NO_CONJUGATE, blasname, blisname ) \
+GENTFUNCDOT( dcomplex, z, c, BLIS_CONJUGATE,    blasname, blisname ) \
+GENTFUNCDOT( dcomplex, z, u, BLIS_NO_CONJUGATE, blasname, blisname )
+

 #define INSERT_GENTFUNCDOT_BLAS( blasname, blisname ) \
 \