diff --git a/frame/3/bli_l3_tapi.c b/frame/3/bli_l3_tapi.c index 4eeba1971..7b7f758ab 100644 --- a/frame/3/bli_l3_tapi.c +++ b/frame/3/bli_l3_tapi.c @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -64,7 +65,11 @@ void PASTEMAC2(ch,opname,EX_SUF) \ \ const num_t dt = PASTEMAC(ch,type); \ \ - obj_t alphao, ao, bo, betao, co; \ + obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t ao = BLIS_OBJECT_INITIALIZER; \ + obj_t bo = BLIS_OBJECT_INITIALIZER; \ + obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t m_a, n_a; \ dim_t m_b, n_b; \ @@ -72,12 +77,12 @@ void PASTEMAC2(ch,opname,EX_SUF) \ bli_set_dims_with_trans( transa, m, k, &m_a, &n_a ); \ bli_set_dims_with_trans( transb, k, n, &m_b, &n_b ); \ \ - bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_init_finish_1x1( dt, alpha, &alphao ); \ + bli_obj_init_finish_1x1( dt, beta, &betao ); \ \ - bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ - bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ - bli_obj_create_with_attached_buffer( dt, m, n, c, rs_c, cs_c, &co ); \ + bli_obj_init_finish( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ + bli_obj_init_finish( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ + bli_obj_init_finish( dt, m, n, c, rs_c, cs_c, &co ); \ \ bli_obj_set_conjtrans( transa, &ao ); \ bli_obj_set_conjtrans( transb, &bo ); \ @@ -122,7 +127,11 @@ void PASTEMAC2(ch,opname,EX_SUF) \ \ const num_t dt = PASTEMAC(ch,type); \ \ - obj_t alphao, ao, bo, betao, co; \ + obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t ao = BLIS_OBJECT_INITIALIZER; \ + obj_t bo = BLIS_OBJECT_INITIALIZER; \ + obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t mn_a; \ dim_t m_b, n_b; \ @@ -130,12 +139,12 @@ void PASTEMAC2(ch,opname,EX_SUF) \ bli_set_dim_with_side( side, m, n, &mn_a ); \ bli_set_dims_with_trans( transb, m, n, &m_b, &n_b ); \ \ - bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_init_finish_1x1( dt, alpha, &alphao ); \ + bli_obj_init_finish_1x1( dt, beta, &betao ); \ \ - bli_obj_create_with_attached_buffer( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ - bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ - bli_obj_create_with_attached_buffer( dt, m, n, c, rs_c, cs_c, &co ); \ + bli_obj_init_finish( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ + bli_obj_init_finish( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ + bli_obj_init_finish( dt, m, n, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploa, &ao ); \ bli_obj_set_conj( conja, &ao ); \ @@ -183,17 +192,20 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt_r = PASTEMAC(chr,type); \ const num_t dt = PASTEMAC(ch,type); \ \ - obj_t alphao, ao, betao, co; \ + obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t ao = BLIS_OBJECT_INITIALIZER; \ + obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t m_a, n_a; \ \ bli_set_dims_with_trans( transa, m, k, &m_a, &n_a ); \ \ - bli_obj_create_1x1_with_attached_buffer( dt_r, alpha, &alphao ); \ - bli_obj_create_1x1_with_attached_buffer( dt_r, beta, &betao ); \ + bli_obj_init_finish_1x1( dt_r, alpha, &alphao ); \ + bli_obj_init_finish_1x1( dt_r, beta, &betao ); \ \ - bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ - bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ + bli_obj_init_finish( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ + bli_obj_init_finish( dt, m, m, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploc, &co ); \ bli_obj_set_conjtrans( transa, &ao ); \ @@ -239,7 +251,11 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt_r = PASTEMAC(chr,type); \ const num_t dt = PASTEMAC(ch,type); \ \ - obj_t alphao, ao, bo, betao, co; \ + obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t ao = BLIS_OBJECT_INITIALIZER; \ + obj_t bo = BLIS_OBJECT_INITIALIZER; \ + obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t m_a, n_a; \ dim_t m_b, n_b; \ @@ -247,12 +263,12 @@ void PASTEMAC2(ch,opname,EX_SUF) \ bli_set_dims_with_trans( transa, m, k, &m_a, &n_a ); \ bli_set_dims_with_trans( transb, m, k, &m_b, &n_b ); \ \ - bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_1x1_with_attached_buffer( dt_r, beta, &betao ); \ + bli_obj_init_finish_1x1( dt, alpha, &alphao ); \ + bli_obj_init_finish_1x1( dt_r, beta, &betao ); \ \ - bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ - bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ - bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ + bli_obj_init_finish( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ + bli_obj_init_finish( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ + bli_obj_init_finish( dt, m, m, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploc, &co ); \ bli_obj_set_conjtrans( transa, &ao ); \ @@ -297,17 +313,20 @@ void PASTEMAC2(ch,opname,EX_SUF) \ \ const num_t dt = PASTEMAC(ch,type); \ \ - obj_t alphao, ao, betao, co; \ + obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t ao = BLIS_OBJECT_INITIALIZER; \ + obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t m_a, n_a; \ \ bli_set_dims_with_trans( transa, m, k, &m_a, &n_a ); \ \ - bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_init_finish_1x1( dt, alpha, &alphao ); \ + bli_obj_init_finish_1x1( dt, beta, &betao ); \ \ - bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ - bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ + bli_obj_init_finish( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ + bli_obj_init_finish( dt, m, m, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploc, &co ); \ bli_obj_set_conjtrans( transa, &ao ); \ @@ -352,7 +371,11 @@ void PASTEMAC2(ch,opname,EX_SUF) \ \ const num_t dt = PASTEMAC(ch,type); \ \ - obj_t alphao, ao, bo, betao, co; \ + obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t ao = BLIS_OBJECT_INITIALIZER; \ + obj_t bo = BLIS_OBJECT_INITIALIZER; \ + obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t m_a, n_a; \ dim_t m_b, n_b; \ @@ -360,12 +383,12 @@ void PASTEMAC2(ch,opname,EX_SUF) \ bli_set_dims_with_trans( transa, m, k, &m_a, &n_a ); \ bli_set_dims_with_trans( transb, m, k, &m_b, &n_b ); \ \ - bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_init_finish_1x1( dt, alpha, &alphao ); \ + bli_obj_init_finish_1x1( dt, beta, &betao ); \ \ - bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ - bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ - bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ + bli_obj_init_finish( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ + bli_obj_init_finish( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ + bli_obj_init_finish( dt, m, m, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploc, &co ); \ bli_obj_set_conjtrans( transa, &ao ); \ @@ -414,7 +437,11 @@ void PASTEMAC2(ch,opname,EX_SUF) \ \ const num_t dt = PASTEMAC(ch,type); \ \ - obj_t alphao, ao, bo, betao, co; \ + obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t ao = BLIS_OBJECT_INITIALIZER; \ + obj_t bo = BLIS_OBJECT_INITIALIZER; \ + obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t mn_a; \ dim_t m_b, n_b; \ @@ -422,12 +449,12 @@ void PASTEMAC2(ch,opname,EX_SUF) \ bli_set_dim_with_side( side, m, n, &mn_a ); \ bli_set_dims_with_trans( transb, m, n, &m_b, &n_b ); \ \ - bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_init_finish_1x1( dt, alpha, &alphao ); \ + bli_obj_init_finish_1x1( dt, beta, &betao ); \ \ - bli_obj_create_with_attached_buffer( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ - bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ - bli_obj_create_with_attached_buffer( dt, m, n, c, rs_c, cs_c, &co ); \ + bli_obj_init_finish( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ + bli_obj_init_finish( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ + bli_obj_init_finish( dt, m, n, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploa, &ao ); \ bli_obj_set_diag( diaga, &ao ); \ @@ -475,16 +502,18 @@ void PASTEMAC2(ch,opname,EX_SUF) \ \ const num_t dt = PASTEMAC(ch,type); \ \ - obj_t alphao, ao, bo; \ + obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t ao = BLIS_OBJECT_INITIALIZER; \ + obj_t bo = BLIS_OBJECT_INITIALIZER; \ \ dim_t mn_a; \ \ bli_set_dim_with_side( side, m, n, &mn_a ); \ \ - bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_init_finish_1x1( dt, alpha, &alphao ); \ \ - bli_obj_create_with_attached_buffer( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ - bli_obj_create_with_attached_buffer( dt, m, n, b, rs_b, cs_b, &bo ); \ + bli_obj_init_finish( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ + bli_obj_init_finish( dt, m, n, b, rs_b, cs_b, &bo ); \ \ bli_obj_set_uplo( uploa, &ao ); \ bli_obj_set_diag( diaga, &ao ); \ diff --git a/frame/base/bli_param_map.c b/frame/base/bli_param_map.c index de877f686..d20eece43 100644 --- a/frame/base/bli_param_map.c +++ b/frame/base/bli_param_map.c @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -98,61 +99,8 @@ void bli_param_map_blis_to_netlib_machval( machval_t machval, char* blas_machval // --- BLAS/LAPACK to BLIS mappings -------------------------------------------- -void bli_param_map_netlib_to_blis_side( char side, side_t* blis_side ) -{ - if ( side == 'l' || side == 'L' ) *blis_side = BLIS_LEFT; - else if ( side == 'r' || side == 'R' ) *blis_side = BLIS_RIGHT; - else - { - // Instead of reporting an error to the framework, default to - // an arbitrary value. This is needed because this function is - // called by the BLAS compatibility layer AFTER it has already - // checked errors and called xerbla(). If the application wants - // to override the BLAS compatibility layer's xerbla--which - // responds to errors with abort()--we need to also NOT call - // abort() here, since either way it has already been dealt - // with. - //bli_check_error_code( BLIS_INVALID_SIDE ); - *blis_side = BLIS_LEFT; - } -} - -void bli_param_map_netlib_to_blis_uplo( char uplo, uplo_t* blis_uplo ) -{ - if ( uplo == 'l' || uplo == 'L' ) *blis_uplo = BLIS_LOWER; - else if ( uplo == 'u' || uplo == 'U' ) *blis_uplo = BLIS_UPPER; - else - { - // See comment for bli_param_map_netlib_to_blis_side() above. - //bli_check_error_code( BLIS_INVALID_UPLO ); - *blis_uplo = BLIS_LOWER; - } -} - -void bli_param_map_netlib_to_blis_trans( char trans, trans_t* blis_trans ) -{ - if ( trans == 'n' || trans == 'N' ) *blis_trans = BLIS_NO_TRANSPOSE; - else if ( trans == 't' || trans == 'T' ) *blis_trans = BLIS_TRANSPOSE; - else if ( trans == 'c' || trans == 'C' ) *blis_trans = BLIS_CONJ_TRANSPOSE; - else - { - // See comment for bli_param_map_netlib_to_blis_side() above. - //bli_check_error_code( BLIS_INVALID_TRANS ); - *blis_trans = BLIS_NO_TRANSPOSE; - } -} - -void bli_param_map_netlib_to_blis_diag( char diag, diag_t* blis_diag ) -{ - if ( diag == 'n' || diag == 'N' ) *blis_diag = BLIS_NONUNIT_DIAG; - else if ( diag == 'u' || diag == 'U' ) *blis_diag = BLIS_UNIT_DIAG; - else - { - // See comment for bli_param_map_netlib_to_blis_side() above. - //bli_check_error_code( BLIS_INVALID_DIAG ); - *blis_diag = BLIS_NONUNIT_DIAG; - } -} +// NOTE: These functions were converted into static functions. Please see this +// file's corresponding header for those definitions. // --- BLIS char to BLIS mappings ---------------------------------------------- diff --git a/frame/base/bli_param_map.h b/frame/base/bli_param_map.h index 829fe808c..8ec4188ec 100644 --- a/frame/base/bli_param_map.h +++ b/frame/base/bli_param_map.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -44,10 +45,64 @@ void bli_param_map_blis_to_netlib_machval( machval_t machval, char* blas_machval // --- BLAS/LAPACK to BLIS mappings -------------------------------------------- -void bli_param_map_netlib_to_blis_side( char side, side_t* blis_side ); -void bli_param_map_netlib_to_blis_uplo( char uplo, uplo_t* blis_uplo ); -void bli_param_map_netlib_to_blis_trans( char trans, trans_t* blis_trans ); -void bli_param_map_netlib_to_blis_diag( char diag, diag_t* blis_diag ); +// NOTE: These static functions were converted from regular functions in order +// to reduce function call overhead within the BLAS compatibility layer. + +static void bli_param_map_netlib_to_blis_side( char side, side_t* blis_side ) +{ + if ( side == 'l' || side == 'L' ) *blis_side = BLIS_LEFT; + else if ( side == 'r' || side == 'R' ) *blis_side = BLIS_RIGHT; + else + { + // Instead of reporting an error to the framework, default to + // an arbitrary value. This is needed because this function is + // called by the BLAS compatibility layer AFTER it has already + // checked errors and called xerbla(). If the application wants + // to override the BLAS compatibility layer's xerbla--which + // responds to errors with abort()--we need to also NOT call + // abort() here, since either way it has already been dealt + // with. + //bli_check_error_code( BLIS_INVALID_SIDE ); + *blis_side = BLIS_LEFT; + } +} + +static void bli_param_map_netlib_to_blis_uplo( char uplo, uplo_t* blis_uplo ) +{ + if ( uplo == 'l' || uplo == 'L' ) *blis_uplo = BLIS_LOWER; + else if ( uplo == 'u' || uplo == 'U' ) *blis_uplo = BLIS_UPPER; + else + { + // See comment for bli_param_map_netlib_to_blis_side() above. + //bli_check_error_code( BLIS_INVALID_UPLO ); + *blis_uplo = BLIS_LOWER; + } +} + +static void bli_param_map_netlib_to_blis_trans( char trans, trans_t* blis_trans ) +{ + if ( trans == 'n' || trans == 'N' ) *blis_trans = BLIS_NO_TRANSPOSE; + else if ( trans == 't' || trans == 'T' ) *blis_trans = BLIS_TRANSPOSE; + else if ( trans == 'c' || trans == 'C' ) *blis_trans = BLIS_CONJ_TRANSPOSE; + else + { + // See comment for bli_param_map_netlib_to_blis_side() above. + //bli_check_error_code( BLIS_INVALID_TRANS ); + *blis_trans = BLIS_NO_TRANSPOSE; + } +} + +static void bli_param_map_netlib_to_blis_diag( char diag, diag_t* blis_diag ) +{ + if ( diag == 'n' || diag == 'N' ) *blis_diag = BLIS_NONUNIT_DIAG; + else if ( diag == 'u' || diag == 'U' ) *blis_diag = BLIS_UNIT_DIAG; + else + { + // See comment for bli_param_map_netlib_to_blis_side() above. + //bli_check_error_code( BLIS_INVALID_DIAG ); + *blis_diag = BLIS_NONUNIT_DIAG; + } +} // --- BLIS char to BLIS mappings ---------------------------------------------- diff --git a/frame/compat/bla_gemm.c b/frame/compat/bla_gemm.c index 1effececa..e04e48cf5 100644 --- a/frame/compat/bla_gemm.c +++ b/frame/compat/bla_gemm.c @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -38,6 +39,9 @@ // // Define BLAS-to-BLIS interfaces. // + +#ifdef BLIS_BLAS3_CALLS_TAPI + #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ @@ -84,7 +88,7 @@ void PASTEF77(ch,blasname) \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ bli_param_map_netlib_to_blis_trans( *transb, &blis_transb ); \ \ - /* Convert/typecast negative values of m, n, and k to zero. */ \ + /* Typecast BLAS integers to BLIS integers. */ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *n, n0 ); \ bli_convert_blas_dim1( *k, k0 ); \ @@ -118,6 +122,105 @@ void PASTEF77(ch,blasname) \ bli_finalize_auto(); \ } +#else + +#undef GENTFUNC +#define GENTFUNC( ftype, ch, blasname, blisname ) \ +\ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + trans_t blis_transa; \ + trans_t blis_transb; \ + dim_t m0, n0, k0; \ +\ + /* Initialize BLIS. */ \ + bli_init_auto(); \ +\ + /* Perform BLAS parameter checking. */ \ + PASTEBLACHK(blasname) \ + ( \ + MKSTR(ch), \ + MKSTR(blasname), \ + transa, \ + transb, \ + m, \ + n, \ + k, \ + lda, \ + ldb, \ + ldc \ + ); \ +\ + /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ + bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ + bli_param_map_netlib_to_blis_trans( *transb, &blis_transb ); \ +\ + /* Typecast BLAS integers to BLIS integers. */ \ + bli_convert_blas_dim1( *m, m0 ); \ + bli_convert_blas_dim1( *n, n0 ); \ + bli_convert_blas_dim1( *k, k0 ); \ +\ + /* Set the row and column strides of the matrix operands. */ \ + const inc_t rs_a = 1; \ + const inc_t cs_a = *lda; \ + const inc_t rs_b = 1; \ + const inc_t cs_b = *ldb; \ + const inc_t rs_c = 1; \ + const inc_t cs_c = *ldc; \ +\ + const num_t dt = PASTEMAC(ch,type); \ +\ + obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t ao = BLIS_OBJECT_INITIALIZER; \ + obj_t bo = BLIS_OBJECT_INITIALIZER; \ + obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t co = BLIS_OBJECT_INITIALIZER; \ +\ + dim_t m0_a, n0_a; \ + dim_t m0_b, n0_b; \ +\ + bli_set_dims_with_trans( blis_transa, m0, k0, &m0_a, &n0_a ); \ + bli_set_dims_with_trans( blis_transb, k0, n0, &m0_b, &n0_b ); \ +\ + bli_obj_init_finish_1x1( dt, (ftype*)alpha, &alphao ); \ + bli_obj_init_finish_1x1( dt, (ftype*)beta, &betao ); \ +\ + bli_obj_init_finish( dt, m0_a, n0_a, (ftype*)a, rs_a, cs_a, &ao ); \ + bli_obj_init_finish( dt, m0_b, n0_b, (ftype*)b, rs_b, cs_b, &bo ); \ + bli_obj_init_finish( dt, m0, n0, (ftype*)c, rs_c, cs_c, &co ); \ +\ + bli_obj_set_conjtrans( blis_transa, &ao ); \ + bli_obj_set_conjtrans( blis_transb, &bo ); \ +\ + PASTEMAC(blisname,BLIS_OAPI_EX_SUF) \ + ( \ + &alphao, \ + &ao, \ + &bo, \ + &betao, \ + &co, \ + NULL, \ + NULL \ + ); \ +\ + /* Finalize BLIS. */ \ + bli_finalize_auto(); \ +} + +#endif + #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( gemm, gemm ) #endif diff --git a/frame/compat/bla_hemm.c b/frame/compat/bla_hemm.c index 88e9c8b55..79ccd9a2c 100644 --- a/frame/compat/bla_hemm.c +++ b/frame/compat/bla_hemm.c @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -38,6 +39,9 @@ // // Define BLAS-to-BLIS interfaces. // + +#ifdef BLIS_BLAS3_CALLS_TAPI + #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ @@ -116,6 +120,110 @@ void PASTEF77(ch,blasname) \ bli_finalize_auto(); \ } +#else + +#undef GENTFUNCCO +#define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ +\ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + side_t blis_side; \ + uplo_t blis_uploa; \ + dim_t m0, n0; \ +\ + /* Initialize BLIS. */ \ + bli_init_auto(); \ +\ + /* Perform BLAS parameter checking. */ \ + PASTEBLACHK(blasname) \ + ( \ + MKSTR(ch), \ + MKSTR(blasname), \ + side, \ + uploa, \ + m, \ + n, \ + lda, \ + ldb, \ + ldc \ + ); \ +\ + /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ + bli_param_map_netlib_to_blis_side( *side, &blis_side ); \ + bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ +\ + /* Convert/typecast negative values of m and n to zero. */ \ + bli_convert_blas_dim1( *m, m0 ); \ + bli_convert_blas_dim1( *n, n0 ); \ +\ + /* Set the row and column strides of the matrix operands. */ \ + const inc_t rs_a = 1; \ + const inc_t cs_a = *lda; \ + const inc_t rs_b = 1; \ + const inc_t cs_b = *ldb; \ + const inc_t rs_c = 1; \ + const inc_t cs_c = *ldc; \ +\ + const num_t dt = PASTEMAC(ch,type); \ +\ + const conj_t conja = BLIS_NO_CONJUGATE; \ + const trans_t transb = BLIS_NO_TRANSPOSE; \ + const struc_t struca = BLIS_HERMITIAN; \ +\ + obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t ao = BLIS_OBJECT_INITIALIZER; \ + obj_t bo = BLIS_OBJECT_INITIALIZER; \ + obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t co = BLIS_OBJECT_INITIALIZER; \ +\ + dim_t mn0_a; \ + dim_t m0_b, n0_b; \ +\ + bli_set_dim_with_side( blis_side, m0, n0, &mn0_a ); \ + bli_set_dims_with_trans( transb, m0, n0, &m0_b, &n0_b ); \ +\ + bli_obj_init_finish_1x1( dt, (ftype*)alpha, &alphao ); \ + bli_obj_init_finish_1x1( dt, (ftype*)beta, &betao ); \ +\ + bli_obj_init_finish( dt, mn0_a, mn0_a, (ftype*)a, rs_a, cs_a, &ao ); \ + bli_obj_init_finish( dt, m0_b, n0_b, (ftype*)b, rs_b, cs_b, &bo ); \ + bli_obj_init_finish( dt, m0, n0, (ftype*)c, rs_c, cs_c, &co ); \ +\ + bli_obj_set_uplo( blis_uploa, &ao ); \ + bli_obj_set_conj( conja, &ao ); \ + bli_obj_set_conjtrans( transb, &bo ); \ +\ + bli_obj_set_struc( struca, &ao ); \ +\ + PASTEMAC(blisname,BLIS_OAPI_EX_SUF) \ + ( \ + blis_side, \ + &alphao, \ + &ao, \ + &bo, \ + &betao, \ + &co, \ + NULL, \ + NULL \ + ); \ +\ + /* Finalize BLIS. */ \ + bli_finalize_auto(); \ +} + +#endif + #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCCO_BLAS( hemm, hemm ) #endif diff --git a/frame/compat/bla_her2k.c b/frame/compat/bla_her2k.c index 0bbe98e1c..566cd41fb 100644 --- a/frame/compat/bla_her2k.c +++ b/frame/compat/bla_her2k.c @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -38,6 +39,9 @@ // // Define BLAS-to-BLIS interfaces. // + +#ifdef BLIS_BLAS3_CALLS_TAPI + #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ @@ -132,6 +136,126 @@ void PASTEF77(ch,blasname) \ bli_finalize_auto(); \ } +#else + +#undef GENTFUNCCO +#define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ +\ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype_r* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + uplo_t blis_uploc; \ + trans_t blis_transa; \ + dim_t m0, k0; \ +\ + /* Initialize BLIS. */ \ + bli_init_auto(); \ +\ + /* Perform BLAS parameter checking. */ \ + PASTEBLACHK(blasname) \ + ( \ + MKSTR(ch), \ + MKSTR(blasname), \ + uploc, \ + transa, \ + m, \ + k, \ + lda, \ + ldb, \ + ldc \ + ); \ +\ + /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ + bli_param_map_netlib_to_blis_uplo( *uploc, &blis_uploc ); \ + bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ +\ + /* Convert/typecast negative values of m and k to zero. */ \ + bli_convert_blas_dim1( *m, m0 ); \ + bli_convert_blas_dim1( *k, k0 ); \ +\ + /* We emulate the BLAS early return behavior with the following + conditional, which returns if one of the following is true: + - matrix C is empty + - the rank-2k product is empty (either because alpha is zero or k + is zero) AND matrix C is not scaled. */ \ + if ( m0 == 0 || \ + ( ( PASTEMAC(ch,eq0)( *alpha ) || k0 == 0 ) \ + && PASTEMAC(chr,eq1)( *beta ) \ + ) \ + ) \ + { \ + /* Finalize BLIS. */ \ + bli_finalize_auto(); \ +\ + return; \ + } \ +\ + /* Set the row and column strides of the matrix operands. */ \ + const inc_t rs_a = 1; \ + const inc_t cs_a = *lda; \ + const inc_t rs_b = 1; \ + const inc_t cs_b = *ldb; \ + const inc_t rs_c = 1; \ + const inc_t cs_c = *ldc; \ +\ + const num_t dt_r = PASTEMAC(chr,type); \ + const num_t dt = PASTEMAC(ch,type); \ +\ + const trans_t transb = blis_transa; \ + const struc_t strucc = BLIS_HERMITIAN; \ +\ + obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t ao = BLIS_OBJECT_INITIALIZER; \ + obj_t bo = BLIS_OBJECT_INITIALIZER; \ + obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t co = BLIS_OBJECT_INITIALIZER; \ +\ + dim_t m0_a, n0_a; \ + dim_t m0_b, n0_b; \ +\ + bli_set_dims_with_trans( blis_transa, m0, k0, &m0_a, &n0_a ); \ + bli_set_dims_with_trans( transb, m0, k0, &m0_b, &n0_b ); \ +\ + bli_obj_init_finish_1x1( dt, (ftype* )alpha, &alphao ); \ + bli_obj_init_finish_1x1( dt_r, (ftype_r*)beta, &betao ); \ +\ + bli_obj_init_finish( dt, m0_a, n0_a, (ftype*)a, rs_a, cs_a, &ao ); \ + bli_obj_init_finish( dt, m0_b, n0_b, (ftype*)b, rs_b, cs_b, &bo ); \ + bli_obj_init_finish( dt, m0, m0, (ftype*)c, rs_c, cs_c, &co ); \ +\ + bli_obj_set_uplo( blis_uploc, &co ); \ + bli_obj_set_conjtrans( blis_transa, &ao ); \ + bli_obj_set_conjtrans( transb, &bo ); \ +\ + bli_obj_set_struc( strucc, &co ); \ +\ + PASTEMAC(blisname,BLIS_OAPI_EX_SUF) \ + ( \ + &alphao, \ + &ao, \ + &bo, \ + &betao, \ + &co, \ + NULL, \ + NULL \ + ); \ +\ + /* Finalize BLIS. */ \ + bli_finalize_auto(); \ +} + +#endif + #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCCO_BLAS( her2k, her2k ) #endif diff --git a/frame/compat/bla_herk.c b/frame/compat/bla_herk.c index 88185de0b..c919be9d7 100644 --- a/frame/compat/bla_herk.c +++ b/frame/compat/bla_herk.c @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -38,6 +39,9 @@ // // Define BLAS-to-BLIS interfaces. // + +#ifdef BLIS_BLAS3_CALLS_TAPI + #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ @@ -125,6 +129,115 @@ void PASTEF77(ch,blasname) \ bli_finalize_auto(); \ } +#else + +#undef GENTFUNCCO +#define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ +\ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype_r* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype_r* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + uplo_t blis_uploc; \ + trans_t blis_transa; \ + dim_t m0, k0; \ +\ + /* Initialize BLIS. */ \ + bli_init_auto(); \ +\ + /* Perform BLAS parameter checking. */ \ + PASTEBLACHK(blasname) \ + ( \ + MKSTR(ch), \ + MKSTR(blasname), \ + uploc, \ + transa, \ + m, \ + k, \ + lda, \ + ldc \ + ); \ +\ + /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ + bli_param_map_netlib_to_blis_uplo( *uploc, &blis_uploc ); \ + bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ +\ + /* Convert/typecast negative values of m and k to zero. */ \ + bli_convert_blas_dim1( *m, m0 ); \ + bli_convert_blas_dim1( *k, k0 ); \ +\ + /* We emulate the BLAS early return behavior with the following + conditional, which returns if one of the following is true: + - matrix C is empty + - the rank-k product is empty (either because alpha is zero or k + is zero) AND matrix C is not scaled. */ \ + if ( m0 == 0 || \ + ( ( PASTEMAC(chr,eq0)( *alpha ) || k0 == 0 ) \ + && PASTEMAC(chr,eq1)( *beta ) \ + ) \ + ) \ + { \ + /* Finalize BLIS. */ \ + bli_finalize_auto(); \ +\ + return; \ + } \ +\ + /* Set the row and column strides of the matrix operands. */ \ + const inc_t rs_a = 1; \ + const inc_t cs_a = *lda; \ + const inc_t rs_c = 1; \ + const inc_t cs_c = *ldc; \ +\ + const num_t dt_r = PASTEMAC(chr,type); \ + const num_t dt = PASTEMAC(ch,type); \ +\ + const struc_t strucc = BLIS_HERMITIAN; \ +\ + obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t ao = BLIS_OBJECT_INITIALIZER; \ + obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t co = BLIS_OBJECT_INITIALIZER; \ +\ + dim_t m0_a, n0_a; \ +\ + bli_set_dims_with_trans( blis_transa, m0, k0, &m0_a, &n0_a ); \ +\ + bli_obj_init_finish_1x1( dt_r, (ftype_r*)alpha, &alphao ); \ + bli_obj_init_finish_1x1( dt_r, (ftype_r*)beta, &betao ); \ +\ + bli_obj_init_finish( dt, m0_a, n0_a, (ftype*)a, rs_a, cs_a, &ao ); \ + bli_obj_init_finish( dt, m0, m0, (ftype*)c, rs_c, cs_c, &co ); \ +\ + bli_obj_set_uplo( blis_uploc, &co ); \ + bli_obj_set_conjtrans( blis_transa, &ao ); \ +\ + bli_obj_set_struc( strucc, &co ); \ +\ + PASTEMAC(blisname,BLIS_OAPI_EX_SUF) \ + ( \ + &alphao, \ + &ao, \ + &betao, \ + &co, \ + NULL, \ + NULL \ + ); \ +\ + /* Finalize BLIS. */ \ + bli_finalize_auto(); \ +} + +#endif + #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCCO_BLAS( herk, herk ) #endif diff --git a/frame/compat/bla_symm.c b/frame/compat/bla_symm.c index 02d3a3b27..3711b34c9 100644 --- a/frame/compat/bla_symm.c +++ b/frame/compat/bla_symm.c @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -38,6 +39,9 @@ // // Define BLAS-to-BLIS interfaces. // + +#ifdef BLIS_BLAS3_CALLS_TAPI + #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ @@ -116,6 +120,110 @@ void PASTEF77(ch,blasname) \ bli_finalize_auto(); \ } +#else + +#undef GENTFUNC +#define GENTFUNC( ftype, ch, blasname, blisname ) \ +\ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + side_t blis_side; \ + uplo_t blis_uploa; \ + dim_t m0, n0; \ +\ + /* Initialize BLIS. */ \ + bli_init_auto(); \ +\ + /* Perform BLAS parameter checking. */ \ + PASTEBLACHK(blasname) \ + ( \ + MKSTR(ch), \ + MKSTR(blasname), \ + side, \ + uploa, \ + m, \ + n, \ + lda, \ + ldb, \ + ldc \ + ); \ +\ + /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ + bli_param_map_netlib_to_blis_side( *side, &blis_side ); \ + bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ +\ + /* Convert/typecast negative values of m and n to zero. */ \ + bli_convert_blas_dim1( *m, m0 ); \ + bli_convert_blas_dim1( *n, n0 ); \ +\ + /* Set the row and column strides of the matrix operands. */ \ + const inc_t rs_a = 1; \ + const inc_t cs_a = *lda; \ + const inc_t rs_b = 1; \ + const inc_t cs_b = *ldb; \ + const inc_t rs_c = 1; \ + const inc_t cs_c = *ldc; \ +\ + const num_t dt = PASTEMAC(ch,type); \ +\ + const conj_t conja = BLIS_NO_CONJUGATE; \ + const trans_t transb = BLIS_NO_TRANSPOSE; \ + const struc_t struca = BLIS_SYMMETRIC; \ +\ + obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t ao = BLIS_OBJECT_INITIALIZER; \ + obj_t bo = BLIS_OBJECT_INITIALIZER; \ + obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t co = BLIS_OBJECT_INITIALIZER; \ +\ + dim_t mn0_a; \ + dim_t m0_b, n0_b; \ +\ + bli_set_dim_with_side( blis_side, m0, n0, &mn0_a ); \ + bli_set_dims_with_trans( transb, m0, n0, &m0_b, &n0_b ); \ +\ + bli_obj_init_finish_1x1( dt, (ftype*)alpha, &alphao ); \ + bli_obj_init_finish_1x1( dt, (ftype*)beta, &betao ); \ +\ + bli_obj_init_finish( dt, mn0_a, mn0_a, (ftype*)a, rs_a, cs_a, &ao ); \ + bli_obj_init_finish( dt, m0_b, n0_b, (ftype*)b, rs_b, cs_b, &bo ); \ + bli_obj_init_finish( dt, m0, n0, (ftype*)c, rs_c, cs_c, &co ); \ +\ + bli_obj_set_uplo( blis_uploa, &ao ); \ + bli_obj_set_conj( conja, &ao ); \ + bli_obj_set_conjtrans( transb, &bo ); \ +\ + bli_obj_set_struc( struca, &ao ); \ +\ + PASTEMAC(blisname,BLIS_OAPI_EX_SUF) \ + ( \ + blis_side, \ + &alphao, \ + &ao, \ + &bo, \ + &betao, \ + &co, \ + NULL, \ + NULL \ + ); \ +\ + /* Finalize BLIS. */ \ + bli_finalize_auto(); \ +} + +#endif + #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( symm, symm ) #endif diff --git a/frame/compat/bla_syr2k.c b/frame/compat/bla_syr2k.c index 7e611b1d6..a977820a6 100644 --- a/frame/compat/bla_syr2k.c +++ b/frame/compat/bla_syr2k.c @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -38,6 +39,9 @@ // // Define BLAS-to-BLIS interfaces. // + +#ifdef BLIS_BLAS3_CALLS_TAPI + #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ @@ -124,6 +128,117 @@ void PASTEF77(ch,blasname) \ bli_finalize_auto(); \ } +#else + +#undef GENTFUNC +#define GENTFUNC( ftype, ch, blasname, blisname ) \ +\ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + uplo_t blis_uploc; \ + trans_t blis_transa; \ + dim_t m0, k0; \ +\ + /* Initialize BLIS. */ \ + bli_init_auto(); \ +\ + /* Perform BLAS parameter checking. */ \ + PASTEBLACHK(blasname) \ + ( \ + MKSTR(ch), \ + MKSTR(blasname), \ + uploc, \ + transa, \ + m, \ + k, \ + lda, \ + ldb, \ + ldc \ + ); \ +\ + /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ + bli_param_map_netlib_to_blis_uplo( *uploc, &blis_uploc ); \ + bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ +\ + /* The real domain ssyr2k and dsyr2k in netlib BLAS treat a trans value + of 'C' (conjugate-transpose) as 'T' (transpose only). So, we have + to go out of our way a little to support this behavior. */ \ + if ( bli_is_real( PASTEMAC(ch,type) ) && \ + bli_is_conjtrans( blis_transa ) ) \ + { \ + blis_transa = BLIS_TRANSPOSE; \ + } \ +\ + /* Convert/typecast negative values of m and k to zero. */ \ + bli_convert_blas_dim1( *m, m0 ); \ + bli_convert_blas_dim1( *k, k0 ); \ +\ + /* Set the row and column strides of the matrix operands. */ \ + const inc_t rs_a = 1; \ + const inc_t cs_a = *lda; \ + const inc_t rs_b = 1; \ + const inc_t cs_b = *ldb; \ + const inc_t rs_c = 1; \ + const inc_t cs_c = *ldc; \ +\ + const num_t dt = PASTEMAC(ch,type); \ +\ + const trans_t transb = blis_transa; \ + const struc_t strucc = BLIS_SYMMETRIC; \ +\ + obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t ao = BLIS_OBJECT_INITIALIZER; \ + obj_t bo = BLIS_OBJECT_INITIALIZER; \ + obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t co = BLIS_OBJECT_INITIALIZER; \ +\ + dim_t m0_a, n0_a; \ + dim_t m0_b, n0_b; \ +\ + bli_set_dims_with_trans( blis_transa, m0, k0, &m0_a, &n0_a ); \ + bli_set_dims_with_trans( transb, m0, k0, &m0_b, &n0_b ); \ +\ + bli_obj_init_finish_1x1( dt, (ftype*)alpha, &alphao ); \ + bli_obj_init_finish_1x1( dt, (ftype*)beta, &betao ); \ +\ + bli_obj_init_finish( dt, m0_a, n0_a, (ftype*)a, rs_a, cs_a, &ao ); \ + bli_obj_init_finish( dt, m0_b, n0_b, (ftype*)b, rs_b, cs_b, &bo ); \ + bli_obj_init_finish( dt, m0, m0, (ftype*)c, rs_c, cs_c, &co ); \ +\ + bli_obj_set_uplo( blis_uploc, &co ); \ + bli_obj_set_conjtrans( blis_transa, &ao ); \ + bli_obj_set_conjtrans( transb, &bo ); \ +\ + bli_obj_set_struc( strucc, &co ); \ +\ + PASTEMAC(blisname,BLIS_OAPI_EX_SUF) \ + ( \ + &alphao, \ + &ao, \ + &bo, \ + &betao, \ + &co, \ + NULL, \ + NULL \ + ); \ +\ + /* Finalize BLIS. */ \ + bli_finalize_auto(); \ +} + +#endif + #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( syr2k, syr2k ) #endif diff --git a/frame/compat/bla_syrk.c b/frame/compat/bla_syrk.c index 9c08dd06b..5bc2d356a 100644 --- a/frame/compat/bla_syrk.c +++ b/frame/compat/bla_syrk.c @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -38,6 +39,9 @@ // // Define BLAS-to-BLIS interfaces. // + +#ifdef BLIS_BLAS3_CALLS_TAPI + #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ @@ -117,6 +121,106 @@ void PASTEF77(ch,blasname) \ bli_finalize_auto(); \ } +#else + +#undef GENTFUNC +#define GENTFUNC( ftype, ch, blasname, blisname ) \ +\ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + uplo_t blis_uploc; \ + trans_t blis_transa; \ + dim_t m0, k0; \ +\ + /* Initialize BLIS. */ \ + bli_init_auto(); \ +\ + /* Perform BLAS parameter checking. */ \ + PASTEBLACHK(blasname) \ + ( \ + MKSTR(ch), \ + MKSTR(blasname), \ + uploc, \ + transa, \ + m, \ + k, \ + lda, \ + ldc \ + ); \ +\ + /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ + bli_param_map_netlib_to_blis_uplo( *uploc, &blis_uploc ); \ + bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ +\ + /* The real domain ssyrk and dsyrk in netlib BLAS treat a trans value + of 'C' (conjugate-transpose) as 'T' (transpose only). So, we have + to go out of our way a little to support this behavior. */ \ + if ( bli_is_real( PASTEMAC(ch,type) ) && \ + bli_is_conjtrans( blis_transa ) ) \ + { \ + blis_transa = BLIS_TRANSPOSE; \ + } \ +\ + /* Convert/typecast negative values of m and k to zero. */ \ + bli_convert_blas_dim1( *m, m0 ); \ + bli_convert_blas_dim1( *k, k0 ); \ +\ + /* Set the row and column strides of the matrix operands. */ \ + const inc_t rs_a = 1; \ + const inc_t cs_a = *lda; \ + const inc_t rs_c = 1; \ + const inc_t cs_c = *ldc; \ +\ + const num_t dt = PASTEMAC(ch,type); \ +\ + const struc_t strucc = BLIS_SYMMETRIC; \ +\ + obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t ao = BLIS_OBJECT_INITIALIZER; \ + obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t co = BLIS_OBJECT_INITIALIZER; \ +\ + dim_t m0_a, n0_a; \ +\ + bli_set_dims_with_trans( blis_transa, m0, k0, &m0_a, &n0_a ); \ +\ + bli_obj_init_finish_1x1( dt, (ftype*)alpha, &alphao ); \ + bli_obj_init_finish_1x1( dt, (ftype*)beta, &betao ); \ +\ + bli_obj_init_finish( dt, m0_a, n0_a, (ftype*)a, rs_a, cs_a, &ao ); \ + bli_obj_init_finish( dt, m0, m0, (ftype*)c, rs_c, cs_c, &co ); \ +\ + bli_obj_set_uplo( blis_uploc, &co ); \ + bli_obj_set_conjtrans( blis_transa, &ao ); \ +\ + bli_obj_set_struc( strucc, &co ); \ +\ + PASTEMAC(blisname,BLIS_OAPI_EX_SUF) \ + ( \ + &alphao, \ + &ao, \ + &betao, \ + &co, \ + NULL, \ + NULL \ + ); \ +\ + /* Finalize BLIS. */ \ + bli_finalize_auto(); \ +} + +#endif + #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( syrk, syrk ) #endif diff --git a/frame/compat/bla_trmm.c b/frame/compat/bla_trmm.c index 116d2b8c4..7ca20da74 100644 --- a/frame/compat/bla_trmm.c +++ b/frame/compat/bla_trmm.c @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -38,6 +39,9 @@ // // Define BLAS-to-BLIS interfaces. // + +#ifdef BLIS_BLAS3_CALLS_TAPI + #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ @@ -116,6 +120,103 @@ void PASTEF77(ch,blasname) \ bli_finalize_auto(); \ } +#else + +#undef GENTFUNC +#define GENTFUNC( ftype, ch, blasname, blisname ) \ +\ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ + ) \ +{ \ + side_t blis_side; \ + uplo_t blis_uploa; \ + trans_t blis_transa; \ + diag_t blis_diaga; \ + dim_t m0, n0; \ +\ + /* Initialize BLIS. */ \ + bli_init_auto(); \ +\ + /* Perform BLAS parameter checking. */ \ + PASTEBLACHK(blasname) \ + ( \ + MKSTR(ch), \ + MKSTR(blasname), \ + side, \ + uploa, \ + transa, \ + diaga, \ + m, \ + n, \ + lda, \ + ldb \ + ); \ +\ + /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ + bli_param_map_netlib_to_blis_side( *side, &blis_side ); \ + bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ + bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ + bli_param_map_netlib_to_blis_diag( *diaga, &blis_diaga ); \ +\ + /* Convert/typecast negative values of m and n to zero. */ \ + bli_convert_blas_dim1( *m, m0 ); \ + bli_convert_blas_dim1( *n, n0 ); \ +\ + /* Set the row and column strides of the matrix operands. */ \ + const inc_t rs_a = 1; \ + const inc_t cs_a = *lda; \ + const inc_t rs_b = 1; \ + const inc_t cs_b = *ldb; \ +\ + const num_t dt = PASTEMAC(ch,type); \ +\ + const struc_t struca = BLIS_TRIANGULAR; \ +\ + obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t ao = BLIS_OBJECT_INITIALIZER; \ + obj_t bo = BLIS_OBJECT_INITIALIZER; \ +\ + dim_t mn0_a; \ +\ + bli_set_dim_with_side( blis_side, m0, n0, &mn0_a ); \ +\ + bli_obj_init_finish_1x1( dt, (ftype*)alpha, &alphao ); \ +\ + bli_obj_init_finish( dt, mn0_a, mn0_a, (ftype*)a, rs_a, cs_a, &ao ); \ + bli_obj_init_finish( dt, m0, n0, (ftype*)b, rs_b, cs_b, &bo ); \ +\ + bli_obj_set_uplo( blis_uploa, &ao ); \ + bli_obj_set_diag( blis_diaga, &ao ); \ + bli_obj_set_conjtrans( blis_transa, &ao ); \ +\ + bli_obj_set_struc( struca, &ao ); \ +\ + PASTEMAC(blisname,BLIS_OAPI_EX_SUF) \ + ( \ + blis_side, \ + &alphao, \ + &ao, \ + &bo, \ + NULL, \ + NULL \ + ); \ +\ + /* Finalize BLIS. */ \ + bli_finalize_auto(); \ +} + +#endif + #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( trmm, trmm ) #endif diff --git a/frame/compat/bla_trsm.c b/frame/compat/bla_trsm.c index 70597cc93..0e1c2e329 100644 --- a/frame/compat/bla_trsm.c +++ b/frame/compat/bla_trsm.c @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -38,6 +39,9 @@ // // Define BLAS-to-BLIS interfaces. // + +#ifdef BLIS_BLAS3_CALLS_TAPI + #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ @@ -116,6 +120,103 @@ void PASTEF77(ch,blasname) \ bli_finalize_auto(); \ } +#else + +#undef GENTFUNC +#define GENTFUNC( ftype, ch, blasname, blisname ) \ +\ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ + ) \ +{ \ + side_t blis_side; \ + uplo_t blis_uploa; \ + trans_t blis_transa; \ + diag_t blis_diaga; \ + dim_t m0, n0; \ +\ + /* Initialize BLIS. */ \ + bli_init_auto(); \ +\ + /* Perform BLAS parameter checking. */ \ + PASTEBLACHK(blasname) \ + ( \ + MKSTR(ch), \ + MKSTR(blasname), \ + side, \ + uploa, \ + transa, \ + diaga, \ + m, \ + n, \ + lda, \ + ldb \ + ); \ +\ + /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ + bli_param_map_netlib_to_blis_side( *side, &blis_side ); \ + bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ + bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ + bli_param_map_netlib_to_blis_diag( *diaga, &blis_diaga ); \ +\ + /* Convert/typecast negative values of m and n to zero. */ \ + bli_convert_blas_dim1( *m, m0 ); \ + bli_convert_blas_dim1( *n, n0 ); \ +\ + /* Set the row and column strides of the matrix operands. */ \ + const inc_t rs_a = 1; \ + const inc_t cs_a = *lda; \ + const inc_t rs_b = 1; \ + const inc_t cs_b = *ldb; \ +\ + const num_t dt = PASTEMAC(ch,type); \ +\ + const struc_t struca = BLIS_TRIANGULAR; \ +\ + obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ + obj_t ao = BLIS_OBJECT_INITIALIZER; \ + obj_t bo = BLIS_OBJECT_INITIALIZER; \ +\ + dim_t mn0_a; \ +\ + bli_set_dim_with_side( blis_side, m0, n0, &mn0_a ); \ +\ + bli_obj_init_finish_1x1( dt, (ftype*)alpha, &alphao ); \ +\ + bli_obj_init_finish( dt, mn0_a, mn0_a, (ftype*)a, rs_a, cs_a, &ao ); \ + bli_obj_init_finish( dt, m0, n0, (ftype*)b, rs_b, cs_b, &bo ); \ +\ + bli_obj_set_uplo( blis_uploa, &ao ); \ + bli_obj_set_diag( blis_diaga, &ao ); \ + bli_obj_set_conjtrans( blis_transa, &ao ); \ +\ + bli_obj_set_struc( struca, &ao ); \ +\ + PASTEMAC(blisname,BLIS_OAPI_EX_SUF) \ + ( \ + blis_side, \ + &alphao, \ + &ao, \ + &bo, \ + NULL, \ + NULL \ + ); \ +\ + /* Finalize BLIS. */ \ + bli_finalize_auto(); \ +} + +#endif + #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( trsm, trsm ) #endif diff --git a/frame/include/bli_config_macro_defs.h b/frame/include/bli_config_macro_defs.h index 46f78c27f..8734ff672 100644 --- a/frame/include/bli_config_macro_defs.h +++ b/frame/include/bli_config_macro_defs.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -45,11 +46,11 @@ // internally within BLIS as well as those exposed in the native BLAS-like BLIS // interface. #ifndef BLIS_INT_TYPE_SIZE -#ifdef BLIS_ARCH_64 -#define BLIS_INT_TYPE_SIZE 64 -#else -#define BLIS_INT_TYPE_SIZE 32 -#endif + #ifdef BLIS_ARCH_64 + #define BLIS_INT_TYPE_SIZE 64 + #else + #define BLIS_INT_TYPE_SIZE 32 + #endif #endif @@ -157,7 +158,19 @@ // C99 type "long int". Note that this ONLY affects integers used within the // BLAS compatibility layer. #ifndef BLIS_BLAS_INT_TYPE_SIZE -#define BLIS_BLAS_INT_TYPE_SIZE 32 + #define BLIS_BLAS_INT_TYPE_SIZE 32 +#endif + +// By default, the level-3 BLAS routines are implemented by directly calling +// the BLIS object API. Alternatively, they may first call the typed BLIS +// API, which will then call the object API. +//#define BLIS_BLAS3_CALLS_TAPI +#ifdef BLIS_BLAS3_CALLS_TAPI + #undef BLIS_BLAS3_CALLS_OAPI +#else + // Default behavior is to call object API directly. + #undef BLIS_BLAS3_CALLS_OAPI // In case user explicitly enabled. + #define BLIS_BLAS3_CALLS_OAPI #endif diff --git a/frame/include/bli_obj_macro_defs.h b/frame/include/bli_obj_macro_defs.h index e3eb2b874..788ac0a75 100644 --- a/frame/include/bli_obj_macro_defs.h +++ b/frame/include/bli_obj_macro_defs.h @@ -6,6 +6,7 @@ Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP + Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -1127,6 +1128,55 @@ static void bli_obj_set_panel_stride( inc_t ps, obj_t* obj ) obj->ps = ps; } +// -- Initialization-related macros -- + +// Finish the initialization started by the matrix-specific static initializer +// (e.g. BLIS_OBJECT_PREINITIALIZER) +// NOTE: This is intended only for use in the BLAS compatibility API and typed +// BLIS API. + +static void bli_obj_init_finish( num_t dt, dim_t m, dim_t n, void* p, inc_t rs, inc_t cs, obj_t* obj ) +{ + bli_obj_set_as_root( obj ); + + bli_obj_set_dt( dt, obj ); + bli_obj_set_target_dt( dt, obj ); + bli_obj_set_exec_dt( dt, obj ); + bli_obj_set_comp_dt( dt, obj ); + + bli_obj_set_dims( m, n, obj ); + bli_obj_set_strides( rs, cs, obj ); + + siz_t elem_size = sizeof( float ); + if ( bli_dt_prec_is_double( dt ) ) elem_size *= 2; + if ( bli_dt_dom_is_complex( dt ) ) elem_size *= 2; + bli_obj_set_elem_size( elem_size, obj ); + + bli_obj_set_buffer( p, obj ); + + bli_obj_set_scalar_dt( dt, obj ); + void* restrict s = bli_obj_internal_scalar_buffer( obj ); + + if ( bli_dt_prec_is_single( dt ) ) { (( scomplex* )s)->real = 1.0F; + (( scomplex* )s)->imag = 0.0F; } + else if ( bli_dt_prec_is_double( dt ) ) { (( dcomplex* )s)->real = 1.0; + (( dcomplex* )s)->imag = 0.0; } +} + +// Finish the initialization started by the 1x1-specific static initializer +// (e.g. BLIS_OBJECT_PREINITIALIZER_1X1) +// NOTE: This is intended only for use in the BLAS compatibility API and typed +// BLIS API. + +static void bli_obj_init_finish_1x1( num_t dt, void* p, obj_t* obj ) +{ + bli_obj_set_as_root( obj ); + + bli_obj_set_dt( dt, obj ); + + bli_obj_set_buffer( p, obj ); +} + // -- Miscellaneous object macros -- // Toggle the region referenced (or "stored"). @@ -1158,38 +1208,6 @@ static void bli_obj_set_defaults( obj_t* obj ) obj->info = obj->info | BLIS_BITVAL_DENSE | BLIS_BITVAL_GENERAL; } -// Initializors for global scalar constants. -// NOTE: These must remain cpp macros since they are initializor -// expressions, not functions. - -#define bli_obj_init_const( buffer0 ) \ -{ \ - .root = NULL, \ -\ - .off = { 0, 0 }, \ - .dim = { 1, 1 }, \ - .diag_off = 0, \ -\ - .info = 0x0 | BLIS_BITVAL_CONST_TYPE | \ - BLIS_BITVAL_DENSE | \ - BLIS_BITVAL_GENERAL, \ - .elem_size = sizeof( constdata_t ), \ -\ - .buffer = buffer0, \ - .rs = 1, \ - .cs = 1, \ - .is = 1 \ -} - -#define bli_obj_init_constdata( val ) \ -{ \ - .s = ( float )val, \ - .d = ( double )val, \ - .c = { .real = ( float )val, .imag = 0.0f }, \ - .z = { .real = ( double )val, .imag = 0.0 }, \ - .i = ( gint_t )val, \ -} - // Acquire buffer at object's submatrix offset (offset-aware buffer query). static void* bli_obj_buffer_at_off( obj_t* obj ) diff --git a/frame/include/bli_param_macro_defs.h b/frame/include/bli_param_macro_defs.h index b22949c07..28b93743f 100644 --- a/frame/include/bli_param_macro_defs.h +++ b/frame/include/bli_param_macro_defs.h @@ -132,12 +132,36 @@ static dom_t bli_dt_domain( num_t dt ) ( dt & BLIS_DOMAIN_BIT ); } +static bool_t bli_dt_dom_is_real( num_t dt ) +{ + return ( bool_t ) + ( ( dt & BLIS_DOMAIN_BIT ) == BLIS_REAL ); +} + +static bool_t bli_dt_dom_is_complex( num_t dt ) +{ + return ( bool_t ) + ( ( dt & BLIS_DOMAIN_BIT ) == BLIS_COMPLEX ); +} + static prec_t bli_dt_prec( num_t dt ) { return ( prec_t ) ( dt & BLIS_PRECISION_BIT ); } +static bool_t bli_dt_prec_is_single( num_t dt ) +{ + return ( bool_t ) + ( ( dt & BLIS_PRECISION_BIT ) == BLIS_SINGLE_PREC ); +} + +static bool_t bli_dt_prec_is_double( num_t dt ) +{ + return ( bool_t ) + ( ( dt & BLIS_PRECISION_BIT ) == BLIS_DOUBLE_PREC ); +} + static num_t bli_dt_proj_to_real( num_t dt ) { return ( num_t ) diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 2b778e663..605338963 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -863,8 +863,10 @@ typedef enum BLIS_MC, BLIS_KC, BLIS_NC, + BLIS_M2, // level-2 blocksize in m dimension BLIS_N2, // level-2 blocksize in n dimension + BLIS_AF, // level-1f axpyf fusing factor BLIS_DF, // level-1f dotxf fusing factor BLIS_XF, // level-1f dotxaxpyf fusing factor @@ -1139,6 +1141,71 @@ typedef struct obj_s dim_t n_panel; // n dimension of a "full" panel } obj_t; +// Pre-initializors. Things that must be set afterwards: +// - root object pointer +// - info bitfields: dt, target_dt, exec_dt, comp_dt +// - info2 bitfields: scalar_dt +// - elem_size +// - dims, strides +// - buffer +// - internal scalar buffer (must always set imaginary component) + +#define BLIS_OBJECT_INITIALIZER \ +{ \ + .root = NULL, \ +\ + .off = { 0, 0 }, \ + .dim = { 0, 0 }, \ + .diag_off = 0, \ +\ + .info = 0x0 | BLIS_BITVAL_DENSE | \ + BLIS_BITVAL_GENERAL, \ + .info2 = 0x0, \ + .elem_size = sizeof( float ), /* this is changed later. */ \ +\ + .buffer = NULL, \ + .rs = 0, \ + .cs = 0, \ + .is = 1, \ +\ + .scalar = { 0.0, 0.0 }, \ +\ + .m_padded = 0, \ + .n_padded = 0, \ + .ps = 0, \ + .pd = 0, \ + .m_panel = 0, \ + .n_panel = 0 \ +} + +#define BLIS_OBJECT_INITIALIZER_1X1 \ +{ \ + .root = NULL, \ +\ + .off = { 0, 0 }, \ + .dim = { 1, 1 }, \ + .diag_off = 0, \ +\ + .info = 0x0 | BLIS_BITVAL_DENSE | \ + BLIS_BITVAL_GENERAL, \ + .info2 = 0x0, \ + .elem_size = sizeof( float ), /* this is changed later. */ \ +\ + .buffer = NULL, \ + .rs = 0, \ + .cs = 0, \ + .is = 1, \ +\ + .scalar = { 0.0, 0.0 }, \ +\ + .m_padded = 0, \ + .n_padded = 0, \ + .ps = 0, \ + .pd = 0, \ + .m_panel = 0, \ + .n_panel = 0 \ +} + // Define these macros here since they must be updated if contents of // obj_t changes. @@ -1205,6 +1272,39 @@ static void bli_obj_init_subpart_from( obj_t* a, obj_t* b ) b->n_panel = a->n_panel; } +// Initializors for global scalar constants. +// NOTE: These must remain cpp macros since they are initializor +// expressions, not functions. + +#define bli_obj_init_const( buffer0 ) \ +{ \ + .root = NULL, \ +\ + .off = { 0, 0 }, \ + .dim = { 1, 1 }, \ + .diag_off = 0, \ +\ + .info = 0x0 | BLIS_BITVAL_CONST_TYPE | \ + BLIS_BITVAL_DENSE | \ + BLIS_BITVAL_GENERAL, \ + .info2 = 0x0, \ + .elem_size = sizeof( constdata_t ), \ +\ + .buffer = buffer0, \ + .rs = 1, \ + .cs = 1, \ + .is = 1 \ +} + +#define bli_obj_init_constdata( val ) \ +{ \ + .s = ( float )val, \ + .d = ( double )val, \ + .c = { .real = ( float )val, .imag = 0.0f }, \ + .z = { .real = ( double )val, .imag = 0.0 }, \ + .i = ( gint_t )val, \ +} + // -- Context type --