Added BLIS, BLAS, and CBLAS interface for cblas?amin

Details:
      - The amin API returns the index of the element with the minimum
        absolute value in a vector.
      - Added a reference BLIS kernel for amin.
      - Added BLAS and CBLAS interfaces for amin.

AMD-Internal: [CPUPL-1155]

Change-Id: I89c1e37e86950a4582bba70a5d8fc70ac915bd3c
This commit is contained in:
Nageshwar Singh
2020-09-23 21:33:39 +05:30
committed by Dipal M Zambare
parent 602b99a41d
commit 4b56cc94da
32 changed files with 1814 additions and 1006 deletions

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -47,7 +48,7 @@ void PASTEMAC(opname,_check) \
obj_t* y \
) \
{ \
bli_l1v_xy_check( x, y ); \
bli_l1v_xy_check( x, y ); \
}
GENFRONT( addv )
@@ -65,10 +66,11 @@ void PASTEMAC(opname,_check) \
obj_t* index \
) \
{ \
bli_l1v_xi_check( x, index ); \
bli_l1v_xi_check( x, index ); \
}
GENFRONT( amaxv )
GENFRONT( aminv )
#undef GENFRONT
@@ -82,7 +84,7 @@ void PASTEMAC(opname,_check) \
obj_t* y \
) \
{ \
bli_l1v_axby_check( alpha, x, beta, y ); \
bli_l1v_axby_check( alpha, x, beta, y ); \
}
GENFRONT( axpbyv )
@@ -98,7 +100,7 @@ void PASTEMAC(opname,_check) \
obj_t* y \
) \
{ \
bli_l1v_axy_check( alpha, x, y ); \
bli_l1v_axy_check( alpha, x, y ); \
}
GENFRONT( axpyv )
@@ -115,7 +117,7 @@ void PASTEMAC(opname,_check) \
obj_t* rho \
) \
{ \
bli_l1v_dot_check( &BLIS_ONE, x, y, &BLIS_ONE, rho ); \
bli_l1v_dot_check( &BLIS_ONE, x, y, &BLIS_ONE, rho ); \
}
GENFRONT( dotv )
@@ -133,7 +135,7 @@ void PASTEMAC(opname,_check) \
obj_t* rho \
) \
{ \
bli_l1v_dot_check( alpha, x, y, beta, rho ); \
bli_l1v_dot_check( alpha, x, y, beta, rho ); \
}
GENFRONT( dotxv )
@@ -147,7 +149,7 @@ void PASTEMAC(opname,_check) \
obj_t* x \
) \
{ \
bli_l1v_x_check( x ); \
bli_l1v_x_check( x ); \
}
GENFRONT( invertv )
@@ -162,7 +164,7 @@ void PASTEMAC(opname,_check) \
obj_t* x \
) \
{ \
bli_l1v_ax_check( alpha, x ); \
bli_l1v_ax_check( alpha, x ); \
}
GENFRONT( scalv )
@@ -179,7 +181,7 @@ void PASTEMAC(opname,_check) \
obj_t* y \
) \
{ \
bli_l1v_xby_check( x, beta, y ); \
bli_l1v_xby_check( x, beta, y ); \
}
GENFRONT( xpbyv )
@@ -190,93 +192,93 @@ GENFRONT( xpbyv )
void bli_l1v_xy_check
(
obj_t* x,
obj_t* y
obj_t* y
)
{
err_t e_val;
err_t e_val;
// Check object datatypes.
// Check object datatypes.
e_val = bli_check_floating_object( x );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( x );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
// Check for consistent datatypes.
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
// Check object dimensions.
// Check object dimensions.
e_val = bli_check_vector_object( x );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( x );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( y );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( y );
bli_check_error_code( e_val );
e_val = bli_check_equal_vector_lengths( x, y );
bli_check_error_code( e_val );
e_val = bli_check_equal_vector_lengths( x, y );
bli_check_error_code( e_val );
// Check object buffers (for non-NULLness).
// Check object buffers (for non-NULLness).
e_val = bli_check_object_buffer( x );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( x );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( y );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( y );
bli_check_error_code( e_val );
}
void bli_l1v_axy_check
(
obj_t* alpha,
obj_t* x,
obj_t* y
obj_t* y
)
{
err_t e_val;
err_t e_val;
// Check object datatypes.
// Check object datatypes.
e_val = bli_check_noninteger_object( alpha );
bli_check_error_code( e_val );
e_val = bli_check_noninteger_object( alpha );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( x );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( x );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
// Check for consistent datatypes.
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
// Check object dimensions.
// Check object dimensions.
e_val = bli_check_scalar_object( alpha );
bli_check_error_code( e_val );
e_val = bli_check_scalar_object( alpha );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( x );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( x );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( y );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( y );
bli_check_error_code( e_val );
e_val = bli_check_equal_vector_lengths( x, y );
bli_check_error_code( e_val );
e_val = bli_check_equal_vector_lengths( x, y );
bli_check_error_code( e_val );
// Check object buffers (for non-NULLness).
// Check object buffers (for non-NULLness).
e_val = bli_check_object_buffer( alpha );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( alpha );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( x );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( x );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( y );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( y );
bli_check_error_code( e_val );
}
void bli_l1v_xby_check
@@ -286,48 +288,48 @@ void bli_l1v_xby_check
obj_t* y
)
{
err_t e_val;
err_t e_val;
// Check object datatypes.
// Check object datatypes.
e_val = bli_check_noninteger_object( beta );
bli_check_error_code( e_val );
e_val = bli_check_noninteger_object( beta );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( x );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( x );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
// Check for consistent datatypes.
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
// Check object dimensions.
// Check object dimensions.
e_val = bli_check_scalar_object( beta );
bli_check_error_code( e_val );
e_val = bli_check_scalar_object( beta );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( x );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( x );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( y );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( y );
bli_check_error_code( e_val );
e_val = bli_check_equal_vector_lengths( x, y );
bli_check_error_code( e_val );
e_val = bli_check_equal_vector_lengths( x, y );
bli_check_error_code( e_val );
// Check object buffers (for non-NULLness).
// Check object buffers (for non-NULLness).
e_val = bli_check_object_buffer( beta );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( beta );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( x );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( x );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( y );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( y );
bli_check_error_code( e_val );
}
void bli_l1v_axby_check
@@ -338,57 +340,57 @@ void bli_l1v_axby_check
obj_t* y
)
{
err_t e_val;
err_t e_val;
// Check object datatypes.
// Check object datatypes.
e_val = bli_check_noninteger_object( alpha );
bli_check_error_code( e_val );
e_val = bli_check_noninteger_object( alpha );
bli_check_error_code( e_val );
e_val = bli_check_noninteger_object( beta );
bli_check_error_code( e_val );
e_val = bli_check_noninteger_object( beta );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( x );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( x );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
// Check for consistent datatypes.
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
// Check object dimensions.
// Check object dimensions.
e_val = bli_check_scalar_object( alpha );
bli_check_error_code( e_val );
e_val = bli_check_scalar_object( alpha );
bli_check_error_code( e_val );
e_val = bli_check_scalar_object( beta );
bli_check_error_code( e_val );
e_val = bli_check_scalar_object( beta );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( x );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( x );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( y );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( y );
bli_check_error_code( e_val );
e_val = bli_check_equal_vector_lengths( x, y );
bli_check_error_code( e_val );
e_val = bli_check_equal_vector_lengths( x, y );
bli_check_error_code( e_val );
// Check object buffers (for non-NULLness).
// Check object buffers (for non-NULLness).
e_val = bli_check_object_buffer( alpha );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( alpha );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( beta );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( beta );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( x );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( x );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( y );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( y );
bli_check_error_code( e_val );
}
void bli_l1v_dot_check
@@ -397,128 +399,128 @@ void bli_l1v_dot_check
obj_t* x,
obj_t* y,
obj_t* beta,
obj_t* rho
obj_t* rho
)
{
err_t e_val;
err_t e_val;
// Check object datatypes.
// Check object datatypes.
e_val = bli_check_noninteger_object( alpha );
bli_check_error_code( e_val );
e_val = bli_check_noninteger_object( alpha );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( x );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( x );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
e_val = bli_check_noninteger_object( beta );
bli_check_error_code( e_val );
e_val = bli_check_noninteger_object( beta );
bli_check_error_code( e_val );
e_val = bli_check_noninteger_object( rho );
bli_check_error_code( e_val );
e_val = bli_check_noninteger_object( rho );
bli_check_error_code( e_val );
e_val = bli_check_nonconstant_object( rho );
bli_check_error_code( e_val );
e_val = bli_check_nonconstant_object( rho );
bli_check_error_code( e_val );
// Check for consistent datatypes.
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
// Check object dimensions.
// Check object dimensions.
e_val = bli_check_scalar_object( alpha );
bli_check_error_code( e_val );
e_val = bli_check_scalar_object( alpha );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( x );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( x );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( y );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( y );
bli_check_error_code( e_val );
e_val = bli_check_scalar_object( beta );
bli_check_error_code( e_val );
e_val = bli_check_scalar_object( beta );
bli_check_error_code( e_val );
e_val = bli_check_scalar_object( rho );
bli_check_error_code( e_val );
e_val = bli_check_scalar_object( rho );
bli_check_error_code( e_val );
e_val = bli_check_equal_vector_lengths( x, y );
bli_check_error_code( e_val );
e_val = bli_check_equal_vector_lengths( x, y );
bli_check_error_code( e_val );
// Check object buffers (for non-NULLness).
// Check object buffers (for non-NULLness).
e_val = bli_check_object_buffer( alpha );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( alpha );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( x );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( x );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( y );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( y );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( beta );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( beta );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( rho );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( rho );
bli_check_error_code( e_val );
}
void bli_l1v_x_check
(
obj_t* x
obj_t* x
)
{
err_t e_val;
err_t e_val;
// Check object datatypes.
// Check object datatypes.
e_val = bli_check_floating_object( x );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( x );
bli_check_error_code( e_val );
// Check object dimensions.
// Check object dimensions.
e_val = bli_check_vector_object( x );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( x );
bli_check_error_code( e_val );
// Check object buffers (for non-NULLness).
// Check object buffers (for non-NULLness).
e_val = bli_check_object_buffer( x );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( x );
bli_check_error_code( e_val );
}
void bli_l1v_ax_check
(
obj_t* alpha,
obj_t* x
obj_t* x
)
{
err_t e_val;
err_t e_val;
// Check object datatypes.
// Check object datatypes.
e_val = bli_check_noninteger_object( alpha );
bli_check_error_code( e_val );
e_val = bli_check_noninteger_object( alpha );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( x );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( x );
bli_check_error_code( e_val );
// Check object dimensions.
// Check object dimensions.
e_val = bli_check_scalar_object( alpha );
bli_check_error_code( e_val );
e_val = bli_check_scalar_object( alpha );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( x );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( x );
bli_check_error_code( e_val );
// Check object buffers (for non-NULLness).
// Check object buffers (for non-NULLness).
e_val = bli_check_object_buffer( alpha );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( alpha );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( x );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( x );
bli_check_error_code( e_val );
}
void bli_l1v_xi_check
@@ -527,33 +529,33 @@ void bli_l1v_xi_check
obj_t* index
)
{
err_t e_val;
err_t e_val;
// Check object datatypes.
// Check object datatypes.
e_val = bli_check_floating_object( x );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( x );
bli_check_error_code( e_val );
e_val = bli_check_integer_object( index );
bli_check_error_code( e_val );
e_val = bli_check_integer_object( index );
bli_check_error_code( e_val );
e_val = bli_check_nonconstant_object( index );
bli_check_error_code( e_val );
e_val = bli_check_nonconstant_object( index );
bli_check_error_code( e_val );
// Check object dimensions.
// Check object dimensions.
e_val = bli_check_vector_object( x );
bli_check_error_code( e_val );
e_val = bli_check_vector_object( x );
bli_check_error_code( e_val );
e_val = bli_check_scalar_object( index );
bli_check_error_code( e_val );
e_val = bli_check_scalar_object( index );
bli_check_error_code( e_val );
// Check object buffers (for non-NULLness).
// Check object buffers (for non-NULLness).
e_val = bli_check_object_buffer( x );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( x );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( index );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( index );
bli_check_error_code( e_val );
}

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -62,7 +63,7 @@ void PASTEMAC(opname,_check) \
);
GENTPROT( amaxv )
GENTPROT( aminv )
#undef GENTPROT
#define GENTPROT( opname ) \
@@ -163,14 +164,14 @@ GENTPROT( xpbyv )
void bli_l1v_xy_check
(
obj_t* x,
obj_t* y
obj_t* y
);
void bli_l1v_axy_check
(
obj_t* alpha,
obj_t* x,
obj_t* y
obj_t* y
);
void bli_l1v_xby_check
@@ -194,18 +195,18 @@ void bli_l1v_dot_check
obj_t* x,
obj_t* y,
obj_t* beta,
obj_t* rho
obj_t* rho
);
void bli_l1v_x_check
(
obj_t* x
obj_t* x
);
void bli_l1v_ax_check
(
obj_t* alpha,
obj_t* x
obj_t* x
);
void bli_l1v_xi_check

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -54,6 +55,7 @@ GENFRONT( addv )
GENFRONT( copyv )
GENFRONT( subv )
GENFRONT( amaxv )
GENFRONT( aminv )
GENFRONT( axpbyv )
GENFRONT( axpyv )
GENFRONT( scal2v )

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -46,6 +47,7 @@ GENPROT( addv )
GENPROT( copyv )
GENPROT( subv )
GENPROT( amaxv )
GENPROT( aminv )
GENPROT( axpbyv )
GENPROT( axpyv )
GENPROT( scal2v )

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -69,6 +70,7 @@ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \
);
INSERT_GENTDEF( amaxv )
INSERT_GENTDEF( aminv )
// axpbyv

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -72,6 +73,7 @@ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \
);
INSERT_GENTDEF( amaxv )
INSERT_GENTDEF( aminv )
// axpbyv

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -54,6 +55,12 @@ INSERT_GENTPROT_BASIC0( addv_ker_name )
INSERT_GENTPROT_BASIC0( amaxv_ker_name )
#undef GENTPROT
#define GENTPROT AMINV_KER_PROT
INSERT_GENTPROT_BASIC0( aminv_ker_name )
#undef GENTPROT
#define GENTPROT AXPBYV_KER_PROT

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -59,6 +60,15 @@ void PASTEMAC(ch,opname) \
cntx_t* restrict cntx \
); \
#define AMINV_KER_PROT( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
( \
dim_t n, \
ctype* restrict x, inc_t incx, \
dim_t* restrict index, \
cntx_t* restrict cntx \
); \
#define AXPBYV_KER_PROT( ctype, ch, opname ) \
\

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -128,7 +129,7 @@ void PASTEMAC(opname,EX_SUF) \
}
GENFRONT( amaxv )
GENFRONT( aminv )
#undef GENFRONT
#define GENFRONT( opname ) \

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -45,7 +46,7 @@ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \
obj_t* x, \
obj_t* y \
BLIS_OAPI_EX_PARAMS \
);
);
GENTPROT( addv )
GENTPROT( copyv )
@@ -63,6 +64,7 @@ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \
);
GENTPROT( amaxv )
GENTPROT( aminv )
#undef GENTPROT
@@ -89,7 +91,7 @@ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \
obj_t* x, \
obj_t* y \
BLIS_OAPI_EX_PARAMS \
);
);
GENTPROT( axpyv )
GENTPROT( scal2v )
@@ -104,7 +106,7 @@ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \
obj_t* y, \
obj_t* rho \
BLIS_OAPI_EX_PARAMS \
);
);
GENTPROT( dotv )
@@ -120,7 +122,7 @@ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \
obj_t* beta, \
obj_t* rho \
BLIS_OAPI_EX_PARAMS \
);
);
GENTPROT( dotxv )
@@ -132,7 +134,7 @@ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \
( \
obj_t* x \
BLIS_OAPI_EX_PARAMS \
);
);
GENTPROT( invertv )
@@ -145,7 +147,7 @@ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \
obj_t* alpha, \
obj_t* x \
BLIS_OAPI_EX_PARAMS \
);
);
GENTPROT( scalv )
GENTPROT( setv )
@@ -159,7 +161,7 @@ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \
obj_t* x, \
obj_t* y \
BLIS_OAPI_EX_PARAMS \
);
);
GENTPROT( swapv )

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -110,7 +111,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
}
INSERT_GENTFUNC_BASIC( amaxv, BLIS_AMAXV_KER )
INSERT_GENTFUNC_BASIC( aminv, BLIS_AMINV_KER )
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, kerid ) \

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -66,6 +67,7 @@ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
); \
INSERT_GENTPROT_BASIC0( amaxv )
INSERT_GENTPROT_BASIC0( aminv )
#undef GENTPROT

View File

@@ -1,9 +1,11 @@
##Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.##
##Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved##
target_sources("${PROJECT_NAME}"
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/bla_amax.c
${CMAKE_CURRENT_SOURCE_DIR}/bla_amax.h
${CMAKE_CURRENT_SOURCE_DIR}/bla_amin.c
${CMAKE_CURRENT_SOURCE_DIR}/bla_amin.h
${CMAKE_CURRENT_SOURCE_DIR}/bla_asum.c
${CMAKE_CURRENT_SOURCE_DIR}/bla_asum.h
${CMAKE_CURRENT_SOURCE_DIR}/bla_axpy.c
@@ -67,7 +69,7 @@ ${CMAKE_CURRENT_SOURCE_DIR}/bli_blas.h
#Add all subdirectories
# add_subdirectory(attic)
add_subdirectory(blis)
add_subdirectory(cblas)
add_subdirectory(check)
add_subdirectory(f2c)
add_subdirectory(blis)
add_subdirectory(cblas)
add_subdirectory(check)
add_subdirectory(f2c)

95
frame/compat/bla_amin.c Normal file
View File

@@ -0,0 +1,95 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
//
// Define BLAS-to-BLIS interfaces.
//
// GENTFUNC generates the BLAS-compatible function named by
// PASTEF772(i,chx,blasname). The generated function returns 0 when
// *n < 1 or *incx <= 0; otherwise it calls the BLIS typed expert
// interface for blisname and returns the resulting index plus one.
#undef GENTFUNC
#define GENTFUNC( ftype_x, chx, blasname, blisname ) \
\
f77_int PASTEF772(i,chx,blasname) \
( \
const f77_int* n, \
const ftype_x* x, const f77_int* incx \
) \
{ \
dim_t n_blis; \
inc_t incx_blis; \
ftype_x* x_blis; \
gint_t idx_zero_based; \
\
/* Emulate netlib BLAS: an empty vector or a non-positive increment */ \
/* yields an index of zero. Returning here (rather than letting */ \
/* bli_?aminv() report 0) keeps the "+ 1" adjustment at the end of */ \
/* this function from turning that result into 1. */ \
if ( *n < 1 ) return 0; \
if ( *incx <= 0 ) return 0; \
\
/* Bring up BLIS before using any of its interfaces. */ \
bli_init_auto(); \
\
/* Translate the BLAS dimension into a BLIS dimension (negative */ \
/* values of n would be converted to zero). */ \
bli_convert_blas_dim1( *n, n_blis ); \
\
/* Translate the BLAS vector pointer/increment pair into its BLIS */ \
/* equivalent (negative increments would have the pointer adjusted */ \
/* so that a positive increment can be used instead). */ \
bli_convert_blas_incv( n_blis, (ftype_x*)x, *incx, x_blis, incx_blis ); \
\
/* Invoke the BLIS typed expert interface; the context and runtime */ \
/* arguments are passed as NULL. */ \
PASTEMAC2(chx,blisname,BLIS_TAPI_EX_SUF) \
( \
n_blis, \
x_blis, incx_blis, \
&idx_zero_based, \
NULL, \
NULL \
); \
\
/* Shut BLIS down again. */ \
bli_finalize_auto(); \
\
/* BLIS (C) indices are zero-based while BLAS (Fortran) indices are */ \
/* one-based. Any difference between the BLIS and BLAS integer */ \
/* sizes is resolved by this conversion as well. */ \
return ( f77_int )( idx_zero_based + 1 ); \
}
#ifdef BLIS_ENABLE_BLAS
INSERT_GENTFUNC_BLAS( amin, aminv )
#endif

50
frame/compat/bla_amin.h Normal file
View File

@@ -0,0 +1,50 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//
// Prototype BLAS-to-BLIS interfaces.
//
// GENTPROT declares the function named by PASTEF772(i,chx,blasname),
// marked with BLIS_EXPORT_BLAS. All three arguments are passed by pointer:
//   n    - pointer to the vector length
//   x    - pointer to the (read-only) vector data of type ftype_x
//   incx - pointer to the increment of x
// The function returns an f77_int.
// The prototypes are only instantiated (for blasname "amin") when
// BLIS_ENABLE_BLAS is defined.
#undef GENTPROT
#define GENTPROT( ftype_x, chx, blasname ) \
\
BLIS_EXPORT_BLAS f77_int PASTEF772(i,chx,blasname) \
( \
const f77_int* n, \
const ftype_x* x, const f77_int* incx \
);
#ifdef BLIS_ENABLE_BLAS
INSERT_GENTPROT_BLAS( amin )
#endif

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -103,11 +103,6 @@
#include "bla_xerbla_array.h"
// -- Level-0 BLAS prototypes --
#include "bla_cabs1.h"
// -- Level-1 BLAS prototypes --
#include "bla_amax.h"
@@ -200,7 +195,7 @@
#include "bla_trsm_check.h"
#include "bla_gemmt_check.h"
// -- Batch Extension prototypes --
// -- Batch Extension prototypes --
#include "bla_gemm_batch.h"
@@ -209,4 +204,11 @@
#include "b77_thread.h"
// -- Auxiliary Routines --
#include "bla_cabs1.h"
#include "bla_amin.h"
#include "f77_amin_sub.h"
#endif // BLIS_ENABLE_BLAS

View File

@@ -1,9 +1,11 @@
##Copyright (C) 2020, Advanced Micro Devices, Inc.##
##Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.##
target_sources("${PROJECT_NAME}"
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/f77_amax_sub.c
${CMAKE_CURRENT_SOURCE_DIR}/f77_amax_sub.h
${CMAKE_CURRENT_SOURCE_DIR}/f77_amin_sub.c
${CMAKE_CURRENT_SOURCE_DIR}/f77_amin_sub.h
${CMAKE_CURRENT_SOURCE_DIR}/f77_asum_sub.c
${CMAKE_CURRENT_SOURCE_DIR}/f77_asum_sub.h
${CMAKE_CURRENT_SOURCE_DIR}/f77_dot_sub.c
@@ -13,5 +15,5 @@ ${CMAKE_CURRENT_SOURCE_DIR}/f77_nrm2_sub.h
)

View File

@@ -0,0 +1,61 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
#include "f77_amin_sub.h"
//
// Define CBLAS subroutine wrapper interfaces.
//
#undef GENTFUNC
#define GENTFUNC( ftype_x, chx, blasname, blisname ) \
\
void PASTEF773(i,chx,blasname,sub) \
( \
const f77_int* n, \
const ftype_x* x, const f77_int* incx, \
f77_int* rval \
) \
{ \
*rval = PASTEF772(i,chx,blasname) \
( \
n, \
x, incx \
); \
}
#ifdef BLIS_ENABLE_CBLAS
INSERT_GENTFUNC_BLAS( amin, NULL )
#endif

View File

@@ -0,0 +1,50 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//
// Prototype CBLAS subroutine wrapper interfaces.
//
// GENTPROT is a template. Each expansion declares one exported
// (BLIS_EXPORT_BLAS) void function named by PASTEF773(i,chx,blasname,sub)
// taking:
//   n    - pointer to the (read-only) element count,
//   x    - pointer to the (read-only) input vector of type ftype_x,
//   incx - pointer to the (read-only) stride of x,
//   rval - pointer through which the result is written.
// The parameter list mirrors the definitions generated by GENTFUNC in
// f77_amin_sub.c.
//
#undef GENTPROT
#define GENTPROT( ftype_x, chx, blasname ) \
\
BLIS_EXPORT_BLAS void PASTEF773(i,chx,blasname,sub) \
( \
const f77_int* n, \
const ftype_x* x, const f77_int* incx, \
f77_int* rval \
);
// The prototypes are only emitted when the CBLAS layer is enabled.
// NOTE(review): INSERT_GENTPROT_BLAS presumably expands GENTPROT once per
// supported datatype; verify against its definition.
#ifdef BLIS_ENABLE_CBLAS
INSERT_GENTPROT_BLAS( amin )
#endif

View File

@@ -1,4 +1,4 @@
##Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. ##
##Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.##
target_sources("${PROJECT_NAME}"
PRIVATE
@@ -78,6 +78,10 @@ ${CMAKE_CURRENT_SOURCE_DIR}/cblas_icamax.c
${CMAKE_CURRENT_SOURCE_DIR}/cblas_idamax.c
${CMAKE_CURRENT_SOURCE_DIR}/cblas_isamax.c
${CMAKE_CURRENT_SOURCE_DIR}/cblas_izamax.c
${CMAKE_CURRENT_SOURCE_DIR}/cblas_icamin.c
${CMAKE_CURRENT_SOURCE_DIR}/cblas_idamin.c
${CMAKE_CURRENT_SOURCE_DIR}/cblas_isamin.c
${CMAKE_CURRENT_SOURCE_DIR}/cblas_izamin.c
${CMAKE_CURRENT_SOURCE_DIR}/cblas_sasum.c
${CMAKE_CURRENT_SOURCE_DIR}/cblas_saxpy.c
${CMAKE_CURRENT_SOURCE_DIR}/cblas_scasum.c
@@ -161,7 +165,3 @@ ${CMAKE_CURRENT_SOURCE_DIR}/cblas_daxpby.c
${CMAKE_CURRENT_SOURCE_DIR}/cblas_caxpby.c
${CMAKE_CURRENT_SOURCE_DIR}/cblas_zaxpby.c
)

View File

@@ -111,7 +111,7 @@ BLIS_EXPORT_BLAS f77_int cblas_izamax(f77_int N, const void *X, f77_int incX);
* ===========================================================================
*/
/*
/*
* Routines with standard 4 prefixes (s, d, c, z)
*/
void BLIS_EXPORT_BLAS cblas_sswap(f77_int N, float *X, f77_int incX,
@@ -153,7 +153,7 @@ void BLIS_EXPORT_BLAS cblas_zaxpby(f77_int N, const void *alpha,
void *Y, f77_int incY);
/*
/*
* Routines with S and D prefix only
*/
void BLIS_EXPORT_BLAS cblas_srotg(float *a, float *b, float *c, float *s);
@@ -171,7 +171,7 @@ void BLIS_EXPORT_BLAS cblas_drotm(f77_int N, double *X, f77_int incX,
double *Y, f77_int incY, const double *P);
/*
/*
* Routines with S D C Z CS and ZD prefixes
*/
void BLIS_EXPORT_BLAS cblas_sscal(f77_int N, float alpha, float *X, f77_int incX);
@@ -187,7 +187,7 @@ void BLIS_EXPORT_BLAS cblas_zdscal(f77_int N, double alpha, void *X, f77_int inc
* ===========================================================================
*/
/*
/*
* Routines with standard 4 prefixes (S, D, C, Z)
*/
void BLIS_EXPORT_BLAS cblas_sgemv(enum CBLAS_ORDER order,
@@ -323,7 +323,7 @@ void BLIS_EXPORT_BLAS cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, const void *Ap, void *X, f77_int incX);
/*
/*
* Routines with S and D prefixes only
*/
void BLIS_EXPORT_BLAS cblas_ssymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
@@ -385,7 +385,7 @@ void BLIS_EXPORT_BLAS cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int incX, const double *Y, f77_int incY, double *A);
/*
/*
* Routines with C and Z prefixes only
*/
void BLIS_EXPORT_BLAS cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
@@ -456,7 +456,7 @@ void BLIS_EXPORT_BLAS cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
* ===========================================================================
*/
/*
/*
* Routines with standard 4 prefixes (S, D, C, Z)
*/
void BLIS_EXPORT_BLAS cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA,
@@ -600,7 +600,7 @@ void BLIS_EXPORT_BLAS cblas_zgemmt(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo,
const void *beta, void *C, f77_int ldc);
/*
/*
* Routines with prefixes C and Z only
*/
void BLIS_EXPORT_BLAS cblas_chemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side,
@@ -637,12 +637,11 @@ void BLIS_EXPORT_BLAS cblas_xerbla(f77_int p, const char *rout, const char *form
/*
* ===========================================================================
* Prototypes for Aux Functions BLAS
* Prototypes for extension BLAS routines
* ===========================================================================
*/
BLIS_EXPORT_BLAS float cblas_scabs1( const void *z);
BLIS_EXPORT_BLAS double cblas_dcabs1( const void *z);
@@ -687,6 +686,12 @@ void BLIS_EXPORT_BLAS cblas_zgemm_batch(enum CBLAS_ORDER Order,
const void *beta_array, void **C, f77_int *ldc_array,
f77_int group_count, f77_int *group_size);
// -- AMIN APIs -------
// Extension routines (not part of the classic CBLAS set): per the change
// description, each returns the index of the element of X with the minimum
// absolute value. N is the element count and incX the stride of X.
// The s/d variants take real float/double vectors; the c/z variants take the
// vector as an untyped pointer (cast to scomplex*/dcomplex* by the
// implementations in cblas_icamin.c / cblas_izamin.c).
BLIS_EXPORT_BLAS f77_int cblas_isamin(f77_int N, const float *X, f77_int incX);
BLIS_EXPORT_BLAS f77_int cblas_idamin(f77_int N, const double *X, f77_int incX);
BLIS_EXPORT_BLAS f77_int cblas_icamin(f77_int N, const void *X, f77_int incX);
BLIS_EXPORT_BLAS f77_int cblas_izamin(f77_int N, const void *X, f77_int incX);
#ifdef __cplusplus
}
#endif

View File

@@ -41,6 +41,7 @@
#define F77_zswap zswap_
#define F77_zcopy zcopy_
#define F77_zaxpy zaxpy_
#define F77_zaxpby zaxpby_
#define F77_izamax_sub izamaxsub_
#define F77_sdot_sub sdotsub_
#define F77_ddot_sub ddotsub_
@@ -186,6 +187,11 @@
#define F77_caxpby caxpby_
#define F77_zaxpby zaxpby_
#define F77_isamin_sub isaminsub_
#define F77_idamin_sub idaminsub_
#define F77_icamin_sub icaminsub_
#define F77_izamin_sub izaminsub_
// -- Batch APIs --
#define F77_sgemm_batch sgemm_batch_
#define F77_dgemm_batch dgemm_batch_

View File

@@ -0,0 +1,26 @@
#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
* cblas_icamin.c
*
* The program is a C interface to icamin.
* It calls the fortran wrapper before calling icamin.
*
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
*/
#include "cblas.h"
#include "cblas_f77.h"
/*
 * cblas_icamin: C entry point for the single-precision complex "amin" query.
 *
 *   N    - number of elements in X.
 *   X    - input vector, passed as an untyped pointer and interpreted as
 *          scomplex elements.
 *   incX - stride between consecutive elements of X.
 *
 * Calls F77_icamin_sub to obtain the index and returns it decremented by one
 * when it is nonzero; a zero result is returned unchanged.
 * NOTE(review): the decrement presumably converts a one-based (Fortran-style)
 * index to a zero-based one -- confirm against the i?amin_ implementation.
 */
f77_int cblas_icamin( f77_int N, const void *X, f77_int incX)
{
   f77_int iamin;
#ifdef F77_INT
   F77_INT F77_N=N, F77_incX=incX;
#else
   #define F77_N N
   #define F77_incX incX
#endif
   /* Keep the const qualifier when reinterpreting X: the wrapper only reads
      the vector (its prototype takes a const pointer), so there is no reason
      to cast constness away. */
   F77_icamin_sub( &F77_N, (const scomplex*)X, &F77_incX, &iamin);
   return iamin ? iamin-1 : 0;
}
#endif

View File

@@ -0,0 +1,27 @@
#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
* cblas_idamin.c
*
* The program is a C interface to idamin.
* It calls the fortran wrapper before calling idamin.
*
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
*/
#include "cblas.h"
#include "cblas_f77.h"
/*
 * cblas_idamin: C entry point for the double-precision real "amin" query.
 *
 *   N    - number of elements in X.
 *   X    - input vector of doubles (read-only).
 *   incX - stride between consecutive elements of X.
 *
 * Obtains the index from F77_idamin_sub. A zero result is returned as is;
 * any other result is returned minus one.
 */
f77_int cblas_idamin( f77_int N, const double *X, f77_int incX)
{
   /* Receives the value written by the Fortran-style wrapper. */
   f77_int wrapper_idx;
#ifdef F77_INT
   F77_INT F77_N=N, F77_incX=incX;
#else
   #define F77_N N
   #define F77_incX incX
#endif
   F77_idamin_sub( &F77_N, X, &F77_incX, &wrapper_idx);

   if ( wrapper_idx == 0 )
   {
      return 0;
   }
   return wrapper_idx - 1;
}
#endif

View File

@@ -0,0 +1,27 @@
#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
* cblas_isamin.c
*
* The program is a C interface to isamin.
* It calls the fortran wrapper before calling isamin.
*
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
*
*/
#include "cblas.h"
#include "cblas_f77.h"
/*
 * cblas_isamin: C entry point for the single-precision real "amin" query.
 *
 *   N    - number of elements in X.
 *   X    - input vector of floats (read-only).
 *   incX - stride between consecutive elements of X.
 *
 * Obtains the index from F77_isamin_sub. A zero result is returned as is;
 * any other result is returned minus one.
 */
f77_int cblas_isamin( f77_int N, const float *X, f77_int incX)
{
   /* Receives the value written by the Fortran-style wrapper. */
   f77_int wrapper_idx;
#ifdef F77_INT
   F77_INT F77_N=N, F77_incX=incX;
#else
   #define F77_N N
   #define F77_incX incX
#endif
   F77_isamin_sub( &F77_N, X, &F77_incX, &wrapper_idx);

   if ( wrapper_idx == 0 )
   {
      return 0;
   }
   return wrapper_idx - 1;
}
#endif

View File

@@ -0,0 +1,26 @@
#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
* cblas_izamin.c
*
* The program is a C interface to izamin.
* It calls the fortran wrapper before calling izamin.
*
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
*
*/
#include "cblas.h"
#include "cblas_f77.h"
/*
 * cblas_izamin: C entry point for the double-precision complex "amin" query.
 *
 *   N    - number of elements in X.
 *   X    - input vector, passed as an untyped pointer and interpreted as
 *          dcomplex elements.
 *   incX - stride between consecutive elements of X.
 *
 * Calls F77_izamin_sub to obtain the index and returns it decremented by one
 * when it is nonzero; a zero result is returned unchanged.
 * NOTE(review): the decrement presumably converts a one-based (Fortran-style)
 * index to a zero-based one -- confirm against the i?amin_ implementation.
 */
f77_int cblas_izamin( f77_int N, const void *X, f77_int incX)
{
   f77_int iamin;
#ifdef F77_INT
   F77_INT F77_N=N, F77_incX=incX;
#else
   #define F77_N N
   #define F77_incX incX
#endif
   /* Keep the const qualifier when reinterpreting X: the wrapper only reads
      the vector (its prototype takes a const pointer), so there is no reason
      to cast constness away. */
   F77_izamin_sub( &F77_N, (const dcomplex*)X, &F77_incX, &iamin);
   return (iamin ? iamin-1 : 0);
}
#endif

View File

@@ -6,7 +6,7 @@
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2016, Hewlett Packard Enterprise Development LP
Copyright (C) 2020, Advanced Micro Devices, Inc.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -134,39 +134,39 @@ typedef uint32_t objbits_t; // object information bit field
#ifdef BLIS_ENABLE_C99_COMPLEX
#if __STDC_VERSION__ >= 199901L
#include <complex.h>
#if __STDC_VERSION__ >= 199901L
#include <complex.h>
// Typedef official complex types to BLIS complex type names.
typedef float complex scomplex;
typedef double complex dcomplex;
#else
#error "Configuration requested C99 complex types, but C99 does not appear to be supported."
#endif
// Typedef official complex types to BLIS complex type names.
typedef float complex scomplex;
typedef double complex dcomplex;
#else
#error "Configuration requested C99 complex types, but C99 does not appear to be supported."
#endif
#else // ifndef BLIS_ENABLE_C99_COMPLEX
// This cpp guard provides a temporary hack to allow libflame
// interoperability with BLIS.
#ifndef _DEFINED_SCOMPLEX
#define _DEFINED_SCOMPLEX
typedef struct
{
float real;
float imag;
} scomplex;
#endif
// This cpp guard provides a temporary hack to allow libflame
// interoperability with BLIS.
#ifndef _DEFINED_SCOMPLEX
#define _DEFINED_SCOMPLEX
typedef struct
{
float real;
float imag;
} scomplex;
#endif
// This cpp guard provides a temporary hack to allow libflame
// interoperability with BLIS.
#ifndef _DEFINED_DCOMPLEX
#define _DEFINED_DCOMPLEX
typedef struct
{
double real;
double imag;
} dcomplex;
#endif
// This cpp guard provides a temporary hack to allow libflame
// interoperability with BLIS.
#ifndef _DEFINED_DCOMPLEX
#define _DEFINED_DCOMPLEX
typedef struct
{
double real;
double imag;
} dcomplex;
#endif
#endif // BLIS_ENABLE_C99_COMPLEX
@@ -382,7 +382,7 @@ typedef void* void_fp;
#define BLIS_BITVAL_SINGLE_PREC 0x0
#define BLIS_BITVAL_DOUBLE_PREC BLIS_PRECISION_BIT
#define BLIS_BITVAL_FLOAT_TYPE 0x0
#define BLIS_BITVAL_SCOMPLEX_TYPE BLIS_DOMAIN_BIT
#define BLIS_BITVAL_SCOMPLEX_TYPE BLIS_DOMAIN_BIT
#define BLIS_BITVAL_DOUBLE_TYPE BLIS_PRECISION_BIT
#define BLIS_BITVAL_DCOMPLEX_TYPE ( BLIS_DOMAIN_BIT | BLIS_PRECISION_BIT )
#define BLIS_BITVAL_INT_TYPE 0x04
@@ -392,10 +392,10 @@ typedef void* void_fp;
#define BLIS_BITVAL_NO_CONJ 0x0
#define BLIS_BITVAL_CONJ BLIS_CONJ_BIT
#define BLIS_BITVAL_CONJ_TRANS ( BLIS_CONJ_BIT | BLIS_TRANS_BIT )
#define BLIS_BITVAL_ZEROS 0x0
#define BLIS_BITVAL_ZEROS 0x0
#define BLIS_BITVAL_UPPER ( BLIS_UPPER_BIT | BLIS_DIAG_BIT )
#define BLIS_BITVAL_LOWER ( BLIS_LOWER_BIT | BLIS_DIAG_BIT )
#define BLIS_BITVAL_DENSE BLIS_UPLO_BITS
#define BLIS_BITVAL_DENSE BLIS_UPLO_BITS
#define BLIS_BITVAL_NONUNIT_DIAG 0x0
#define BLIS_BITVAL_UNIT_DIAG BLIS_UNIT_DIAG_BIT
#define BLIS_BITVAL_INVERT_DIAG BLIS_INVERT_DIAG_BIT
@@ -454,50 +454,50 @@ typedef void* void_fp;
typedef enum
{
BLIS_NO_TRANSPOSE = 0x0,
BLIS_TRANSPOSE = BLIS_BITVAL_TRANS,
BLIS_CONJ_NO_TRANSPOSE = BLIS_BITVAL_CONJ,
BLIS_CONJ_TRANSPOSE = BLIS_BITVAL_CONJ_TRANS
BLIS_NO_TRANSPOSE = 0x0,
BLIS_TRANSPOSE = BLIS_BITVAL_TRANS,
BLIS_CONJ_NO_TRANSPOSE = BLIS_BITVAL_CONJ,
BLIS_CONJ_TRANSPOSE = BLIS_BITVAL_CONJ_TRANS
} trans_t;
typedef enum
{
BLIS_NO_CONJUGATE = 0x0,
BLIS_CONJUGATE = BLIS_BITVAL_CONJ
BLIS_NO_CONJUGATE = 0x0,
BLIS_CONJUGATE = BLIS_BITVAL_CONJ
} conj_t;
typedef enum
{
BLIS_ZEROS = BLIS_BITVAL_ZEROS,
BLIS_LOWER = BLIS_BITVAL_LOWER,
BLIS_UPPER = BLIS_BITVAL_UPPER,
BLIS_DENSE = BLIS_BITVAL_DENSE
BLIS_ZEROS = BLIS_BITVAL_ZEROS,
BLIS_LOWER = BLIS_BITVAL_LOWER,
BLIS_UPPER = BLIS_BITVAL_UPPER,
BLIS_DENSE = BLIS_BITVAL_DENSE
} uplo_t;
typedef enum
{
BLIS_LEFT = 0x0,
BLIS_RIGHT
BLIS_LEFT = 0x0,
BLIS_RIGHT
} side_t;
typedef enum
{
BLIS_NONUNIT_DIAG = 0x0,
BLIS_UNIT_DIAG = BLIS_BITVAL_UNIT_DIAG
BLIS_NONUNIT_DIAG = 0x0,
BLIS_UNIT_DIAG = BLIS_BITVAL_UNIT_DIAG
} diag_t;
typedef enum
{
BLIS_NO_INVERT_DIAG = 0x0,
BLIS_INVERT_DIAG = BLIS_BITVAL_INVERT_DIAG
BLIS_NO_INVERT_DIAG = 0x0,
BLIS_INVERT_DIAG = BLIS_BITVAL_INVERT_DIAG
} invdiag_t;
typedef enum
{
BLIS_GENERAL = BLIS_BITVAL_GENERAL,
BLIS_HERMITIAN = BLIS_BITVAL_HERMITIAN,
BLIS_SYMMETRIC = BLIS_BITVAL_SYMMETRIC,
BLIS_TRIANGULAR = BLIS_BITVAL_TRIANGULAR
BLIS_GENERAL = BLIS_BITVAL_GENERAL,
BLIS_HERMITIAN = BLIS_BITVAL_HERMITIAN,
BLIS_SYMMETRIC = BLIS_BITVAL_SYMMETRIC,
BLIS_TRIANGULAR = BLIS_BITVAL_TRIANGULAR
} struc_t;
@@ -505,26 +505,26 @@ typedef enum
typedef enum
{
BLIS_FLOAT = BLIS_BITVAL_FLOAT_TYPE,
BLIS_DOUBLE = BLIS_BITVAL_DOUBLE_TYPE,
BLIS_SCOMPLEX = BLIS_BITVAL_SCOMPLEX_TYPE,
BLIS_DCOMPLEX = BLIS_BITVAL_DCOMPLEX_TYPE,
BLIS_INT = BLIS_BITVAL_INT_TYPE,
BLIS_CONSTANT = BLIS_BITVAL_CONST_TYPE,
BLIS_DT_LO = BLIS_FLOAT,
BLIS_DT_HI = BLIS_DCOMPLEX
BLIS_FLOAT = BLIS_BITVAL_FLOAT_TYPE,
BLIS_DOUBLE = BLIS_BITVAL_DOUBLE_TYPE,
BLIS_SCOMPLEX = BLIS_BITVAL_SCOMPLEX_TYPE,
BLIS_DCOMPLEX = BLIS_BITVAL_DCOMPLEX_TYPE,
BLIS_INT = BLIS_BITVAL_INT_TYPE,
BLIS_CONSTANT = BLIS_BITVAL_CONST_TYPE,
BLIS_DT_LO = BLIS_FLOAT,
BLIS_DT_HI = BLIS_DCOMPLEX
} num_t;
typedef enum
{
BLIS_REAL = BLIS_BITVAL_REAL,
BLIS_COMPLEX = BLIS_BITVAL_COMPLEX
BLIS_REAL = BLIS_BITVAL_REAL,
BLIS_COMPLEX = BLIS_BITVAL_COMPLEX
} dom_t;
typedef enum
{
BLIS_SINGLE_PREC = BLIS_BITVAL_SINGLE_PREC,
BLIS_DOUBLE_PREC = BLIS_BITVAL_DOUBLE_PREC
BLIS_SINGLE_PREC = BLIS_BITVAL_SINGLE_PREC,
BLIS_DOUBLE_PREC = BLIS_BITVAL_DOUBLE_PREC
} prec_t;
@@ -532,31 +532,31 @@ typedef enum
typedef enum
{
BLIS_NOT_PACKED = BLIS_BITVAL_NOT_PACKED,
BLIS_PACKED_UNSPEC = BLIS_BITVAL_PACKED_UNSPEC,
BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_UNSPEC,
BLIS_PACKED_ROWS = BLIS_BITVAL_PACKED_ROWS,
BLIS_PACKED_COLUMNS = BLIS_BITVAL_PACKED_COLUMNS,
BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS,
BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS,
BLIS_PACKED_ROW_PANELS_4MI = BLIS_BITVAL_PACKED_ROW_PANELS_4MI,
BLIS_PACKED_COL_PANELS_4MI = BLIS_BITVAL_PACKED_COL_PANELS_4MI,
BLIS_PACKED_ROW_PANELS_3MI = BLIS_BITVAL_PACKED_ROW_PANELS_3MI,
BLIS_PACKED_COL_PANELS_3MI = BLIS_BITVAL_PACKED_COL_PANELS_3MI,
BLIS_PACKED_ROW_PANELS_4MS = BLIS_BITVAL_PACKED_ROW_PANELS_4MS,
BLIS_PACKED_COL_PANELS_4MS = BLIS_BITVAL_PACKED_COL_PANELS_4MS,
BLIS_PACKED_ROW_PANELS_3MS = BLIS_BITVAL_PACKED_ROW_PANELS_3MS,
BLIS_PACKED_COL_PANELS_3MS = BLIS_BITVAL_PACKED_COL_PANELS_3MS,
BLIS_PACKED_ROW_PANELS_RO = BLIS_BITVAL_PACKED_ROW_PANELS_RO,
BLIS_PACKED_COL_PANELS_RO = BLIS_BITVAL_PACKED_COL_PANELS_RO,
BLIS_PACKED_ROW_PANELS_IO = BLIS_BITVAL_PACKED_ROW_PANELS_IO,
BLIS_PACKED_COL_PANELS_IO = BLIS_BITVAL_PACKED_COL_PANELS_IO,
BLIS_PACKED_ROW_PANELS_RPI = BLIS_BITVAL_PACKED_ROW_PANELS_RPI,
BLIS_PACKED_COL_PANELS_RPI = BLIS_BITVAL_PACKED_COL_PANELS_RPI,
BLIS_PACKED_ROW_PANELS_1E = BLIS_BITVAL_PACKED_ROW_PANELS_1E,
BLIS_PACKED_COL_PANELS_1E = BLIS_BITVAL_PACKED_COL_PANELS_1E,
BLIS_PACKED_ROW_PANELS_1R = BLIS_BITVAL_PACKED_ROW_PANELS_1R,
BLIS_PACKED_COL_PANELS_1R = BLIS_BITVAL_PACKED_COL_PANELS_1R
BLIS_NOT_PACKED = BLIS_BITVAL_NOT_PACKED,
BLIS_PACKED_UNSPEC = BLIS_BITVAL_PACKED_UNSPEC,
BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_UNSPEC,
BLIS_PACKED_ROWS = BLIS_BITVAL_PACKED_ROWS,
BLIS_PACKED_COLUMNS = BLIS_BITVAL_PACKED_COLUMNS,
BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS,
BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS,
BLIS_PACKED_ROW_PANELS_4MI = BLIS_BITVAL_PACKED_ROW_PANELS_4MI,
BLIS_PACKED_COL_PANELS_4MI = BLIS_BITVAL_PACKED_COL_PANELS_4MI,
BLIS_PACKED_ROW_PANELS_3MI = BLIS_BITVAL_PACKED_ROW_PANELS_3MI,
BLIS_PACKED_COL_PANELS_3MI = BLIS_BITVAL_PACKED_COL_PANELS_3MI,
BLIS_PACKED_ROW_PANELS_4MS = BLIS_BITVAL_PACKED_ROW_PANELS_4MS,
BLIS_PACKED_COL_PANELS_4MS = BLIS_BITVAL_PACKED_COL_PANELS_4MS,
BLIS_PACKED_ROW_PANELS_3MS = BLIS_BITVAL_PACKED_ROW_PANELS_3MS,
BLIS_PACKED_COL_PANELS_3MS = BLIS_BITVAL_PACKED_COL_PANELS_3MS,
BLIS_PACKED_ROW_PANELS_RO = BLIS_BITVAL_PACKED_ROW_PANELS_RO,
BLIS_PACKED_COL_PANELS_RO = BLIS_BITVAL_PACKED_COL_PANELS_RO,
BLIS_PACKED_ROW_PANELS_IO = BLIS_BITVAL_PACKED_ROW_PANELS_IO,
BLIS_PACKED_COL_PANELS_IO = BLIS_BITVAL_PACKED_COL_PANELS_IO,
BLIS_PACKED_ROW_PANELS_RPI = BLIS_BITVAL_PACKED_ROW_PANELS_RPI,
BLIS_PACKED_COL_PANELS_RPI = BLIS_BITVAL_PACKED_COL_PANELS_RPI,
BLIS_PACKED_ROW_PANELS_1E = BLIS_BITVAL_PACKED_ROW_PANELS_1E,
BLIS_PACKED_COL_PANELS_1E = BLIS_BITVAL_PACKED_COL_PANELS_1E,
BLIS_PACKED_ROW_PANELS_1R = BLIS_BITVAL_PACKED_ROW_PANELS_1R,
BLIS_PACKED_COL_PANELS_1R = BLIS_BITVAL_PACKED_COL_PANELS_1R
} pack_t;
// We combine row and column packing into one "type", and we start
@@ -570,11 +570,11 @@ typedef enum
typedef enum
{
BLIS_PACK_FWD_IF_UPPER = BLIS_BITVAL_PACK_FWD_IF_UPPER,
BLIS_PACK_REV_IF_UPPER = BLIS_BITVAL_PACK_REV_IF_UPPER,
BLIS_PACK_FWD_IF_UPPER = BLIS_BITVAL_PACK_FWD_IF_UPPER,
BLIS_PACK_REV_IF_UPPER = BLIS_BITVAL_PACK_REV_IF_UPPER,
BLIS_PACK_FWD_IF_LOWER = BLIS_BITVAL_PACK_FWD_IF_LOWER,
BLIS_PACK_REV_IF_LOWER = BLIS_BITVAL_PACK_REV_IF_LOWER
BLIS_PACK_FWD_IF_LOWER = BLIS_BITVAL_PACK_FWD_IF_LOWER,
BLIS_PACK_REV_IF_LOWER = BLIS_BITVAL_PACK_REV_IF_LOWER
} packord_t;
@@ -582,10 +582,10 @@ typedef enum
typedef enum
{
BLIS_BUFFER_FOR_A_BLOCK = BLIS_BITVAL_BUFFER_FOR_A_BLOCK,
BLIS_BUFFER_FOR_B_PANEL = BLIS_BITVAL_BUFFER_FOR_B_PANEL,
BLIS_BUFFER_FOR_C_PANEL = BLIS_BITVAL_BUFFER_FOR_C_PANEL,
BLIS_BUFFER_FOR_GEN_USE = BLIS_BITVAL_BUFFER_FOR_GEN_USE
BLIS_BUFFER_FOR_A_BLOCK = BLIS_BITVAL_BUFFER_FOR_A_BLOCK,
BLIS_BUFFER_FOR_B_PANEL = BLIS_BITVAL_BUFFER_FOR_B_PANEL,
BLIS_BUFFER_FOR_C_PANEL = BLIS_BITVAL_BUFFER_FOR_C_PANEL,
BLIS_BUFFER_FOR_GEN_USE = BLIS_BITVAL_BUFFER_FOR_GEN_USE
} packbuf_t;
@@ -593,8 +593,8 @@ typedef enum
typedef enum
{
BLIS_FWD,
BLIS_BWD
BLIS_FWD,
BLIS_BWD
} dir_t;
@@ -602,22 +602,22 @@ typedef enum
typedef enum
{
BLIS_SUBPART0,
BLIS_SUBPART1,
BLIS_SUBPART2,
BLIS_SUBPART1AND0,
BLIS_SUBPART1AND2,
BLIS_SUBPART1A,
BLIS_SUBPART1B,
BLIS_SUBPART00,
BLIS_SUBPART10,
BLIS_SUBPART20,
BLIS_SUBPART01,
BLIS_SUBPART11,
BLIS_SUBPART21,
BLIS_SUBPART02,
BLIS_SUBPART12,
BLIS_SUBPART22
BLIS_SUBPART0,
BLIS_SUBPART1,
BLIS_SUBPART2,
BLIS_SUBPART1AND0,
BLIS_SUBPART1AND2,
BLIS_SUBPART1A,
BLIS_SUBPART1B,
BLIS_SUBPART00,
BLIS_SUBPART10,
BLIS_SUBPART20,
BLIS_SUBPART01,
BLIS_SUBPART11,
BLIS_SUBPART21,
BLIS_SUBPART02,
BLIS_SUBPART12,
BLIS_SUBPART22
} subpart_t;
@@ -625,8 +625,8 @@ typedef enum
typedef enum
{
BLIS_M = 0,
BLIS_N = 1
BLIS_M = 0,
BLIS_N = 1
} mdim_t;
@@ -634,17 +634,17 @@ typedef enum
typedef enum
{
BLIS_MACH_EPS = 0,
BLIS_MACH_SFMIN,
BLIS_MACH_BASE,
BLIS_MACH_PREC,
BLIS_MACH_NDIGMANT,
BLIS_MACH_RND,
BLIS_MACH_EMIN,
BLIS_MACH_RMIN,
BLIS_MACH_EMAX,
BLIS_MACH_RMAX,
BLIS_MACH_EPS2
BLIS_MACH_EPS = 0,
BLIS_MACH_SFMIN,
BLIS_MACH_BASE,
BLIS_MACH_PREC,
BLIS_MACH_NDIGMANT,
BLIS_MACH_RND,
BLIS_MACH_EMIN,
BLIS_MACH_RMIN,
BLIS_MACH_EMAX,
BLIS_MACH_RMAX,
BLIS_MACH_EPS2
} machval_t;
#define BLIS_NUM_MACH_PARAMS 11
@@ -656,15 +656,15 @@ typedef enum
typedef enum
{
BLIS_3MH = 0,
BLIS_3M1,
BLIS_4MH,
BLIS_4M1B,
BLIS_4M1A,
BLIS_1M,
BLIS_NAT,
BLIS_IND_FIRST = 0,
BLIS_IND_LAST = BLIS_NAT
BLIS_3MH = 0,
BLIS_3M1,
BLIS_4MH,
BLIS_4M1B,
BLIS_4M1A,
BLIS_1M,
BLIS_NAT,
BLIS_IND_FIRST = 0,
BLIS_IND_LAST = BLIS_NAT
} ind_t;
#define BLIS_NUM_IND_METHODS (BLIS_NAT+1)
@@ -684,32 +684,33 @@ typedef enum
typedef enum
{
BLIS_ADDV_KER = 0,
BLIS_AMAXV_KER,
BLIS_AXPBYV_KER,
BLIS_AXPYV_KER,
BLIS_COPYV_KER,
BLIS_DOTV_KER,
BLIS_DOTXV_KER,
BLIS_INVERTV_KER,
BLIS_SCALV_KER,
BLIS_SCAL2V_KER,
BLIS_SETV_KER,
BLIS_SUBV_KER,
BLIS_SWAPV_KER,
BLIS_XPBYV_KER
BLIS_ADDV_KER = 0,
BLIS_AMAXV_KER,
BLIS_AMINV_KER,
BLIS_AXPBYV_KER,
BLIS_AXPYV_KER,
BLIS_COPYV_KER,
BLIS_DOTV_KER,
BLIS_DOTXV_KER,
BLIS_INVERTV_KER,
BLIS_SCALV_KER,
BLIS_SCAL2V_KER,
BLIS_SETV_KER,
BLIS_SUBV_KER,
BLIS_SWAPV_KER,
BLIS_XPBYV_KER
} l1vkr_t;
#define BLIS_NUM_LEVEL1V_KERS 14
#define BLIS_NUM_LEVEL1V_KERS 15
typedef enum
{
BLIS_AXPY2V_KER = 0,
BLIS_DOTAXPYV_KER,
BLIS_AXPYF_KER,
BLIS_DOTXF_KER,
BLIS_DOTXAXPYF_KER
BLIS_AXPY2V_KER = 0,
BLIS_DOTAXPYV_KER,
BLIS_AXPYF_KER,
BLIS_DOTXF_KER,
BLIS_DOTXAXPYF_KER
} l1fkr_t;
#define BLIS_NUM_LEVEL1F_KERS 5
@@ -717,71 +718,71 @@ typedef enum
typedef enum
{
BLIS_PACKM_0XK_KER = 0,
BLIS_PACKM_1XK_KER = 1,
BLIS_PACKM_2XK_KER = 2,
BLIS_PACKM_3XK_KER = 3,
BLIS_PACKM_4XK_KER = 4,
BLIS_PACKM_5XK_KER = 5,
BLIS_PACKM_6XK_KER = 6,
BLIS_PACKM_7XK_KER = 7,
BLIS_PACKM_8XK_KER = 8,
BLIS_PACKM_9XK_KER = 9,
BLIS_PACKM_10XK_KER = 10,
BLIS_PACKM_11XK_KER = 11,
BLIS_PACKM_12XK_KER = 12,
BLIS_PACKM_13XK_KER = 13,
BLIS_PACKM_14XK_KER = 14,
BLIS_PACKM_15XK_KER = 15,
BLIS_PACKM_16XK_KER = 16,
BLIS_PACKM_17XK_KER = 17,
BLIS_PACKM_18XK_KER = 18,
BLIS_PACKM_19XK_KER = 19,
BLIS_PACKM_20XK_KER = 20,
BLIS_PACKM_21XK_KER = 21,
BLIS_PACKM_22XK_KER = 22,
BLIS_PACKM_23XK_KER = 23,
BLIS_PACKM_24XK_KER = 24,
BLIS_PACKM_25XK_KER = 25,
BLIS_PACKM_26XK_KER = 26,
BLIS_PACKM_27XK_KER = 27,
BLIS_PACKM_28XK_KER = 28,
BLIS_PACKM_29XK_KER = 29,
BLIS_PACKM_30XK_KER = 30,
BLIS_PACKM_31XK_KER = 31,
BLIS_PACKM_0XK_KER = 0,
BLIS_PACKM_1XK_KER = 1,
BLIS_PACKM_2XK_KER = 2,
BLIS_PACKM_3XK_KER = 3,
BLIS_PACKM_4XK_KER = 4,
BLIS_PACKM_5XK_KER = 5,
BLIS_PACKM_6XK_KER = 6,
BLIS_PACKM_7XK_KER = 7,
BLIS_PACKM_8XK_KER = 8,
BLIS_PACKM_9XK_KER = 9,
BLIS_PACKM_10XK_KER = 10,
BLIS_PACKM_11XK_KER = 11,
BLIS_PACKM_12XK_KER = 12,
BLIS_PACKM_13XK_KER = 13,
BLIS_PACKM_14XK_KER = 14,
BLIS_PACKM_15XK_KER = 15,
BLIS_PACKM_16XK_KER = 16,
BLIS_PACKM_17XK_KER = 17,
BLIS_PACKM_18XK_KER = 18,
BLIS_PACKM_19XK_KER = 19,
BLIS_PACKM_20XK_KER = 20,
BLIS_PACKM_21XK_KER = 21,
BLIS_PACKM_22XK_KER = 22,
BLIS_PACKM_23XK_KER = 23,
BLIS_PACKM_24XK_KER = 24,
BLIS_PACKM_25XK_KER = 25,
BLIS_PACKM_26XK_KER = 26,
BLIS_PACKM_27XK_KER = 27,
BLIS_PACKM_28XK_KER = 28,
BLIS_PACKM_29XK_KER = 29,
BLIS_PACKM_30XK_KER = 30,
BLIS_PACKM_31XK_KER = 31,
BLIS_UNPACKM_0XK_KER = 0,
BLIS_UNPACKM_1XK_KER = 1,
BLIS_UNPACKM_2XK_KER = 2,
BLIS_UNPACKM_3XK_KER = 3,
BLIS_UNPACKM_4XK_KER = 4,
BLIS_UNPACKM_5XK_KER = 5,
BLIS_UNPACKM_6XK_KER = 6,
BLIS_UNPACKM_7XK_KER = 7,
BLIS_UNPACKM_8XK_KER = 8,
BLIS_UNPACKM_9XK_KER = 9,
BLIS_UNPACKM_10XK_KER = 10,
BLIS_UNPACKM_11XK_KER = 11,
BLIS_UNPACKM_12XK_KER = 12,
BLIS_UNPACKM_13XK_KER = 13,
BLIS_UNPACKM_14XK_KER = 14,
BLIS_UNPACKM_15XK_KER = 15,
BLIS_UNPACKM_16XK_KER = 16,
BLIS_UNPACKM_17XK_KER = 17,
BLIS_UNPACKM_18XK_KER = 18,
BLIS_UNPACKM_19XK_KER = 19,
BLIS_UNPACKM_20XK_KER = 20,
BLIS_UNPACKM_21XK_KER = 21,
BLIS_UNPACKM_22XK_KER = 22,
BLIS_UNPACKM_23XK_KER = 23,
BLIS_UNPACKM_24XK_KER = 24,
BLIS_UNPACKM_25XK_KER = 25,
BLIS_UNPACKM_26XK_KER = 26,
BLIS_UNPACKM_27XK_KER = 27,
BLIS_UNPACKM_28XK_KER = 28,
BLIS_UNPACKM_29XK_KER = 29,
BLIS_UNPACKM_30XK_KER = 30,
BLIS_UNPACKM_31XK_KER = 31
BLIS_UNPACKM_0XK_KER = 0,
BLIS_UNPACKM_1XK_KER = 1,
BLIS_UNPACKM_2XK_KER = 2,
BLIS_UNPACKM_3XK_KER = 3,
BLIS_UNPACKM_4XK_KER = 4,
BLIS_UNPACKM_5XK_KER = 5,
BLIS_UNPACKM_6XK_KER = 6,
BLIS_UNPACKM_7XK_KER = 7,
BLIS_UNPACKM_8XK_KER = 8,
BLIS_UNPACKM_9XK_KER = 9,
BLIS_UNPACKM_10XK_KER = 10,
BLIS_UNPACKM_11XK_KER = 11,
BLIS_UNPACKM_12XK_KER = 12,
BLIS_UNPACKM_13XK_KER = 13,
BLIS_UNPACKM_14XK_KER = 14,
BLIS_UNPACKM_15XK_KER = 15,
BLIS_UNPACKM_16XK_KER = 16,
BLIS_UNPACKM_17XK_KER = 17,
BLIS_UNPACKM_18XK_KER = 18,
BLIS_UNPACKM_19XK_KER = 19,
BLIS_UNPACKM_20XK_KER = 20,
BLIS_UNPACKM_21XK_KER = 21,
BLIS_UNPACKM_22XK_KER = 22,
BLIS_UNPACKM_23XK_KER = 23,
BLIS_UNPACKM_24XK_KER = 24,
BLIS_UNPACKM_25XK_KER = 25,
BLIS_UNPACKM_26XK_KER = 26,
BLIS_UNPACKM_27XK_KER = 27,
BLIS_UNPACKM_28XK_KER = 28,
BLIS_UNPACKM_29XK_KER = 29,
BLIS_UNPACKM_30XK_KER = 30,
BLIS_UNPACKM_31XK_KER = 31
} l1mkr_t;
@@ -791,11 +792,11 @@ typedef enum
typedef enum
{
BLIS_GEMM_UKR = 0,
BLIS_GEMMTRSM_L_UKR,
BLIS_GEMMTRSM_U_UKR,
BLIS_TRSM_L_UKR,
BLIS_TRSM_U_UKR
BLIS_GEMM_UKR = 0,
BLIS_GEMMTRSM_L_UKR,
BLIS_GEMMTRSM_U_UKR,
BLIS_TRSM_L_UKR,
BLIS_TRSM_U_UKR
} l3ukr_t;
#define BLIS_NUM_LEVEL3_UKRS 5
@@ -803,10 +804,10 @@ typedef enum
typedef enum
{
BLIS_REFERENCE_UKERNEL = 0,
BLIS_VIRTUAL_UKERNEL,
BLIS_OPTIMIZED_UKERNEL,
BLIS_NOTAPPLIC_UKERNEL
BLIS_REFERENCE_UKERNEL = 0,
BLIS_VIRTUAL_UKERNEL,
BLIS_OPTIMIZED_UKERNEL,
BLIS_NOTAPPLIC_UKERNEL
} kimpl_t;
#define BLIS_NUM_UKR_IMPL_TYPES 4
@@ -1533,13 +1534,13 @@ typedef enum
BLIS_INVALID_COL_STRIDE = ( -51),
BLIS_INVALID_DIM_STRIDE_COMBINATION = ( -52),
// Structure-specific errors
// Structure-specific errors
BLIS_EXPECTED_GENERAL_OBJECT = ( -60),
BLIS_EXPECTED_HERMITIAN_OBJECT = ( -61),
BLIS_EXPECTED_SYMMETRIC_OBJECT = ( -62),
BLIS_EXPECTED_TRIANGULAR_OBJECT = ( -63),
// Storage-specific errors
// Storage-specific errors
BLIS_EXPECTED_UPPER_OR_LOWER_OBJECT = ( -70),
// Partitioning-specific errors
@@ -1553,7 +1554,7 @@ typedef enum
// Packing-specific errors
BLIS_PACK_SCHEMA_NOT_SUPPORTED_FOR_UNPACK = (-100),
// Buffer-specific errors
// Buffer-specific errors
BLIS_EXPECTED_NONNULL_OBJECT_BUFFER = (-110),
// Memory errors

View File

@@ -1,9 +1,10 @@
##Copyright (C) 2020, Advanced Micro Devices, Inc.##
##Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.##
target_sources("${PROJECT_NAME}"
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/bli_addv_ref.c
${CMAKE_CURRENT_SOURCE_DIR}/bli_amaxv_ref.c
${CMAKE_CURRENT_SOURCE_DIR}/bli_aminv_ref.c
${CMAKE_CURRENT_SOURCE_DIR}/bli_axpbyv_ref.c
${CMAKE_CURRENT_SOURCE_DIR}/bli_axpyv_ref.c
${CMAKE_CURRENT_SOURCE_DIR}/bli_copyv_ref.c

View File

@@ -0,0 +1,149 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
//
// Define BLAS-like interfaces with typed operands.
//
/*
   Reference aminv kernel template.

   Each expansion defines a function, named by PASTEMAC3(ch,opname,arch,suf),
   that scans the n elements of x (stride incx) and writes to *i_min the
   zero-based position of the element whose magnitude |real| + |imag| is
   smallest.

   - n == 0: *i_min is set to zero and the function returns immediately.
   - Ties: the comparison is strict, so the earliest element with the
     smallest magnitude is reported.
   - NaN: a NaN magnitude replaces a non-NaN running minimum and is never
     replaced afterwards, so the position of the first NaN is reported.
   - cntx is part of the kernel signature but is not referenced here.

   The running minimum is seeded from the full magnitude of the first
   element, and both the unit-stride and general-stride paths use the same
   "strictly smaller" test.
*/
#undef  GENTFUNCR
#define GENTFUNCR( ctype, ctype_r, ch, chr, opname, arch, suf ) \
\
void PASTEMAC3(ch,opname,arch,suf) \
     ( \
       dim_t            n, \
       ctype*  restrict x, inc_t incx, \
       dim_t*  restrict i_min, \
       cntx_t* restrict cntx  \
     ) \
{ \
	dim_t*  zero_i = PASTEMAC(i,0); \
\
	ctype_r chi1_r; \
	ctype_r chi1_i; \
	ctype_r abs_chi1; \
	ctype_r abs_chi1_min; \
	dim_t   i_min_l; \
\
	/* If the vector length is zero, report index zero and return early. */ \
	if ( bli_zero_dim1( n ) ) \
	{ \
		PASTEMAC(i,copys)( *zero_i, *i_min ); \
		return; \
	} \
\
	/* Initialize the index of the minimum absolute value to zero. */ \
	PASTEMAC(i,copys)( *zero_i, i_min_l ); \
\
	/* Seed the running minimum with the magnitude of the first element, */ \
	/* computed exactly as in the loops below (|real| + |imag|), so that */ \
	/* every later element is compared against a genuine candidate. */ \
	PASTEMAC2(ch,chr,gets)( *x, chi1_r, chi1_i ); \
	PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \
	PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \
	PASTEMAC(chr,set0s)( abs_chi1_min ); \
	PASTEMAC(chr,adds)( chi1_r, abs_chi1_min ); \
	PASTEMAC(chr,adds)( chi1_i, abs_chi1_min ); \
\
	if ( incx == 1 ) \
	{ \
		/* Unit stride: walk the vector with a running pointer. */ \
		ctype* chi1 = x; \
\
		for ( dim_t i = 0; i < n; ++i ) \
		{ \
			/* Get the real and imaginary components of chi1. */ \
			PASTEMAC2(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \
\
			/* Replace chi1_r and chi1_i with their absolute values. */ \
			PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \
			PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \
\
			/* Add the real and imaginary absolute values together. */ \
			PASTEMAC(chr,set0s)( abs_chi1 ); \
			PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \
			PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \
\
			/* If the magnitude of the current element is strictly less */ \
			/* than the smallest seen so far, save it and its index. A NaN */ \
			/* magnitude also replaces a non-NaN minimum, so the first NaN */ \
			/* encountered becomes (and stays) the reported element. */ \
			if ( ( abs_chi1_min > abs_chi1 ) || \
			     ( bli_isnan( abs_chi1 ) && !bli_isnan( abs_chi1_min ) ) ) \
			{ \
				abs_chi1_min = abs_chi1; \
				i_min_l      = i; \
			} \
\
			chi1 += 1; \
		} \
	} \
	else \
	{ \
		/* General stride: address each element explicitly. */ \
		for ( dim_t i = 0; i < n; ++i ) \
		{ \
			ctype* chi1 = x + (i  )*incx; \
\
			/* Get the real and imaginary components of chi1. */ \
			PASTEMAC2(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \
\
			/* Replace chi1_r and chi1_i with their absolute values. */ \
			PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \
			PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \
\
			/* Add the real and imaginary absolute values together. */ \
			PASTEMAC(chr,set0s)( abs_chi1 ); \
			PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \
			PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \
\
			/* Same selection rule as the unit-stride path: keep the */ \
			/* element only when it is strictly smaller than the running */ \
			/* minimum, or when it is the first NaN encountered. */ \
			if ( ( abs_chi1_min > abs_chi1 ) || \
			     ( bli_isnan( abs_chi1 ) && !bli_isnan( abs_chi1_min ) ) ) \
			{ \
				abs_chi1_min = abs_chi1; \
				i_min_l      = i; \
			} \
		} \
	} \
\
	/* Store the final index to the output variable. */ \
	PASTEMAC(i,copys)( i_min_l, *i_min ); \
}
INSERT_GENTFUNCR_BASIC2( aminv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX )

File diff suppressed because it is too large Load Diff

View File

@@ -38,7 +38,7 @@
# Makefile
#
# Field G. Van Zee
#
#
# Makefile for standalone BLIS test drivers.
#
@@ -179,8 +179,8 @@ CFLAGS += -I$(TEST_SRC_PATH)
all: blis openblas mkl
blis: check-env \
test_cabs1_blis.x \
test_dotv_blis.x \
test_aminv_blis.x \
test_axpyv_blis.x \
test_axpbyv_blis.x \
test_gemv_blis.x \
@@ -204,8 +204,8 @@ blis: check-env \
test_gemm_batch_blis.x
openblas: check-env \
test_cabs1_openblas.x \
test_dotv_openblas.x \
test_aminv_openblas.x \
test_axpyv_openblas.x \
test_axpbyv_openblas.x \
test_gemv_openblas.x \
@@ -226,7 +226,7 @@ openblas: check-env \
test_her2k_openblas.x \
test_trmm_openblas.x \
test_trsm_openblas.x \
test_gemm_batch_openblas.c
test_gemm_batch_openblas.x
atlas: check-env \
test_dotv_atlas.x \
@@ -250,8 +250,8 @@ atlas: check-env \
test_trsm_atlas.x
mkl: check-env \
test_cabs1_mkl.x \
test_dotv_mkl.x \
test_aminv_mkl.x \
test_axpyv_mkl.x \
test_axpbyv_mkl.x \
test_gemv_mkl.x \

237
test/test_aminv.c Normal file
View File

@@ -0,0 +1,237 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef WIN32
#include <io.h>
#else
#include <unistd.h>
#endif
#include "blis.h"
// When PRINT is defined, main() uses a single fixed problem size (n = 16)
// and prints the input vector x before each invocation.
#define PRINT
// Interface under test, as selected by the preprocessor logic in main():
//   BLIS defined                        -> bli_aminv()
//   BLIS undefined, CHECK_BLAS defined  -> i?amin_()
//   BLIS undefined, CHECK_BLAS undefined -> cblas_i?amin()
// #define BLIS // To test the BLIS object API, enable this line and disable CHECK_CBLAS and CHECK_BLAS.
#ifdef BLIS_ENABLE_CBLAS
#define CHECK_CBLAS // To test CBLAS, keep this line and leave BLIS and CHECK_BLAS disabled.
// #define CHECK_BLAS // To test BLAS, enable this line and leave only BLIS disabled.
#endif
// cblas.h is only pulled in when the CBLAS interface has been selected above.
#ifdef CHECK_CBLAS
#include "cblas.h"
#endif
/*
* cblas_i?amin
* Finds the index of the element with the minimum absolute value.
*
* Sample prototype of the CBLAS interface for single precision:
*
* CBLAS_INDEX cblas_isamin (const int n, const float *x, const int incx);
*/
/*
 * Driver for the aminv operation (index of the element with the minimum
 * absolute value).
 *
 * For every problem size a vector x is created and randomized, the selected
 * implementation is invoked n_repeats times, and one result line is printed
 * with the problem size, the rate derived from the time retained by
 * bli_clock_min_diff(), and the index produced by the last invocation.
 *
 * Compile-time selection:
 *   BLIS        - call the object API bli_aminv().
 *   CHECK_BLAS  - (BLIS undefined) call the Fortran-style i?amin_().
 *   otherwise   - (BLIS undefined) call the CBLAS routine cblas_i?amin().
 *   PRINT       - use one fixed problem size (n = 16) and print x.
 *
 * argc/argv are not consulted. Always returns 0.
 */
int main( int argc, char** argv )
{
	obj_t  x;
	obj_t  idx;
	num_t  dt;
	num_t  dt_idx;
	dim_t  n;
	dim_t  p_begin, p_end, p_inc;
	dim_t  p;
	int    n_input;
	int    r, n_repeats;
	double dtime;
	double dtime_save;
	double gflops;

	/* The command line is intentionally ignored. */
	( void )argc;
	( void )argv;

	n_repeats = 3;

#ifndef PRINT
	p_begin = 40;
	p_end   = 40000;
	p_inc   = 40;

	/* A negative n_input means "scale p by |n_input|". */
	n_input = -1;
#else
	p_begin = 16;
	p_end   = 16;
	p_inc   = 1;

	/* A non-negative n_input fixes the vector length. */
	n_input = 16;
#endif

#if 1
	dt = BLIS_FLOAT;
	// dt = BLIS_DOUBLE;
#else
	dt = BLIS_SCOMPLEX;
	// dt = BLIS_DCOMPLEX;
#endif

	dt_idx = BLIS_INT;

	/* Advance p to the last problem size that will be visited so that the
	   placeholder line below carries the highest row number. */
	for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;

#ifdef BLIS
	printf( "data_iaminv_blis" );
#else
	printf( "data_iaminv_%s", BLAS );
#endif
	/* The cast covers the whole row-number expression so that the argument
	   really is an unsigned long, whatever the width of dim_t. */
	printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n",
	        ( unsigned long )( ( p - p_begin ) / p_inc + 1 ),
	        ( unsigned long )0, 0.0 );

	/* Walk the problem sizes from largest to smallest. */
	for ( p = p_end; p_begin <= p; p -= p_inc )
	{
		if ( n_input < 0 ) n = p * ( dim_t )abs( n_input );
		else               n = ( dim_t )n_input;

		bli_obj_create( dt,     n, 1, 0, 0, &x );
		bli_obj_create( dt_idx, 1, 1, 0, 0, &idx );

		bli_randm( &x );

		dtime_save = 1.0e9;

		/* Only the variable needed by the selected interface is declared. */
#ifdef BLIS
		dim_t*  indxp     = NULL;
#else
		f77_int min_index = -1;
#endif

		for ( r = 0; r < n_repeats; ++r )
		{
			/* Print before starting the clock so that console I/O is not
			   attributed to the operation being measured. */
#ifdef PRINT
			bli_printm( "x", &x, "%4.3f", "" );
#endif
			dtime = bli_clock();

#ifdef BLIS
			bli_aminv( &x, &idx );
#else
			f77_int nn   = bli_obj_length( &x );
			f77_int incx = bli_obj_vector_inc( &x );

			if ( bli_is_float( dt ) )
			{
				float* xp = bli_obj_buffer( &x );
#ifndef CHECK_BLAS
				min_index = cblas_isamin( nn, xp, incx );
#else
				min_index = isamin_( &nn, xp, &incx );
#endif
			}
			else if ( bli_is_double( dt ) )
			{
				double* xp = bli_obj_buffer( &x );
#ifndef CHECK_BLAS
				min_index = cblas_idamin( nn, xp, incx );
#else
				min_index = idamin_( &nn, xp, &incx );
#endif
			}
			else if ( bli_is_scomplex( dt ) )
			{
				scomplex* xp = bli_obj_buffer( &x );
#ifndef CHECK_BLAS
				min_index = cblas_icamin( nn, xp, incx );
#else
				min_index = icamin_( &nn, xp, &incx );
#endif
			}
			else if ( bli_is_dcomplex( dt ) )
			{
				dcomplex* xp = bli_obj_buffer( &x );
#ifndef CHECK_BLAS
				min_index = cblas_izamin( nn, xp, incx );
#else
				min_index = izamin_( &nn, xp, &incx );
#endif
			}
#endif

			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		gflops = ( 1.0 * n ) / dtime_save / 1.0e9;
		if ( bli_obj_is_complex( &x ) ) gflops *= 2.0;

#ifdef BLIS
		printf( "data_iaminv_blis" );

		/* Print the index value stored in idx, not the buffer address. */
		indxp = ( dim_t* )bli_obj_buffer( &idx );
		printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ], Min Index = %ld;\n",
		        ( unsigned long )( ( p - p_begin ) / p_inc + 1 ),
		        ( unsigned long )n, gflops, ( long )*indxp );
#else
		printf( "data_iaminv_%s", BLAS );

		/* f77_int may be wider than int; convert explicitly for printf. */
		printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ], Min Index = %ld;\n",
		        ( unsigned long )( ( p - p_begin ) / p_inc + 1 ),
		        ( unsigned long )n, gflops, ( long )min_index );
#endif

		bli_obj_free( &x );
		bli_obj_free( &idx );
	}

	bli_finalize();

	return 0;
}