Merge branch 'dev'

This commit is contained in:
Field G. Van Zee
2021-07-09 18:10:46 -05:00
89 changed files with 2570 additions and 1191 deletions

View File

@@ -1297,17 +1297,17 @@ bli_malloc_user
bli_mbool_create
bli_mbool_free
bli_mbool_init
bli_membrk_acquire_m
bli_membrk_compute_pool_block_sizes
bli_membrk_compute_pool_block_sizes_dt
bli_membrk_finalize
bli_membrk_finalize_pools
bli_membrk_init
bli_membrk_init_pools
bli_membrk_pool_size
bli_membrk_query
bli_membrk_release
bli_membrk_rntm_set_membrk
bli_pba_acquire_m
bli_pba_compute_pool_block_sizes
bli_pba_compute_pool_block_sizes_dt
bli_pba_finalize
bli_pba_finalize_pools
bli_pba_init
bli_pba_init_pools
bli_pba_pool_size
bli_pba_query
bli_pba_release
bli_pba_rntm_set_pba
bli_memsys_finalize
bli_memsys_init
bli_mkherm

View File

@@ -53,7 +53,7 @@ This index provides a quick way to jump directly to the description for each ope
* **[Level-3](BLISObjectAPI.md#level-3-operations)**: Operations with matrices that are multiplication-like:
* [gemm](BLISObjectAPI.md#gemm), [hemm](BLISObjectAPI.md#hemm), [herk](BLISObjectAPI.md#herk), [her2k](BLISObjectAPI.md#her2k), [symm](BLISObjectAPI.md#symm), [syrk](BLISObjectAPI.md#syrk), [syr2k](BLISObjectAPI.md#syr2k), [trmm](BLISObjectAPI.md#trmm), [trmm3](BLISObjectAPI.md#trmm3), [trsm](BLISObjectAPI.md#trsm)
* **[Utility](BLISObjectAPI.md#Utility-operations)**: Miscellaneous operations on matrices and vectors:
* [asumv](BLISObjectAPI.md#asumv), [norm1v](BLISObjectAPI.md#norm1v), [normfv](BLISObjectAPI.md#normfv), [normiv](BLISObjectAPI.md#normiv), [norm1m](BLISObjectAPI.md#norm1m), [normfm](BLISObjectAPI.md#normfm), [normim](BLISObjectAPI.md#normim), [mkherm](BLISObjectAPI.md#mkherm), [mksymm](BLISObjectAPI.md#mksymm), [mktrim](BLISObjectAPI.md#mktrim), [fprintv](BLISObjectAPI.md#fprintv), [fprintm](BLISObjectAPI.md#fprintm),[printv](BLISObjectAPI.md#printv), [printm](BLISObjectAPI.md#printm), [randv](BLISObjectAPI.md#randv), [randm](BLISObjectAPI.md#randm), [sumsqv](BLISObjectAPI.md#sumsqv), [getijm](BLISObjectAPI.md#getijm), [setijm](BLISObjectAPI.md#setijm)
* [asumv](BLISObjectAPI.md#asumv), [norm1v](BLISObjectAPI.md#norm1v), [normfv](BLISObjectAPI.md#normfv), [normiv](BLISObjectAPI.md#normiv), [norm1m](BLISObjectAPI.md#norm1m), [normfm](BLISObjectAPI.md#normfm), [normim](BLISObjectAPI.md#normim), [mkherm](BLISObjectAPI.md#mkherm), [mksymm](BLISObjectAPI.md#mksymm), [mktrim](BLISObjectAPI.md#mktrim), [fprintv](BLISObjectAPI.md#fprintv), [fprintm](BLISObjectAPI.md#fprintm),[printv](BLISObjectAPI.md#printv), [printm](BLISObjectAPI.md#printm), [randv](BLISObjectAPI.md#randv), [randm](BLISObjectAPI.md#randm), [sumsqv](BLISObjectAPI.md#sumsqv), [getsc](BLISObjectAPI.md#getsc), [getijv](BLISObjectAPI.md#getijv), [getijm](BLISObjectAPI.md#getijm), [setsc](BLISObjectAPI.md#setsc), [setijv](BLISObjectAPI.md#setijv), [setijm](BLISObjectAPI.md#setijm), [eqsc](BLISObjectAPI.md#eqsc), [eqv](BLISObjectAPI.md#eqv), [eqm](BLISObjectAPI.md#eqm)
@@ -790,6 +790,8 @@ Perform
```
where `x` and `y` are vectors of length _n_.
Observed object properties: `conj?(x)`.
---
#### dotv
@@ -807,6 +809,8 @@ Perform
```
where `x` and `y` are vectors of length _n_, and `rho` is a scalar.
Observed object properties: `conj?(x)`, `conj?(y)`.
---
#### dotxv
@@ -826,6 +830,8 @@ Perform
```
where `x` and `y` are vectors of length _n_, and `alpha`, `beta`, and `rho` are scalars.
Observed object properties: `conj?(alpha)`, `conj?(beta)`, `conj?(x)`, `conj?(y)`.
---
#### invertv
@@ -2125,6 +2131,34 @@ where, on entry, `scale` and `sumsq` contain `scale_old` and `sumsq_old`, respec
---
#### getsc
```c
void bli_getsc
(
obj_t* chi,
double* zeta_r,
double* zeta_i
)
```
Copy the real and imaginary values from the scalar object `chi` to `zeta_r` and `zeta_i`. If `chi` is stored as a real type, then `zeta_i` is set to zero. (If `chi` is stored in single precision, the corresponding elements are typecast/promoted during the copy.)
---
#### getijv
```c
err_t bli_getijv
(
dim_t i,
obj_t* x,
double* ar,
double* ai
)
```
Copy the real and imaginary values at the `i`th element of vector object `x` to `ar` and `ai`. If elements of `x` are stored as real types, then only `ar` is overwritten and `ai` is left unchanged. (If `x` contains elements stored in single precision, the corresponding elements are typecast/promoted during the copy.)
If the element offset `i` is beyond the vector dimension of `x` or less than zero, the function returns `BLIS_FAILURE` without taking any action. Similarly, if `x` is a global scalar constant such as `BLIS_ONE`, the function returns `BLIS_FAILURE`.
---
#### getijm
```c
err_t bli_getijm
@@ -2136,8 +2170,38 @@ err_t bli_getijm
double* ai
)
```
Copy the real and imaginary values at the (`i`,`j`) element of object `b` to `ar` and `ai`. f elements of `b` are stored as real types, then only `ar` is overwritten and `ai` is left unchanged. (If `b` contains elements stored in single precision, the corresponding elements are typecast/promoted during the copy.)
If either the row offset `i` is beyond the _m_ dimension of `b`, or column offset `j` is beyond the _n_ dimension of `b`, the function does not perform any copy and returns `BLIS_FAILURE`. Similarly, if `b` is a global scalar constant such as `BLIS_ONE`, `BLIS_FAILURE` is returned.
Copy the real and imaginary values at the (`i`,`j`) element of object `b` to `ar` and `ai`. If elements of `b` are stored as real types, then only `ar` is overwritten and `ai` is left unchanged. (If `b` contains elements stored in single precision, the corresponding elements are typecast/promoted during the copy.)
If either the row offset `i` is beyond the _m_ dimension of `b` or less than zero, or column offset `j` is beyond the _n_ dimension of `b` or less than zero, the function returns `BLIS_FAILURE` without taking any action. Similarly, if `b` is a global scalar constant such as `BLIS_ONE`, the function returns `BLIS_FAILURE`.
---
#### setsc
```c
void bli_setsc
(
double* zeta_r,
double* zeta_i,
obj_t* chi
);
```
Copy real and imaginary values `zeta_r` and `zeta_i` to the scalar object `chi`. If `chi` is stored as a real type, then `zeta_i` is ignored. (If `chi` is stored in single precision, the contents are typecast/demoted during the copy.)
---
#### setijv
```c
err_t bli_setijv
(
double ar,
double ai,
dim_t i,
obj_t* x
);
```
Copy real and imaginary values `ar` and `ai` to the `i`th element of vector object `x`. If elements of `x` are stored as real types, then only `ar` is copied and `ai` is ignored. (If `x` contains elements stored in single precision, the corresponding elements are typecast/demoted during the copy.)
If the element offset `i` is beyond the vector dimension of `x` or less than zero, the function returns `BLIS_FAILURE` without taking any action. Similarly, if `x` is a global scalar constant such as `BLIS_ONE`, the function returns `BLIS_FAILURE`.
---
#### setijm
```c
@@ -2151,7 +2215,59 @@ err_t bli_setijm
);
```
Copy real and imaginary values `ar` and `ai` to the (`i`,`j`) element of object `b`. If elements of `b` are stored as real types, then only `ar` is copied and `ai` is ignored. (If `b` contains elements stored in single precision, the corresponding elements are typecast/demoted during the copy.)
If either the row offset `i` is beyond the _m_ dimension of `b`, or column offset `j` is beyond the _n_ dimension of `b`, the function does not perform any copy and returns `BLIS_FAILURE`. Similarly, if `b` is a global scalar constant such as `BLIS_ONE`, `BLIS_FAILURE` is returned.
If either the row offset `i` is beyond the _m_ dimension of `b` or less than zero, or column offset `j` is beyond the _n_ dimension of `b` or less than zero, the function returns `BLIS_FAILURE` without taking any action. Similarly, if `b` is a global scalar constant such as `BLIS_ONE`, the function returns `BLIS_FAILURE`.
---
#### eqsc
```c
void bli_eqsc
(
obj_t* chi,
obj_t* psi,
bool* is_eq
);
```
Perform an element-wise comparison between scalars `chi` and `psi` and store the boolean result in the `bool` pointed to by `is_eq`.
If exactly one of `conj(chi)` or `conj(psi)` (but not both) indicates a conjugation, then one of the scalars will be implicitly conjugated for purposes of the comparison.
Observed object properties: `conj?(chi)`, `conj?(psi)`.
---
#### eqv
```c
void bli_eqv
(
obj_t* x,
obj_t* y,
bool* is_eq
);
```
Perform an element-wise comparison between vectors `x` and `y` and store the boolean result in the `bool` pointed to by `is_eq`.
If exactly one of `conj(x)` or `conj(y)` (but not both) indicates a conjugation, then one of the vectors will be implicitly conjugated for purposes of the comparison.
Observed object properties: `conj?(x)`, `conj?(y)`.
---
#### eqm
```c
void bli_eqm
(
obj_t* a,
obj_t* b,
bool* is_eq
);
```
Perform an element-wise comparison between matrices `A` and `B` and store the boolean result in the `bool` pointed to by `is_eq`.
Here, `A` is stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix with arbitrary diagonal offset and unit or non-unit diagonal.
If `diag(A)` indicates a unit diagonal, the diagonals of both matrices will be ignored for purposes of the comparison.
If `uplo(A)` indicates lower or upper storage, only that part of both matrices `A` and `B` will be referenced.
If exactly one of `trans(A)` or `trans(B)` (but not both) indicates a transposition, then one of the matrices will be transposed for purposes of the comparison.
Similarly, if exactly one of `trans(A)` or `trans(B)` (but not both) indicates a conjugation, then one of the matrices will be implicitly conjugated for purposes of the comparison.
Observed object properties: `diagoff(A)`, `diag(A)`, `uplo(A)`, `trans?(A)`, `trans?(B)`.

View File

@@ -48,7 +48,7 @@ This index provides a quick way to jump directly to the description for each ope
* **[Level-3](BLISTypedAPI.md#level-3-operations)**: Operations with matrices that are multiplication-like:
* [gemm](BLISTypedAPI.md#gemm), [hemm](BLISTypedAPI.md#hemm), [herk](BLISTypedAPI.md#herk), [her2k](BLISTypedAPI.md#her2k), [symm](BLISTypedAPI.md#symm), [syrk](BLISTypedAPI.md#syrk), [syr2k](BLISTypedAPI.md#syr2k), [trmm](BLISTypedAPI.md#trmm), [trmm3](BLISTypedAPI.md#trmm3), [trsm](BLISTypedAPI.md#trsm)
* **[Utility](BLISTypedAPI.md#Utility-operations)**: Miscellaneous operations on matrices and vectors:
* [asumv](BLISTypedAPI.md#asumv), [norm1v](BLISTypedAPI.md#norm1v), [normfv](BLISTypedAPI.md#normfv), [normiv](BLISTypedAPI.md#normiv), [norm1m](BLISTypedAPI.md#norm1m), [normfm](BLISTypedAPI.md#normfm), [normim](BLISTypedAPI.md#normim), [mkherm](BLISTypedAPI.md#mkherm), [mksymm](BLISTypedAPI.md#mksymm), [mktrim](BLISTypedAPI.md#mktrim), [fprintv](BLISTypedAPI.md#fprintv), [fprintm](BLISTypedAPI.md#fprintm),[printv](BLISTypedAPI.md#printv), [printm](BLISTypedAPI.md#printm), [randv](BLISTypedAPI.md#randv), [randm](BLISTypedAPI.md#randm), [sumsqv](BLISTypedAPI.md#sumsqv)
* [asumv](BLISTypedAPI.md#asumv), [norm1v](BLISTypedAPI.md#norm1v), [normfv](BLISTypedAPI.md#normfv), [normiv](BLISTypedAPI.md#normiv), [norm1m](BLISTypedAPI.md#norm1m), [normfm](BLISTypedAPI.md#normfm), [normim](BLISTypedAPI.md#normim), [mkherm](BLISTypedAPI.md#mkherm), [mksymm](BLISTypedAPI.md#mksymm), [mktrim](BLISTypedAPI.md#mktrim), [fprintv](BLISTypedAPI.md#fprintv), [fprintm](BLISTypedAPI.md#fprintm),[printv](BLISTypedAPI.md#printv), [printm](BLISTypedAPI.md#printm), [randv](BLISTypedAPI.md#randv), [randm](BLISTypedAPI.md#randm), [sumsqv](BLISTypedAPI.md#sumsqv), [getsc](BLISTypedAPI.md#getsc), [getijv](BLISTypedAPI.md#getijv), [getijm](BLISTypedAPI.md#getijm), [setsc](BLISTypedAPI.md#setsc), [setijv](BLISTypedAPI.md#setijv), [setijm](BLISTypedAPI.md#setijm), [eqsc](BLISTypedAPI.md#eqsc), [eqv](BLISTypedAPI.md#eqv), [eqm](BLISTypedAPI.md#eqm)
@@ -1695,6 +1695,149 @@ where, on entry, `scale` and `sumsq` contain `scale_old` and `sumsq_old`, respec
---
#### getsc
```c
void bli_?getsc
(
ctype* chi,
double* zeta_r,
double* zeta_i
)
```
Copy the real and imaginary values from the scalar `chi` to `zeta_r` and `zeta_i`. If `chi` is stored as a real type, then `zeta_i` is set to zero. (If `chi` is stored in single precision, the corresponding elements are typecast/promoted during the copy.)
---
#### getijv
```c
err_t bli_?getijv
(
dim_t i,
ctype* x, inc_t incx,
double* ar,
double* ai
)
```
Copy the real and imaginary values at the `i`th element of vector `x` to `ar` and `ai`. For real domain invocations, only `ar` is overwritten and `ai` is left unchanged. (If `x` contains elements stored in single precision, the corresponding elements are typecast/promoted during the copy.)
Note that the object-based analogue of [getijv](BLISObjectAPI.md#getijv) does bounds checking of the vector element offset `i` against the vector length while the typed functions specified above do not (since the vector length is not given).
---
#### getijm
```c
err_t bli_?getijm
(
dim_t i,
dim_t j,
ctype* b, inc_t rs_b, inc_t cs_b,
double* ar,
double* ai
)
```
Copy the real and imaginary values at the (`i`,`j`) element of matrix `b` to `ar` and `ai`. For real domain invocations, only `ar` is overwritten and `ai` is left unchanged. (If `b` contains elements stored in single precision, the corresponding elements are typecast/promoted during the copy.)
Note that the object-based analogue of [getijm](BLISObjectAPI.md#getijm) does bounds checking of the matrix element offsets (`i`,`j`) against the matrix dimensions while the typed functions specified above do not (since the matrix dimensions are not given).
---
#### setsc
```c
void bli_?setsc
(
double* zeta_r,
double* zeta_i,
ctype* chi
);
```
Copy real and imaginary values `zeta_r` and `zeta_i` to the scalar `chi`. If `chi` is stored as a real type, then `zeta_i` is ignored. (If `chi` is stored in single precision, the contents are typecast/demoted during the copy.)
---
#### setijv
```c
err_t bli_?setijv
(
double ar,
double ai,
dim_t i,
ctype* x, inc_t incx
);
```
Copy real and imaginary values `ar` and `ai` to the `i`th element of vector `x`. For real domain invocations, only `ar` is copied and `ai` is ignored. (If `x` contains elements stored in single precision, the corresponding elements are typecast/demoted during the copy.)
Note that the object-based analogue of [setijv](BLISObjectAPI.md#setijv) does bounds checking of the vector element offset `i` against the vector length while the typed functions specified above do not (since the vector length is not given).
---
#### setijm
```c
err_t bli_?setijm
(
double ar,
double ai,
dim_t i,
dim_t j,
ctype* b, inc_t rs_b, inc_t cs_b
);
```
Copy real and imaginary values `ar` and `ai` to the (`i`,`j`) element of matrix `b`. For real domain invocations, only `ar` is copied and `ai` is ignored. (If `b` contains elements stored in single precision, the corresponding elements are typecast/demoted during the copy.)
Note that the object-based analogue of [setijm](BLISObjectAPI.md#setijm) does bounds checking of the matrix element offsets (`i`,`j`) against the matrix dimensions while the typed functions specified above do not (since the matrix dimensions are not given).
---
#### eqsc
```c
void bli_?eqsc
(
conj_t conjchi,
ctype* chi,
ctype* psi,
bool* is_eq
);
```
Perform an element-wise comparison between scalars `chi` and `psi` and store the boolean result in the `bool` pointed to by `is_eq`.
If `conjchi` indicates a conjugation, `chi` will be implicitly conjugated for purposes of the comparison.
---
#### eqv
```c
void bli_?eqv
(
conj_t conjx,
dim_t n,
ctype* x, inc_t incx,
ctype* y, inc_t incy,
bool* is_eq
);
```
Perform an element-wise comparison between length _n_ vectors `x` and `y` and store the boolean result in the `bool` pointed to by `is_eq`.
If `conjx` indicates a conjugation, `x` will be implicitly conjugated for purposes of the comparison.
---
#### eqm
```c
void bli_?eqm
(
doff_t diagoffa,
diag_t diaga,
uplo_t uploa,
trans_t transa,
dim_t m,
dim_t n,
ctype* a, inc_t rs_a, inc_t cs_a,
ctype* b, inc_t rs_b, inc_t cs_b,
bool* is_eq
)
```
Perform an element-wise comparison between matrices `A` and `B` and store the boolean result in the `bool` pointed to by `is_eq`.
Here, `B` is an _m x n_ matrix, and `A` is stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix with arbitrary diagonal offset and unit or non-unit diagonal.
If `diaga` indicates a unit diagonal, the diagonals of both matrices will be ignored for purposes of the comparison.
If `uploa` indicates lower or upper storage, only that part of matrix `A` will be referenced in the comparison.
If `transa` indicates a conjugation and/or transposition, then `A` will be conjugated and/or transposed for purposes of the comparison.
## Level-3 microkernels

View File

@@ -17,6 +17,7 @@ project, as well as those we think a new user or developer might ask. If you do
* [What is a macrokernel?](FAQ.md#what-is-a-macrokernel)
* [What is a context?](FAQ.md#what-is-a-context)
* [I am used to thinking in terms of column-major/row-major storage and leading dimensions. What is a "row stride" / "column stride"?](FAQ.md#im-used-to-thinking-in-terms-of-column-majorrow-major-storage-and-leading-dimensions-what-is-a-row-stride--column-stride)
* [Why does BLIS have vector (level-1v) and matrix (level-1m) variations of most level-1 operations?](FAQ.md#why-does-blis-have-vector-level-1v-and-matrix-level-1m-variations-of-most-level-1-operations)
* [What does it mean when a matrix with general stride is column-tilted or row-tilted?](FAQ.md#what-does-it-mean-when-a-matrix-with-general-stride-is-column-tilted-or-row-tilted)
* [I am not really interested in all of these newfangled features in BLIS. Can I just use BLIS as a BLAS library?](FAQ.md#im-not-really-interested-in-all-of-these-newfangled-features-in-blis-can-i-just-use-blis-as-a-blas-library)
* [What about CBLAS?](FAQ.md#what-about-cblas)
@@ -117,6 +118,16 @@ In generalized storage, we have a row stride and a column stride. The row stride
BLIS also supports situations where both the row stride and column stride are non-unit. We call this situation "general stride".
### Why does BLIS have vector (level-1v) and matrix (level-1m) variations of most level-1 operations?
At first glance, it might appear that an element-wise operation such as `copym` or `axpym` would be sufficiently general purpose to cover the cases where the operands are vectors. After all, an *m x 1* matrix can be viewed as a vector of length *m* and vice versa. But in BLIS, operations on vectors are treated slightly differently than operations on matrices.
If an application wishes to perform an element-wise operation on two objects, and the application calls a level-1m operation, the dimensions of those objects must be conformal, or "match up" (after any transposition implied by the object properties). This includes situations where one of the dimensions is unit.
However, if an application instead decides to perform an element-wise operation on two objects, and the application calls a level-1v operation, the dimension constraints are slightly relaxed. In this scenario, BLIS only checks that the vector *lengths* are equal. This allows for the vectors to have different orientations (row vs column) while still being considered conformal. So, you could perform a `copyv` operation to copy from an *m x 1* vector to a *1 x m* vector. A `copym` operation on such objects would not be allowed (unless it was executed with the source object containing an implicit transposition).
Another way to think about level-1v operations is that they will work with any two matrix objects in situations where (a) the corresponding level-1m operation *would have* worked if the input had been transposed, and (b) all operands happen to be vectors (i.e., have one unit dimension).
### What does it mean when a matrix with general stride is column-tilted or row-tilted?
When a matrix is stored with general stride, both the row stride and column stride (let's call them `rs` and `cs`) are non-unit. When `rs` < `cs`, we call the general stride matrix "column-tilted" because it is "closer" to being column-stored (than row-stored). Similarly, when `rs` > `cs`, the matrix is "row-tilted" because it is closer to being row-stored.

View File

@@ -87,6 +87,7 @@ void PASTEMAC(opname,_check) \
GENFRONT( absqsc )
GENFRONT( normfsc )
// -----------------------------------------------------------------------------
void bli_getsc_check
(
@@ -352,3 +353,37 @@ void bli_l0_xx2sc_check
bli_check_error_code( e_val );
}
// Parameter checks for a level-0 operation that takes two scalar objects
// (chi, psi) plus a pointer to a bool. Every check's error code is handed
// to bli_check_error_code() before the next check is attempted, so the
// checks run in a fixed order: datatypes, then dimensions, then buffers.
// NOTE(review): is_eq is accepted but never inspected by this routine.
void bli_l0_xxbsc_check
     (
       obj_t* chi,
       obj_t* psi,
       bool*  is_eq
     )
{
	// Datatype checks (noninteger) on chi, then psi.
	err_t status = bli_check_noninteger_object( chi );
	bli_check_error_code( status );

	status = bli_check_noninteger_object( psi );
	bli_check_error_code( status );

	// Dimension checks: each operand is run through the scalar-object check.
	status = bli_check_scalar_object( chi );
	bli_check_error_code( status );

	status = bli_check_scalar_object( psi );
	bli_check_error_code( status );

	// Buffer checks (non-NULLness) on chi, then psi.
	status = bli_check_object_buffer( chi );
	bli_check_error_code( status );

	status = bli_check_object_buffer( psi );
	bli_check_error_code( status );
}

View File

@@ -129,7 +129,6 @@ void PASTEMAC(opname,_check) \
GENTPROT( zipsc )
// -----------------------------------------------------------------------------
void bli_l0_xsc_check
@@ -148,3 +147,10 @@ void bli_l0_xx2sc_check
obj_t* chi,
obj_t* norm
);
// Parameter-checking routine for level-0 operations that take two scalar
// objects (chi, psi) and a bool pointer (is_eq). The definition validates
// the datatype, dimensions, and buffer of chi and psi; it does not
// inspect is_eq.
void bli_l0_xxbsc_check
(
obj_t* chi,
obj_t* psi,
bool* is_eq
);

View File

@@ -175,4 +175,3 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
INSERT_GENTDEFR( zipsc )

View File

@@ -69,8 +69,8 @@ void PASTEMAC0(opname) \
\
f \
( \
buf_chi, \
buf_absq \
buf_chi, \
buf_absq \
); \
}
@@ -105,9 +105,9 @@ void PASTEMAC0(opname) \
\
f \
( \
conjchi, \
buf_chi, \
buf_psi \
conjchi, \
buf_chi, \
buf_psi \
); \
}
@@ -142,8 +142,8 @@ void PASTEMAC0(opname) \
\
f \
( \
conjchi, \
buf_chi \
conjchi, \
buf_chi \
); \
}
@@ -175,8 +175,8 @@ void PASTEMAC0(opname) \
\
f \
( \
buf_chi, \
buf_psi \
buf_chi, \
buf_psi \
); \
}
@@ -218,9 +218,9 @@ void PASTEMAC0(opname) \
\
f \
( \
buf_chi, \
zeta_r, \
zeta_i \
buf_chi, \
zeta_r, \
zeta_i \
); \
}
@@ -252,9 +252,9 @@ void PASTEMAC0(opname) \
\
f \
( \
zeta_r, \
zeta_i, \
buf_chi \
zeta_r, \
zeta_i, \
buf_chi \
); \
}
@@ -295,9 +295,9 @@ void PASTEMAC0(opname) \
\
f \
( \
buf_chi, \
buf_zeta_r, \
buf_zeta_i \
buf_chi, \
buf_zeta_r, \
buf_zeta_i \
); \
}
@@ -332,9 +332,9 @@ void PASTEMAC0(opname) \
\
f \
( \
buf_zeta_i, \
buf_zeta_r, \
buf_chi \
buf_zeta_i, \
buf_zeta_r, \
buf_chi \
); \
}

View File

@@ -128,9 +128,3 @@ BLIS_EXPORT_BLIS void PASTEMAC0(opname) \
GENPROT( zipsc )

View File

@@ -41,18 +41,22 @@
// Prototype object APIs (expert and non-expert).
#include "bli_oapi_ex.h"
#include "bli_l1v_oapi.h"
#include "bli_xapi_undef.h"
#include "bli_oapi_ba.h"
#include "bli_l1v_oapi.h"
#include "bli_xapi_undef.h"
// Prototype typed APIs (expert and non-expert).
#include "bli_tapi_ex.h"
#include "bli_l1v_tapi.h"
#include "bli_l1v_ft.h"
#include "bli_xapi_undef.h"
#include "bli_tapi_ba.h"
#include "bli_l1v_tapi.h"
#include "bli_l1v_ft.h"
#include "bli_xapi_undef.h"
// Generate function pointer arrays for tapi functions (expert only).
#include "bli_l1v_fpa.h"

View File

@@ -117,7 +117,7 @@ siz_t bli_packv_init_pack
dim_t dim_a = bli_obj_vector_dim( a );
dim_t bmult = bli_cntx_get_blksz_def_dt( dt, bmult_id, cntx );
membrk_t* membrk = bli_cntx_membrk( cntx );
pba_t* pba = bli_cntx_pba( cntx );
#if 0
mem_t* mem_p;
@@ -156,9 +156,7 @@ siz_t bli_packv_init_pack
{
// If the mem_t object of p has not yet been allocated, then acquire
// a memory block suitable for a vector.
bli_membrk_acquire_v( membrk,
size_p,
mem_p );
bli_pba_acquire_v( pba, size_p, mem_p );
}
else
{
@@ -166,11 +164,9 @@ siz_t bli_packv_init_pack
// re-acquire the memory so there is sufficient space.
if ( bli_mem_size( mem_p ) < size_p )
{
bli_membrk_release( mem_p );
bli_pba_release( mem_p );
bli_membrk_acquire_v( membrk,
size_p,
mem_p );
bli_pba_acquire_v( pba, size_p, mem_p );
}
}

View File

@@ -37,18 +37,22 @@
// Prototype object APIs (expert and non-expert).
#include "bli_oapi_ex.h"
#include "bli_l1d_oapi.h"
#include "bli_xapi_undef.h"
#include "bli_oapi_ba.h"
#include "bli_l1d_oapi.h"
#include "bli_xapi_undef.h"
// Prototype typed APIs (expert and non-expert).
#include "bli_tapi_ex.h"
#include "bli_l1d_tapi.h"
#include "bli_l1d_ft.h"
#include "bli_xapi_undef.h"
#include "bli_tapi_ba.h"
#include "bli_l1d_tapi.h"
#include "bli_l1d_ft.h"
#include "bli_xapi_undef.h"
// Generate function pointer arrays for tapi functions (expert only).
#include "bli_l1d_fpa.h"

View File

@@ -40,18 +40,22 @@
// Prototype object APIs (expert and non-expert).
#include "bli_oapi_ex.h"
#include "bli_l1f_oapi.h"
#include "bli_xapi_undef.h"
#include "bli_oapi_ba.h"
#include "bli_l1f_oapi.h"
#include "bli_xapi_undef.h"
// Prototype typed APIs (expert and non-expert).
#include "bli_tapi_ex.h"
#include "bli_l1f_tapi.h"
#include "bli_l1f_ft.h"
#include "bli_xapi_undef.h"
#include "bli_tapi_ba.h"
#include "bli_l1f_tapi.h"
#include "bli_l1f_ft.h"
#include "bli_xapi_undef.h"
// Generate function pointer arrays for tapi functions (expert only).
#include "bli_l1f_fpa.h"

View File

@@ -43,18 +43,22 @@
// Prototype object APIs (expert and non-expert).
#include "bli_oapi_ex.h"
#include "bli_l1m_oapi.h"
#include "bli_xapi_undef.h"
#include "bli_oapi_ba.h"
#include "bli_l1m_oapi.h"
#include "bli_xapi_undef.h"
// Prototype typed APIs (expert and non-expert).
#include "bli_tapi_ex.h"
#include "bli_l1m_tapi.h"
#include "bli_l1m_ft.h"
#include "bli_xapi_undef.h"
#include "bli_tapi_ba.h"
#include "bli_l1m_tapi.h"
#include "bli_l1m_ft.h"
#include "bli_xapi_undef.h"
// Generate function pointer arrays for tapi functions (expert only).
#include "bli_l1m_fpa.h"

View File

@@ -57,25 +57,6 @@ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \
INSERT_GENTDEF( addm )
INSERT_GENTDEF( subm )
// copym
#undef GENTDEF
#define GENTDEF( ctype, ch, opname, tsuf ) \
\
typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \
( \
doff_t diagoffx, \
diag_t diagx, \
uplo_t uplox, \
trans_t transx, \
dim_t m, \
dim_t n, \
ctype* x, inc_t rs_x, inc_t cs_x, \
ctype* y, inc_t rs_y, inc_t cs_y \
BLIS_TAPI_EX_PARAMS \
);
INSERT_GENTDEF( copym )
// axpym

View File

@@ -78,17 +78,17 @@ void PASTEMAC(opname,EX_SUF) \
PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
\
f \
( \
diagoffx, \
diagx, \
uplox, \
transx, \
m, \
n, \
buf_x, rs_x, cs_x, \
buf_y, rs_y, cs_y, \
cntx, \
rntm \
( \
diagoffx, \
diagx, \
uplox, \
transx, \
m, \
n, \
buf_x, rs_x, cs_x, \
buf_y, rs_y, cs_y, \
cntx, \
rntm \
); \
}
@@ -146,18 +146,18 @@ void PASTEMAC(opname,EX_SUF) \
PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
\
f \
( \
diagoffx, \
diagx, \
uplox, \
transx, \
m, \
n, \
buf_alpha, \
buf_x, rs_x, cs_x, \
buf_y, rs_y, cs_y, \
cntx, \
rntm \
( \
diagoffx, \
diagx, \
uplox, \
transx, \
m, \
n, \
buf_alpha, \
buf_x, rs_x, cs_x, \
buf_y, rs_y, cs_y, \
cntx, \
rntm \
); \
}
@@ -223,17 +223,17 @@ void PASTEMAC(opname,EX_SUF) \
PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
\
f \
( \
BLIS_NO_CONJUGATE, /* internal conjugation applied during copy-cast. */ \
diagoffx, \
diagx, \
uplox, \
m, \
n, \
buf_alpha, \
buf_x, rs_x, cs_x, \
cntx, \
rntm \
( \
BLIS_NO_CONJUGATE, /* internal conjugation applied during copy-cast. */ \
diagoffx, \
diagx, \
uplox, \
m, \
n, \
buf_alpha, \
buf_x, rs_x, cs_x, \
cntx, \
rntm \
); \
}
@@ -285,17 +285,17 @@ void PASTEMAC(opname,EX_SUF) \
PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
\
f \
( \
BLIS_NO_CONJUGATE, /* internal conjugation applied during copy-cast. */ \
diagoffx, \
diagx, \
uplox, \
m, \
n, \
buf_alpha, \
buf_x, rs_x, cs_x, \
cntx, \
rntm \
( \
BLIS_NO_CONJUGATE, /* internal conjugation applied during copy-cast. */ \
diagoffx, \
diagx, \
uplox, \
m, \
n, \
buf_alpha, \
buf_x, rs_x, cs_x, \
cntx, \
rntm \
); \
}
@@ -354,18 +354,18 @@ void PASTEMAC(opname,EX_SUF) \
PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
\
f \
( \
diagoffx, \
diagx, \
uplox, \
transx, \
m, \
n, \
buf_x, rs_x, cs_x, \
buf_beta, \
buf_y, rs_y, cs_y, \
cntx, \
rntm \
( \
diagoffx, \
diagx, \
uplox, \
transx, \
m, \
n, \
buf_x, rs_x, cs_x, \
buf_beta, \
buf_y, rs_y, cs_y, \
cntx, \
rntm \
); \
}
@@ -420,17 +420,17 @@ void PASTEMAC(opname,EX_SUF) \
\
f \
( \
diagoffx, \
diagx, \
uplox, \
transx, \
m, \
n, \
buf_x, rs_x, cs_x, \
buf_beta, \
buf_y, rs_y, cs_y, \
cntx, \
rntm \
diagoffx, \
diagx, \
uplox, \
transx, \
m, \
n, \
buf_x, rs_x, cs_x, \
buf_beta, \
buf_y, rs_y, cs_y, \
cntx, \
rntm \
); \
}

View File

@@ -57,15 +57,12 @@ void PASTEMAC(ch,opname) \
{ \
const num_t dt = PASTEMAC(ch,type); \
\
ctype* x1; \
ctype* y1; \
uplo_t uplox_eff; \
conj_t conjx; \
dim_t n_iter; \
dim_t n_elem, n_elem_max; \
dim_t n_elem_max; \
inc_t ldx, incx; \
inc_t ldy, incy; \
dim_t j, i; \
dim_t ij0, n_shift; \
\
/* Set various loop parameters. */ \
@@ -88,62 +85,65 @@ void PASTEMAC(ch,opname) \
/* Handle dense and upper/lower storage cases separately. */ \
if ( bli_is_dense( uplox_eff ) ) \
{ \
for ( j = 0; j < n_iter; ++j ) \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
n_elem = n_elem_max; \
const dim_t n_elem = n_elem_max; \
\
x1 = x + (j )*ldx + (0 )*incx; \
y1 = y + (j )*ldy + (0 )*incy; \
ctype* x1 = x + (j )*ldx + (0 )*incx; \
ctype* y1 = y + (j )*ldy + (0 )*incy; \
\
/* Invoke the kernel with the appropriate parameters. */ \
f( \
conjx, \
n_elem, \
x1, incx, \
y1, incy, \
cntx \
); \
f \
( \
conjx, \
n_elem, \
x1, incx, \
y1, incy, \
cntx \
); \
} \
} \
else \
{ \
if ( bli_is_upper( uplox_eff ) ) \
{ \
for ( j = 0; j < n_iter; ++j ) \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
const dim_t n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
\
x1 = x + (ij0+j )*ldx + (0 )*incx; \
y1 = y + (ij0+j )*ldy + (0 )*incy; \
ctype* x1 = x + (ij0+j )*ldx + (0 )*incx; \
ctype* y1 = y + (ij0+j )*ldy + (0 )*incy; \
\
/* Invoke the kernel with the appropriate parameters. */ \
f( \
conjx, \
n_elem, \
x1, incx, \
y1, incy, \
cntx \
); \
f \
( \
conjx, \
n_elem, \
x1, incx, \
y1, incy, \
cntx \
); \
} \
} \
else if ( bli_is_lower( uplox_eff ) ) \
{ \
for ( j = 0; j < n_iter; ++j ) \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
i = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
n_elem = n_elem_max - i; \
const dim_t offi = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
const dim_t n_elem = n_elem_max - offi; \
\
x1 = x + (j )*ldx + (ij0+i )*incx; \
y1 = y + (j )*ldy + (ij0+i )*incy; \
ctype* x1 = x + (j )*ldx + (ij0+offi )*incx; \
ctype* y1 = y + (j )*ldy + (ij0+offi )*incy; \
\
/* Invoke the kernel with the appropriate parameters. */ \
f( \
conjx, \
n_elem, \
x1, incx, \
y1, incy, \
cntx \
); \
f \
( \
conjx, \
n_elem, \
x1, incx, \
y1, incy, \
cntx \
); \
} \
} \
} \
@@ -174,15 +174,12 @@ void PASTEMAC(ch,opname) \
{ \
const num_t dt = PASTEMAC(ch,type); \
\
ctype* x1; \
ctype* y1; \
uplo_t uplox_eff; \
conj_t conjx; \
dim_t n_iter; \
dim_t n_elem, n_elem_max; \
dim_t n_elem_max; \
inc_t ldx, incx; \
inc_t ldy, incy; \
dim_t j, i; \
dim_t ij0, n_shift; \
\
/* Set various loop parameters. */ \
@@ -205,65 +202,68 @@ void PASTEMAC(ch,opname) \
/* Handle dense and upper/lower storage cases separately. */ \
if ( bli_is_dense( uplox_eff ) ) \
{ \
for ( j = 0; j < n_iter; ++j ) \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
n_elem = n_elem_max; \
const dim_t n_elem = n_elem_max; \
\
x1 = x + (j )*ldx + (0 )*incx; \
y1 = y + (j )*ldy + (0 )*incy; \
ctype* x1 = x + (j )*ldx + (0 )*incx; \
ctype* y1 = y + (j )*ldy + (0 )*incy; \
\
/* Invoke the kernel with the appropriate parameters. */ \
f( \
conjx, \
n_elem, \
alpha, \
x1, incx, \
y1, incy, \
cntx \
); \
f \
( \
conjx, \
n_elem, \
alpha, \
x1, incx, \
y1, incy, \
cntx \
); \
} \
} \
else \
{ \
if ( bli_is_upper( uplox_eff ) ) \
{ \
for ( j = 0; j < n_iter; ++j ) \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
const dim_t n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
\
x1 = x + (ij0+j )*ldx + (0 )*incx; \
y1 = y + (ij0+j )*ldy + (0 )*incy; \
ctype* x1 = x + (ij0+j )*ldx + (0 )*incx; \
ctype* y1 = y + (ij0+j )*ldy + (0 )*incy; \
\
/* Invoke the kernel with the appropriate parameters. */ \
f( \
conjx, \
n_elem, \
alpha, \
x1, incx, \
y1, incy, \
cntx \
); \
f \
( \
conjx, \
n_elem, \
alpha, \
x1, incx, \
y1, incy, \
cntx \
); \
} \
} \
else if ( bli_is_lower( uplox_eff ) ) \
{ \
for ( j = 0; j < n_iter; ++j ) \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
i = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
n_elem = n_elem_max - i; \
const dim_t offi = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
const dim_t n_elem = n_elem_max - offi; \
\
x1 = x + (j )*ldx + (ij0+i )*incx; \
y1 = y + (j )*ldy + (ij0+i )*incy; \
ctype* x1 = x + (j )*ldx + (ij0+offi )*incx; \
ctype* y1 = y + (j )*ldy + (ij0+offi )*incy; \
\
/* Invoke the kernel with the appropriate parameters. */ \
f( \
conjx, \
n_elem, \
alpha, \
x1, incx, \
y1, incy, \
cntx \
); \
f \
( \
conjx, \
n_elem, \
alpha, \
x1, incx, \
y1, incy, \
cntx \
); \
} \
} \
} \
@@ -292,12 +292,10 @@ void PASTEMAC(ch,opname) \
{ \
const num_t dt = PASTEMAC(ch,type); \
\
ctype* x1; \
uplo_t uplox_eff; \
dim_t n_iter; \
dim_t n_elem, n_elem_max; \
dim_t n_elem_max; \
inc_t ldx, incx; \
dim_t j, i; \
dim_t ij0, n_shift; \
\
/* Set various loop parameters. */ \
@@ -317,59 +315,62 @@ void PASTEMAC(ch,opname) \
/* Handle dense and upper/lower storage cases separately. */ \
if ( bli_is_dense( uplox_eff ) ) \
{ \
for ( j = 0; j < n_iter; ++j ) \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
n_elem = n_elem_max; \
const dim_t n_elem = n_elem_max; \
\
x1 = x + (j )*ldx + (0 )*incx; \
ctype* x1 = x + (j )*ldx + (0 )*incx; \
\
/* Invoke the kernel with the appropriate parameters. */ \
f( \
conjalpha, \
n_elem, \
alpha, \
x1, incx, \
cntx \
); \
f \
( \
conjalpha, \
n_elem, \
alpha, \
x1, incx, \
cntx \
); \
} \
} \
else \
{ \
if ( bli_is_upper( uplox_eff ) ) \
{ \
for ( j = 0; j < n_iter; ++j ) \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
const dim_t n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
\
x1 = x + (ij0+j )*ldx + (0 )*incx; \
ctype* x1 = x + (ij0+j )*ldx + (0 )*incx; \
\
/* Invoke the kernel with the appropriate parameters. */ \
f( \
conjalpha, \
n_elem, \
alpha, \
x1, incx, \
cntx \
); \
f \
( \
conjalpha, \
n_elem, \
alpha, \
x1, incx, \
cntx \
); \
} \
} \
else if ( bli_is_lower( uplox_eff ) ) \
{ \
for ( j = 0; j < n_iter; ++j ) \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
i = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
n_elem = n_elem_max - i; \
const dim_t offi = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
const dim_t n_elem = n_elem_max - offi; \
\
x1 = x + (j )*ldx + (ij0+i )*incx; \
ctype* x1 = x + (j )*ldx + (ij0+offi )*incx; \
\
/* Invoke the kernel with the appropriate parameters. */ \
f( \
conjalpha, \
n_elem, \
alpha, \
x1, incx, \
cntx \
); \
f \
( \
conjalpha, \
n_elem, \
alpha, \
x1, incx, \
cntx \
); \
} \
} \
} \
@@ -399,15 +400,12 @@ void PASTEMAC(ch,opname) \
{ \
const num_t dt = PASTEMAC(ch,type); \
\
ctype* x1; \
ctype* y1; \
uplo_t uplox_eff; \
conj_t conjx; \
dim_t n_iter; \
dim_t n_elem, n_elem_max; \
dim_t n_elem_max; \
inc_t ldx, incx; \
inc_t ldy, incy; \
dim_t j, i; \
dim_t ij0, n_shift; \
\
/* Set various loop parameters. */ \
@@ -430,65 +428,68 @@ void PASTEMAC(ch,opname) \
/* Handle dense and upper/lower storage cases separately. */ \
if ( bli_is_dense( uplox_eff ) ) \
{ \
for ( j = 0; j < n_iter; ++j ) \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
n_elem = n_elem_max; \
const dim_t n_elem = n_elem_max; \
\
x1 = x + (j )*ldx + (0 )*incx; \
y1 = y + (j )*ldy + (0 )*incy; \
ctype* x1 = x + (j )*ldx + (0 )*incx; \
ctype* y1 = y + (j )*ldy + (0 )*incy; \
\
/* Invoke the kernel with the appropriate parameters. */ \
f( \
conjx, \
n_elem, \
x1, incx, \
beta, \
y1, incy, \
cntx \
); \
f \
( \
conjx, \
n_elem, \
x1, incx, \
beta, \
y1, incy, \
cntx \
); \
} \
} \
else \
{ \
if ( bli_is_upper( uplox_eff ) ) \
{ \
for ( j = 0; j < n_iter; ++j ) \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
const dim_t n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
\
x1 = x + (ij0+j )*ldx + (0 )*incx; \
y1 = y + (ij0+j )*ldy + (0 )*incy; \
ctype* x1 = x + (ij0+j )*ldx + (0 )*incx; \
ctype* y1 = y + (ij0+j )*ldy + (0 )*incy; \
\
/* Invoke the kernel with the appropriate parameters. */ \
f( \
conjx, \
n_elem, \
x1, incx, \
beta, \
y1, incy, \
cntx \
); \
f \
( \
conjx, \
n_elem, \
x1, incx, \
beta, \
y1, incy, \
cntx \
); \
} \
} \
else if ( bli_is_lower( uplox_eff ) ) \
{ \
for ( j = 0; j < n_iter; ++j ) \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
i = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
n_elem = n_elem_max - i; \
const dim_t offi = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
const dim_t n_elem = n_elem_max - offi; \
\
x1 = x + (j )*ldx + (ij0+i )*incx; \
y1 = y + (j )*ldy + (ij0+i )*incy; \
ctype* x1 = x + (j )*ldx + (ij0+offi )*incx; \
ctype* y1 = y + (j )*ldy + (ij0+offi )*incy; \
\
/* Invoke the kernel with the appropriate parameters. */ \
f( \
conjx, \
n_elem, \
x1, incx, \
beta, \
y1, incy, \
cntx \
); \
f \
( \
conjx, \
n_elem, \
x1, incx, \
beta, \
y1, incy, \
cntx \
); \
} \
} \
} \
@@ -515,15 +516,12 @@ void PASTEMAC2(chx,chy,opname) \
rntm_t* rntm \
) \
{ \
ctype_x* restrict x1; \
ctype_y* restrict y1; \
uplo_t uplox_eff; \
dim_t n_iter; \
dim_t n_elem, n_elem_max; \
inc_t ldx, incx; \
inc_t ldy, incy; \
dim_t j, i; \
dim_t ij0, n_shift; \
uplo_t uplox_eff; \
dim_t n_iter; \
dim_t n_elem_max; \
inc_t ldx, incx; \
inc_t ldy, incy; \
dim_t ij0, n_shift; \
\
/* Set various loop parameters. */ \
bli_set_dims_incs_uplo_2m \
@@ -542,35 +540,32 @@ void PASTEMAC2(chx,chy,opname) \
{ \
if ( incx == 1 && incy == 1 ) \
{ \
n_elem = n_elem_max; \
const dim_t n_elem = n_elem_max; \
\
for ( j = 0; j < n_iter; ++j ) \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
x1 = x + (j )*ldx + (0 )*incx; \
y1 = y + (j )*ldy + (0 )*incy; \
ctype_x* restrict x1 = x + (j )*ldx + (0 )*incx; \
ctype_y* restrict y1 = y + (j )*ldy + (0 )*incy; \
\
ctype_x* restrict chi1 = x1; \
ctype_y* restrict psi1 = y1; \
\
for ( i = 0; i < n_elem; ++i ) \
for ( dim_t i = 0; i < n_elem; ++i ) \
{ \
PASTEMAC2(chx,chy,adds)( chi1[i], psi1[i] ); \
PASTEMAC2(chx,chy,adds)( x1[i], y1[i] ); \
} \
} \
} \
else \
{ \
n_elem = n_elem_max; \
const dim_t n_elem = n_elem_max; \
\
for ( j = 0; j < n_iter; ++j ) \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
x1 = x + (j )*ldx + (0 )*incx; \
y1 = y + (j )*ldy + (0 )*incy; \
ctype_x* restrict x1 = x + (j )*ldx + (0 )*incx; \
ctype_y* restrict y1 = y + (j )*ldy + (0 )*incy; \
\
ctype_x* restrict chi1 = x1; \
ctype_y* restrict psi1 = y1; \
\
for ( i = 0; i < n_elem; ++i ) \
for ( dim_t i = 0; i < n_elem; ++i ) \
{ \
PASTEMAC2(chx,chy,adds)( *chi1, *psi1 ); \
\
@@ -584,35 +579,32 @@ void PASTEMAC2(chx,chy,opname) \
{ \
if ( incx == 1 && incy == 1 ) \
{ \
n_elem = n_elem_max; \
const dim_t n_elem = n_elem_max; \
\
for ( j = 0; j < n_iter; ++j ) \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
x1 = x + (j )*ldx + (0 )*incx; \
y1 = y + (j )*ldy + (0 )*incy; \
ctype_x* restrict x1 = x + (j )*ldx + (0 )*incx; \
ctype_y* restrict y1 = y + (j )*ldy + (0 )*incy; \
\
ctype_x* restrict chi1 = x1; \
ctype_y* restrict psi1 = y1; \
\
for ( i = 0; i < n_elem; ++i ) \
for ( dim_t i = 0; i < n_elem; ++i ) \
{ \
PASTEMAC3(chx,chy,chy,xpbys)( chi1[i], *beta, psi1[i] ); \
PASTEMAC3(chx,chy,chy,xpbys)( x1[i], *beta, y1[i] ); \
} \
} \
} \
else \
{ \
n_elem = n_elem_max; \
const dim_t n_elem = n_elem_max; \
\
for ( j = 0; j < n_iter; ++j ) \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
x1 = x + (j )*ldx + (0 )*incx; \
y1 = y + (j )*ldy + (0 )*incy; \
ctype_x* restrict x1 = x + (j )*ldx + (0 )*incx; \
ctype_y* restrict y1 = y + (j )*ldy + (0 )*incy; \
\
ctype_x* restrict chi1 = x1; \
ctype_y* restrict psi1 = y1; \
\
for ( i = 0; i < n_elem; ++i ) \
for ( dim_t i = 0; i < n_elem; ++i ) \
{ \
PASTEMAC3(chx,chy,chy,xpbys)( *chi1, *beta, *psi1 ); \
\

View File

@@ -45,13 +45,14 @@ cntl_t* bli_unpackm_cntl_create_node
{
cntl_t* cntl;
unpackm_params_t* params;
err_t r_val;
// NOTE: If this function is ever called, figure out whether the
// bli_malloc_intl() below needs to be changed to bli_sba_acquire().
bli_abort();
// Allocate an unpackm_params_t struct.
params = bli_malloc_intl( sizeof( unpackm_params_t ) );
params = bli_malloc_intl( sizeof( unpackm_params_t ), &r_val );
// Initialize the unpackm_params_t struct.
params->size = sizeof( unpackm_params_t );

View File

@@ -40,18 +40,22 @@
// Prototype object APIs (expert and non-expert).
#include "bli_oapi_ex.h"
#include "bli_l2_oapi.h"
#include "bli_xapi_undef.h"
#include "bli_oapi_ba.h"
#include "bli_l2_oapi.h"
#include "bli_xapi_undef.h"
// Prototype typed APIs (expert and non-expert).
#include "bli_tapi_ex.h"
#include "bli_l2_tapi.h"
#include "bli_l2_ft.h"
#include "bli_xapi_undef.h"
#include "bli_tapi_ba.h"
#include "bli_l2_tapi.h"
#include "bli_l2_ft.h"
#include "bli_xapi_undef.h"
// Generate function pointer arrays for tapi functions (expert only).
#include "bli_l2_fpa.h"

View File

@@ -37,7 +37,7 @@
#include "bli_l3_check.h"
// Define function types.
#include "bli_l3_ft_ex.h"
//#include "bli_l3_ft_ex.h"
#include "bli_l3_ft_ukr.h"
#include "bli_l3_oft.h"
#include "bli_l3_oft_var.h"
@@ -50,16 +50,20 @@
// Prototype object APIs (expert and non-expert).
#include "bli_oapi_ex.h"
#include "bli_l3_oapi.h"
#include "bli_xapi_undef.h"
#include "bli_oapi_ba.h"
#include "bli_l3_oapi.h"
#include "bli_xapi_undef.h"
// Prototype typed APIs (expert and non-expert).
#include "bli_tapi_ex.h"
#include "bli_l3_tapi.h"
#include "bli_xapi_undef.h"
#include "bli_tapi_ba.h"
#include "bli_l3_tapi.h"
#include "bli_xapi_undef.h"
// Define function types for small/unpacked handlers/kernels.
#include "bli_l3_sup_oft.h"

View File

@@ -91,7 +91,7 @@ void bli_l3_packm
// The chief thread acquires a block from the memory broker
// and saves the associated mem_t entry to local_mem_s.
bli_membrk_acquire_m
bli_pba_acquire_m
(
rntm,
size_needed,
@@ -130,12 +130,12 @@ void bli_l3_packm
// The chief thread releases the existing block associated with
// the mem_t entry in the control tree, and then re-acquires a
// new block, saving the associated mem_t entry to local_mem_s.
bli_membrk_release
bli_pba_release
(
rntm,
cntl_mem_p
);
bli_membrk_acquire_m
bli_pba_acquire_m
(
rntm,
size_needed,

View File

@@ -86,7 +86,7 @@ void PASTEMAC(ch,opname) \
function before the other threads have a chance to copy
from it. (A barrier would fix that race condition, but
then again, I prefer to keep barriers to a minimum.) */ \
bli_membrk_acquire_m \
bli_pba_acquire_m \
( \
rntm, \
size_needed, \
@@ -130,12 +130,12 @@ void PASTEMAC(ch,opname) \
above for why the acquisition needs to be directly to
the chief thread's passed-in mem_t and not a local
(temporary) mem_t. */ \
bli_membrk_release \
bli_pba_release \
( \
rntm, \
mem \
); \
bli_membrk_acquire_m \
bli_pba_acquire_m \
( \
rntm, \
size_needed, \
@@ -194,7 +194,7 @@ void PASTEMAC(ch,opname) \
is allocated, which it should be. */ \
if ( bli_mem_is_alloc( mem ) ) \
{ \
bli_membrk_release \
bli_pba_release \
( \
rntm, \
mem \

View File

@@ -86,7 +86,7 @@ void PASTEMAC(ch,opname) \
function before the other threads have a chance to copy
from it. (A barrier would fix that race condition, but
then again, I prefer to keep barriers to a minimum.) */ \
bli_membrk_acquire_m \
bli_pba_acquire_m \
( \
rntm, \
size_needed, \
@@ -130,12 +130,12 @@ void PASTEMAC(ch,opname) \
above for why the acquisition needs to be directly to
the chief thread's passed-in mem_t and not a local
(temporary) mem_t. */ \
bli_membrk_release \
bli_pba_release \
( \
rntm, \
mem \
); \
bli_membrk_acquire_m \
bli_pba_acquire_m \
( \
rntm, \
size_needed, \
@@ -194,7 +194,7 @@ void PASTEMAC(ch,opname) \
is allocated, which it should be. */ \
if ( bli_mem_is_alloc( mem ) ) \
{ \
bli_membrk_release \
bli_pba_release \
( \
rntm, \
mem \

View File

@@ -39,12 +39,19 @@ void bli_apool_init
apool_t* restrict apool
)
{
err_t r_val;
// NOTE: The apool_t is only used in one place; it is the type used to
// define the sba. We've switched to static initialization of the mutex
// field to remove one more thing that could possibly go wrong during
// library initialization.
// Query the mutex from the apool_t.
bli_pthread_mutex_t* restrict mutex = bli_apool_mutex( apool );
//bli_pthread_mutex_t* restrict mutex = bli_apool_mutex( apool );
// Initialize the mutex.
//*mutex = BLIS_PTHREAD_MUTEX_INITIALIZER;
bli_pthread_mutex_init( mutex, NULL );
//bli_pthread_mutex_init( mutex, NULL );
// We choose to start with:
// - an empty pool
@@ -87,7 +94,7 @@ void bli_apool_init
// Allocate the block_ptrs array.
array_t** restrict block_ptrs
=
bli_malloc_intl( block_ptrs_len * sizeof( array_t* ) );
bli_malloc_intl( block_ptrs_len * sizeof( array_t* ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_apool_init(): allocating %d array_t.\n", ( int )num_blocks );
@@ -136,6 +143,8 @@ void bli_apool_alloc_block
array_t** restrict array_p
)
{
err_t r_val;
// Since the apool_t is defined as a pool of array_t, we can hard-code
// the block_size parameter.
const siz_t block_size = sizeof( array_t );
@@ -149,7 +158,7 @@ void bli_apool_alloc_block
// be recovered when it's time to free the block.
array_t* restrict array
=
bli_malloc_intl( block_size );
bli_malloc_intl( block_size, &r_val );
// Initialize an array_t struct within the newly allocated memory region.
bli_array_init( num_elem, sizeof( pool_t* ), array );
@@ -212,11 +221,14 @@ void bli_apool_finalize
apool_t* restrict apool
)
{
// NOTE: Since the apool_t's mutex is now initialized statically, we no
// longer need to explicitly destroy it.
// Query the mutex from the apool_t.
bli_pthread_mutex_t* restrict mutex = bli_apool_mutex( apool );
//bli_pthread_mutex_t* restrict mutex = bli_apool_mutex( apool );
// Destroy the mutex.
bli_pthread_mutex_destroy( mutex );
//bli_pthread_mutex_destroy( mutex );
// Query the underlying pool_t and mutex from the apool_t.
pool_t* restrict pool = bli_apool_pool( apool );
@@ -368,6 +380,8 @@ pool_t* bli_apool_array_elem
array_t* restrict array
)
{
err_t r_val;
// Query the array element corresponding to index.
// NOTE: If we knew that the array_t contained elements of size
// sizeof( void* ) or sizeof( whatever ), we could return the *value*
@@ -417,7 +431,7 @@ pool_t* bli_apool_array_elem
#endif
// Allocate the pool_t.
pool = bli_malloc_intl( sizeof( pool_t ) );
pool = bli_malloc_intl( sizeof( pool_t ), &r_val );
// Initialize the pool_t.
bli_pool_init
@@ -453,6 +467,8 @@ void bli_apool_grow
apool_t* restrict apool
)
{
err_t r_val;
// If the requested increase is zero, return early.
if ( num_blocks_add == 0 ) return;
@@ -493,7 +509,7 @@ void bli_apool_grow
// Allocate a new block_ptrs array.
array_t** restrict block_ptrs_new
=
bli_malloc_intl( block_ptrs_len_new * sizeof( array_t* ) );
bli_malloc_intl( block_ptrs_len_new * sizeof( array_t* ), &r_val );
// Query the top_index of the pool.
const siz_t top_index = bli_pool_top_index( pool );

View File

@@ -43,6 +43,8 @@ void bli_array_init
array_t* restrict array
)
{
err_t r_val;
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_array_init(): allocating array [%d * %d]: ",
( int )num_elem, ( int )elem_size );
@@ -52,7 +54,7 @@ void bli_array_init
const size_t array_size = num_elem * elem_size;
// Allocate the array buffer.
void* restrict buf = bli_malloc_intl( array_size );
void* restrict buf = bli_malloc_intl( array_size, &r_val );
// Initialize the array elements to zero. THIS IS IMPORTANT because
// consumer threads will use the NULL-ness of the array elements to
@@ -72,6 +74,8 @@ void bli_array_resize
array_t* restrict array
)
{
err_t r_val;
// Query the number of elements in the array.
const siz_t num_elem_prev = bli_array_num_elem( array );
@@ -98,7 +102,7 @@ void bli_array_resize
#endif
// Allocate a new array buffer.
char* restrict buf_new = bli_malloc_intl( array_size_new );
char* restrict buf_new = bli_malloc_intl( array_size_new, &r_val );
// Copy the previous array contents to the new array.
memcpy( buf_new, buf_prev, array_size_prev );

View File

@@ -42,7 +42,9 @@ blksz_t* bli_blksz_create_ed
dim_t b_z, dim_t be_z
)
{
blksz_t* b = bli_malloc_intl( sizeof( blksz_t ) );
err_t r_val;
blksz_t* b = bli_malloc_intl( sizeof( blksz_t ), &r_val );
bli_blksz_init_ed
(
@@ -62,7 +64,9 @@ blksz_t* bli_blksz_create
dim_t be_s, dim_t be_d, dim_t be_c, dim_t be_z
)
{
blksz_t* b = bli_malloc_intl( sizeof( blksz_t ) );
err_t r_val;
blksz_t* b = bli_malloc_intl( sizeof( blksz_t ), &r_val );
bli_blksz_init
(

View File

@@ -192,7 +192,7 @@ void bli_cntl_free_w_thrinfo
printf( "bli_cntl_free_w_thrinfo(): releasing mem pool block.\n" );
#endif
bli_membrk_release( rntm, cntl_pack_mem );
bli_pba_release( rntm, cntl_pack_mem );
}
// Free the current node.
@@ -236,7 +236,7 @@ void bli_cntl_free_wo_thrinfo
// allocated.
if ( bli_mem_is_alloc( cntl_pack_mem ) )
{
bli_membrk_release( rntm, cntl_pack_mem );
bli_pba_release( rntm, cntl_pack_mem );
}
// Free the current node.

View File

@@ -78,33 +78,34 @@ void bli_cntx_set_blkszs( ind_t method, dim_t n_bs, ... )
va_list args;
dim_t i;
err_t r_val;
// Allocate some temporary local arrays.
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_blkszs(): " );
#endif
bszid_t* bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ) );
bszid_t* bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_blkszs(): " );
#endif
blksz_t** blkszs = bli_malloc_intl( n_bs * sizeof( blksz_t* ) );
blksz_t** blkszs = bli_malloc_intl( n_bs * sizeof( blksz_t* ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_blkszs(): " );
#endif
bszid_t* bmults = bli_malloc_intl( n_bs * sizeof( bszid_t ) );
bszid_t* bmults = bli_malloc_intl( n_bs * sizeof( bszid_t ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_blkszs(): " );
#endif
double* dsclrs = bli_malloc_intl( n_bs * sizeof( double ) );
double* dsclrs = bli_malloc_intl( n_bs * sizeof( double ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_blkszs(): " );
#endif
double* msclrs = bli_malloc_intl( n_bs * sizeof( double ) );
double* msclrs = bli_malloc_intl( n_bs * sizeof( double ), &r_val );
// -- Begin variable argument section --
@@ -343,6 +344,7 @@ void bli_cntx_set_ind_blkszs( ind_t method, dim_t n_bs, ... )
va_list args;
dim_t i;
err_t r_val;
// Return early if called with BLIS_NAT.
if ( method == BLIS_NAT ) return;
@@ -352,17 +354,17 @@ void bli_cntx_set_ind_blkszs( ind_t method, dim_t n_bs, ... )
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_ind_blkszs(): " );
#endif
bszid_t* bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ) );
bszid_t* bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_ind_blkszs(): " );
#endif
double* dsclrs = bli_malloc_intl( n_bs * sizeof( double ) );
double* dsclrs = bli_malloc_intl( n_bs * sizeof( double ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_ind_blkszs(): " );
#endif
double* msclrs = bli_malloc_intl( n_bs * sizeof( double ) );
double* msclrs = bli_malloc_intl( n_bs * sizeof( double ), &r_val );
// -- Begin variable argument section --
@@ -523,28 +525,29 @@ void bli_cntx_set_l3_nat_ukrs( dim_t n_ukrs, ... )
va_list args;
dim_t i;
err_t r_val;
// Allocate some temporary local arrays.
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l3_nat_ukrs(): " );
#endif
l3ukr_t* ukr_ids = bli_malloc_intl( n_ukrs * sizeof( l3ukr_t ) );
l3ukr_t* ukr_ids = bli_malloc_intl( n_ukrs * sizeof( l3ukr_t ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l3_nat_ukrs(): " );
#endif
num_t* ukr_dts = bli_malloc_intl( n_ukrs * sizeof( num_t ) );
num_t* ukr_dts = bli_malloc_intl( n_ukrs * sizeof( num_t ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l3_nat_ukrs(): " );
#endif
void_fp* ukr_fps = bli_malloc_intl( n_ukrs * sizeof( void_fp ) );
void_fp* ukr_fps = bli_malloc_intl( n_ukrs * sizeof( void_fp ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l3_nat_ukrs(): " );
#endif
bool* ukr_prefs = bli_malloc_intl( n_ukrs * sizeof( bool ) );
bool* ukr_prefs = bli_malloc_intl( n_ukrs * sizeof( bool ), &r_val );
// -- Begin variable argument section --
@@ -680,23 +683,24 @@ void bli_cntx_set_l3_vir_ukrs( dim_t n_ukrs, ... )
va_list args;
dim_t i;
err_t r_val;
// Allocate some temporary local arrays.
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l3_vir_ukrs(): " );
#endif
l3ukr_t* ukr_ids = bli_malloc_intl( n_ukrs * sizeof( l3ukr_t ) );
l3ukr_t* ukr_ids = bli_malloc_intl( n_ukrs * sizeof( l3ukr_t ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l3_vir_ukrs(): " );
#endif
num_t* ukr_dts = bli_malloc_intl( n_ukrs * sizeof( num_t ) );
num_t* ukr_dts = bli_malloc_intl( n_ukrs * sizeof( num_t ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l3_vir_ukrs(): " );
#endif
void_fp* ukr_fps = bli_malloc_intl( n_ukrs * sizeof( void_fp ) );
void_fp* ukr_fps = bli_malloc_intl( n_ukrs * sizeof( void_fp ), &r_val );
// -- Begin variable argument section --
@@ -800,20 +804,21 @@ void bli_cntx_set_l3_sup_thresh( dim_t n_thresh, ... )
*/
va_list args;
dim_t i;
va_list args;
dim_t i;
err_t r_val;
// Allocate some temporary local arrays.
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l3_sup_thresh(): " );
#endif
threshid_t* threshids = bli_malloc_intl( n_thresh * sizeof( threshid_t ) );
threshid_t* threshids = bli_malloc_intl( n_thresh * sizeof( threshid_t ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l3_sup_thresh(): " );
#endif
blksz_t** threshs = bli_malloc_intl( n_thresh * sizeof( blksz_t* ) );
blksz_t** threshs = bli_malloc_intl( n_thresh * sizeof( blksz_t* ), &r_val );
// -- Begin variable argument section --
@@ -907,18 +912,19 @@ void bli_cntx_set_l3_sup_handlers( dim_t n_ops, ... )
va_list args;
dim_t i;
err_t r_val;
// Allocate some temporary local arrays.
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l3_sup_handlers(): " );
#endif
opid_t* op_ids = bli_malloc_intl( n_ops * sizeof( opid_t ) );
opid_t* op_ids = bli_malloc_intl( n_ops * sizeof( opid_t ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l3_sup_handlers(): " );
#endif
void** op_fps = bli_malloc_intl( n_ops * sizeof( void* ) );
void** op_fps = bli_malloc_intl( n_ops * sizeof( void* ), &r_val );
// -- Begin variable argument section --
@@ -1005,17 +1011,18 @@ void bli_cntx_set_l3_sup_blkszs( dim_t n_bs, ... )
va_list args;
dim_t i;
err_t r_val;
// Allocate some temporary local arrays.
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_blkszs(): " );
#endif
bszid_t* bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ) );
bszid_t* bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_blkszs(): " );
#endif
blksz_t** blkszs = bli_malloc_intl( n_bs * sizeof( blksz_t* ) );
blksz_t** blkszs = bli_malloc_intl( n_bs * sizeof( blksz_t* ), &r_val );
// -- Begin variable argument section --
@@ -1109,28 +1116,29 @@ void bli_cntx_set_l3_sup_kers( dim_t n_ukrs, ... )
va_list args;
dim_t i;
err_t r_val;
// Allocate some temporary local arrays.
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l3_sup_kers(): " );
#endif
stor3_t* st3_ids = bli_malloc_intl( n_ukrs * sizeof( stor3_t ) );
stor3_t* st3_ids = bli_malloc_intl( n_ukrs * sizeof( stor3_t ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l3_sup_kers(): " );
#endif
num_t* ukr_dts = bli_malloc_intl( n_ukrs * sizeof( num_t ) );
num_t* ukr_dts = bli_malloc_intl( n_ukrs * sizeof( num_t ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l3_sup_kers(): " );
#endif
void** ukr_fps = bli_malloc_intl( n_ukrs * sizeof( void* ) );
void** ukr_fps = bli_malloc_intl( n_ukrs * sizeof( void* ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l3_sup_kers(): " );
#endif
bool* ukr_prefs = bli_malloc_intl( n_ukrs * sizeof( bool ) );
bool* ukr_prefs = bli_malloc_intl( n_ukrs * sizeof( bool ), &r_val );
// -- Begin variable argument section --
@@ -1287,23 +1295,24 @@ void bli_cntx_set_l1f_kers( dim_t n_kers, ... )
va_list args;
dim_t i;
err_t r_val;
// Allocate some temporary local arrays.
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l1f_kers(): " );
#endif
l1fkr_t* ker_ids = bli_malloc_intl( n_kers * sizeof( l1fkr_t ) );
l1fkr_t* ker_ids = bli_malloc_intl( n_kers * sizeof( l1fkr_t ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l1f_kers(): " );
#endif
num_t* ker_dts = bli_malloc_intl( n_kers * sizeof( num_t ) );
num_t* ker_dts = bli_malloc_intl( n_kers * sizeof( num_t ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l1f_kers(): " );
#endif
void_fp* ker_fps = bli_malloc_intl( n_kers * sizeof( void_fp ) );
void_fp* ker_fps = bli_malloc_intl( n_kers * sizeof( void_fp ), &r_val );
// -- Begin variable argument section --
@@ -1405,23 +1414,24 @@ void bli_cntx_set_l1v_kers( dim_t n_kers, ... )
va_list args;
dim_t i;
err_t r_val;
// Allocate some temporary local arrays.
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l1v_kers(): " );
#endif
l1vkr_t* ker_ids = bli_malloc_intl( n_kers * sizeof( l1vkr_t ) );
l1vkr_t* ker_ids = bli_malloc_intl( n_kers * sizeof( l1vkr_t ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l1v_kers(): " );
#endif
num_t* ker_dts = bli_malloc_intl( n_kers * sizeof( num_t ) );
num_t* ker_dts = bli_malloc_intl( n_kers * sizeof( num_t ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_l1v_kers(): " );
#endif
void_fp* ker_fps = bli_malloc_intl( n_kers * sizeof( void_fp ) );
void_fp* ker_fps = bli_malloc_intl( n_kers * sizeof( void_fp ), &r_val );
// -- Begin variable argument section --
@@ -1523,23 +1533,24 @@ void bli_cntx_set_packm_kers( dim_t n_kers, ... )
va_list args;
dim_t i;
err_t r_val;
// Allocate some temporary local arrays.
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_packm_kers(): " );
#endif
l1mkr_t* ker_ids = bli_malloc_intl( n_kers * sizeof( l1mkr_t ) );
l1mkr_t* ker_ids = bli_malloc_intl( n_kers * sizeof( l1mkr_t ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_packm_kers(): " );
#endif
num_t* ker_dts = bli_malloc_intl( n_kers * sizeof( num_t ) );
num_t* ker_dts = bli_malloc_intl( n_kers * sizeof( num_t ), &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_cntx_set_packm_kers(): " );
#endif
void_fp* ker_fps = bli_malloc_intl( n_kers * sizeof( void_fp ) );
void_fp* ker_fps = bli_malloc_intl( n_kers * sizeof( void_fp ), &r_val );
// -- Begin variable argument section --

View File

@@ -44,8 +44,9 @@ func_t* bli_func_create
)
{
func_t* f;
err_t r_val;
f = ( func_t* ) bli_malloc_intl( sizeof(func_t) );
f = ( func_t* )bli_malloc_intl( sizeof( func_t ), &r_val );
bli_func_init
(

View File

@@ -337,6 +337,8 @@ void bli_gks_register_cntx
void_fp ind_fp
)
{
err_t r_val;
// This function is called by bli_gks_init() for each architecture that
// will be supported by BLIS. It takes an architecture id and three
// function pointers, one to a function that initializes a native context
@@ -385,7 +387,7 @@ void bli_gks_register_cntx
// needs to be allocated. Allocate the memory and initialize it to
// zeros/NULL, storing the address of the allocated memory at the element
// for the current architecture id.
gks[ id ] = bli_calloc_intl( sizeof( cntx_t* ) * BLIS_NUM_IND_METHODS );
gks[ id ] = bli_calloc_intl( sizeof( cntx_t* ) * BLIS_NUM_IND_METHODS, &r_val );
// Alias the allocated array for readability.
cntx_t** restrict gks_id = gks[ id ];
@@ -397,7 +399,7 @@ void bli_gks_register_cntx
// Allocate memory for a single context and store the address at
// the element in the gks[ id ] array that is reserved for native
// execution.
gks_id[ BLIS_NAT ] = bli_calloc_intl( sizeof( cntx_t ) );
gks_id[ BLIS_NAT ] = bli_calloc_intl( sizeof( cntx_t ), &r_val );
// Alias the allocated context address for readability.
cntx_t* restrict gks_id_nat = gks_id[ BLIS_NAT ];
@@ -494,6 +496,7 @@ cntx_t* bli_gks_query_ind_cntx
bli_init_once();
cntx_t* gks_id_ind;
err_t r_val;
// Return the address of a context that will be suited for executing a
// level-3 operation via the requested induced method (and datatype) for
@@ -552,7 +555,7 @@ cntx_t* bli_gks_query_ind_cntx
// If gks_id_ind is NULL, then we know we must allocate and then
// initialize the context, storing its address back to
// gks_id[ ind ].
gks_id_ind = bli_calloc_intl( sizeof( cntx_t ) );
gks_id_ind = bli_calloc_intl( sizeof( cntx_t ), &r_val );
gks_id[ ind ] = gks_id_ind;
// Before we can call the induced method context initialization

View File

@@ -69,14 +69,6 @@ gint_t bli_info_get_pool_addr_offset_size_a( void ) { return BLIS_POOL_ADDR_OF
gint_t bli_info_get_pool_addr_offset_size_b( void ) { return BLIS_POOL_ADDR_OFFSET_SIZE_B; }
gint_t bli_info_get_pool_addr_offset_size_c( void ) { return BLIS_POOL_ADDR_OFFSET_SIZE_C; }
gint_t bli_info_get_pool_addr_offset_size_gen( void ) { return BLIS_POOL_ADDR_OFFSET_SIZE_GEN; }
gint_t bli_info_get_enable_stay_auto_init( void )
{
#ifdef BLIS_ENABLE_STAY_AUTO_INITIALIZED
return 1;
#else
return 0;
#endif
}
gint_t bli_info_get_enable_blas( void )
{
#ifdef BLIS_ENABLE_BLAS

View File

@@ -56,18 +56,10 @@ void bli_init_auto( void )
void bli_finalize_auto( void )
{
#ifdef BLIS_ENABLE_STAY_AUTO_INITIALIZED
// If BLIS was configured to stay initialized after being automatically
// initialized, we honor the configuration request and do nothing.
// BLIS will remain initialized unless and until the user explicitly
// calls bli_finalize().
#else
bli_finalize_once();
#endif
// The _auto() functions are used when initializing the BLAS compatibility
// layer. It would not make much sense to automatically initialize and
// finalize for every BLAS routine call; therefore, we remain initialized
// unless and until the application explicitly calls bli_finalize().
}
// -----------------------------------------------------------------------------

View File

@@ -71,7 +71,7 @@ void bli_free_pool( void* p )
// -----------------------------------------------------------------------------
void* bli_malloc_user( size_t size )
void* bli_malloc_user( size_t size, err_t* r_val )
{
const malloc_ft malloc_fp = BLIS_MALLOC_USER;
const size_t align_size = BLIS_HEAP_ADDR_ALIGN_SIZE;
@@ -82,7 +82,9 @@ void* bli_malloc_user( size_t size )
fflush( stdout );
#endif
return bli_fmalloc_align( malloc_fp, size, align_size );
void* p = bli_fmalloc_align( malloc_fp, size, align_size, r_val );
return p;
}
void bli_free_user( void* p )
@@ -97,7 +99,7 @@ void bli_free_user( void* p )
// -----------------------------------------------------------------------------
void* bli_malloc_intl( size_t size )
void* bli_malloc_intl( size_t size, err_t* r_val )
{
const malloc_ft malloc_fp = BLIS_MALLOC_INTL;
@@ -106,18 +108,21 @@ void* bli_malloc_intl( size_t size )
fflush( stdout );
#endif
return bli_fmalloc_noalign( malloc_fp, size );
void* p = bli_fmalloc_noalign( malloc_fp, size, r_val );
return p;
}
void* bli_calloc_intl( size_t size )
void* bli_calloc_intl( size_t size, err_t* r_val )
{
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_calloc_intl(): " );
#endif
void* p = bli_malloc_intl( size );
void* p = bli_malloc_intl( size, r_val );
memset( p, 0, size );
if ( bli_is_success( *r_val ) )
memset( p, 0, size );
return p;
}
@@ -138,7 +143,8 @@ void* bli_fmalloc_align
(
malloc_ft f,
size_t size,
size_t align_size
size_t align_size,
err_t* r_val
)
{
const size_t ptr_size = sizeof( void* );
@@ -165,6 +171,9 @@ void* bli_fmalloc_align
if ( bli_error_checking_is_enabled() )
bli_fmalloc_post_check( p_orig );
// The pseudo-return value isn't used yet.
*r_val = BLIS_SUCCESS;
// Advance the pointer by one pointer element.
p_byte = p_orig;
p_byte += ptr_size;
@@ -226,7 +235,8 @@ void bli_ffree_align
void* bli_fmalloc_noalign
(
malloc_ft f,
size_t size
size_t size,
err_t* r_val
)
{
void* p = f( size );
@@ -235,6 +245,9 @@ void* bli_fmalloc_noalign
if ( bli_error_checking_is_enabled() )
bli_fmalloc_post_check( p );
// The pseudo-return value isn't used yet.
*r_val = BLIS_SUCCESS;
return p;
}

View File

@@ -34,8 +34,8 @@
*/
// Typedef function pointer types for malloc() and free() substitutes.
typedef void* (*malloc_ft) ( size_t size );
typedef void (*free_ft) ( void* p );
//typedef void* (*malloc_ft) ( size_t size );
//typedef void (*free_ft) ( void* p );
// -----------------------------------------------------------------------------
@@ -44,19 +44,19 @@ BLIS_EXPORT_BLIS void* bli_malloc_pool( size_t size );
BLIS_EXPORT_BLIS void bli_free_pool( void* p );
#endif
void* bli_malloc_intl( size_t size );
void* bli_calloc_intl( size_t size );
void* bli_malloc_intl( size_t size, err_t* r_val );
void* bli_calloc_intl( size_t size, err_t* r_val );
void bli_free_intl( void* p );
BLIS_EXPORT_BLIS void* bli_malloc_user( size_t size );
BLIS_EXPORT_BLIS void* bli_malloc_user( size_t size, err_t* r_val );
BLIS_EXPORT_BLIS void bli_free_user( void* p );
// -----------------------------------------------------------------------------
void* bli_fmalloc_align( malloc_ft f, size_t size, size_t align_size );
void* bli_fmalloc_align( malloc_ft f, size_t size, size_t align_size, err_t* r_val );
void bli_ffree_align( free_ft f, void* p );
void* bli_fmalloc_noalign( malloc_ft f, size_t size );
void* bli_fmalloc_noalign( malloc_ft f, size_t size, err_t* r_val );
void bli_ffree_noalign( free_ft f, void* p );
void bli_fmalloc_align_check( malloc_ft f, size_t size, size_t align_size );

View File

@@ -44,8 +44,9 @@ mbool_t* bli_mbool_create
)
{
mbool_t* b;
err_t r_val;
b = ( mbool_t* ) bli_malloc_intl( sizeof(mbool_t) );
b = ( mbool_t* ) bli_malloc_intl( sizeof( mbool_t ), &r_val );
bli_mbool_init
(

View File

@@ -39,7 +39,7 @@
void bli_memsys_init( void )
{
// Query a native context so we have something to pass into
// bli_membrk_init_pools(). We use BLIS_DOUBLE for the datatype,
// bli_pba_init_pools(). We use BLIS_DOUBLE for the datatype,
// but the dt argument is actually only used when initializing
// contexts for induced methods.
// NOTE: Instead of calling bli_gks_query_cntx(), we call
@@ -47,7 +47,7 @@ void bli_memsys_init( void )
cntx_t* cntx_p = bli_gks_query_cntx_noinit();
// Initialize the packing block allocator and its data structures.
bli_membrk_init( cntx_p );
bli_pba_init( cntx_p );
// Initialize the small block allocator and its data structures.
bli_sba_init();
@@ -58,7 +58,7 @@ void bli_memsys_finalize( void )
// Finalize the small block allocator and its data structures.
bli_sba_finalize();
// Finalize the global membrk_t object and its data structures.
bli_membrk_finalize();
// Finalize the packing block allocator and its data structures.
bli_pba_finalize();
}

View File

@@ -147,6 +147,7 @@ void bli_obj_alloc_buffer
siz_t elem_size;
siz_t buffer_size;
void* p;
err_t r_val;
bli_init_once();
@@ -195,7 +196,7 @@ void bli_obj_alloc_buffer
buffer_size = ( siz_t )n_elem * elem_size;
// Allocate the buffer.
p = bli_malloc_user( buffer_size );
p = bli_malloc_user( buffer_size, &r_val );
// Set individual fields.
bli_obj_set_buffer( p, obj );

View File

@@ -57,22 +57,22 @@ void bli_pack_finalize( void )
// -----------------------------------------------------------------------------
dim_t bli_pack_get_pack_a( void )
void bli_pack_get_pack_a( bool* pack_a )
{
// We must ensure that global_rntm has been initialized.
bli_init_once();
return bli_rntm_pack_a( &global_rntm );
*pack_a = bli_rntm_pack_a( &global_rntm );
}
// -----------------------------------------------------------------------------
dim_t bli_pack_get_pack_b( void )
void bli_pack_get_pack_b( bool* pack_b )
{
// We must ensure that global_rntm has been initialized.
bli_init_once();
return bli_rntm_pack_b( &global_rntm );
*pack_b = bli_rntm_pack_b( &global_rntm );
}
// ----------------------------------------------------------------------------
@@ -101,7 +101,7 @@ void bli_pack_set_pack_b( bool pack_b )
// Acquire the mutex protecting global_rntm.
bli_pthread_mutex_lock( &global_rntm_mutex );
bli_rntm_set_pack_a( pack_b, &global_rntm );
bli_rntm_set_pack_b( pack_b, &global_rntm );
// Release the mutex protecting global_rntm.
bli_pthread_mutex_unlock( &global_rntm_mutex );

View File

@@ -38,10 +38,10 @@
void bli_pack_init( void );
void bli_pack_finalize( void );
BLIS_EXPORT_BLIS dim_t bli_pack_get_pack_a( void );
BLIS_EXPORT_BLIS dim_t bli_pack_get_pack_b( void );
BLIS_EXPORT_BLIS void bli_pack_set_pack_a( bool pack_a );
BLIS_EXPORT_BLIS void bli_pack_set_pack_b( bool pack_b );
BLIS_EXPORT_BLIS void bli_pack_get_pack_a( bool* pack_a );
BLIS_EXPORT_BLIS void bli_pack_get_pack_b( bool* pack_b );
BLIS_EXPORT_BLIS void bli_pack_set_pack_a( bool pack_a );
BLIS_EXPORT_BLIS void bli_pack_set_pack_b( bool pack_b );
void bli_pack_init_rntm_from_env( rntm_t* rntm );

View File

@@ -36,55 +36,61 @@
#include "blis.h"
static membrk_t global_membrk;
// Statically initialize the mutex within the packing block allocator object.
static pba_t pba = { .mutex = BLIS_PTHREAD_MUTEX_INITIALIZER };
// -----------------------------------------------------------------------------
membrk_t* bli_membrk_query( void )
pba_t* bli_pba_query( void )
{
return &global_membrk;
return &pba;
}
void bli_membrk_init
void bli_pba_init
(
cntx_t* restrict cntx
)
{
membrk_t* restrict membrk = bli_membrk_query();
pba_t* restrict pba = bli_pba_query();
const siz_t align_size = BLIS_POOL_ADDR_ALIGN_SIZE_GEN;
malloc_ft malloc_fp = BLIS_MALLOC_POOL;
free_ft free_fp = BLIS_FREE_POOL;
// These fields are used for general-purpose allocation (ie: buf_type
// equal to BLIS_BUFFER_FOR_GEN_USE) within bli_membrk_acquire_m().
bli_membrk_set_align_size( align_size, membrk );
bli_membrk_set_malloc_fp( malloc_fp, membrk );
bli_membrk_set_free_fp( free_fp, membrk );
// equal to BLIS_BUFFER_FOR_GEN_USE) within bli_pba_acquire_m().
bli_pba_set_align_size( align_size, pba );
bli_pba_set_malloc_fp( malloc_fp, pba );
bli_pba_set_free_fp( free_fp, pba );
// The mutex field of pba is initialized statically above. This
// keeps bli_pba_init() simpler and removes the possibility of
// something going wrong during mutex initialization.
bli_membrk_init_mutex( membrk );
#ifdef BLIS_ENABLE_PBA_POOLS
bli_membrk_init_pools( cntx, membrk );
bli_pba_init_pools( cntx, pba );
#endif
}
void bli_membrk_finalize
void bli_pba_finalize
(
void
)
{
membrk_t* restrict membrk = bli_membrk_query();
bli_membrk_set_malloc_fp( NULL, membrk );
bli_membrk_set_free_fp( NULL, membrk );
pba_t* restrict pba = bli_pba_query();
#ifdef BLIS_ENABLE_PBA_POOLS
bli_membrk_finalize_pools( membrk );
bli_pba_finalize_pools( pba );
#endif
bli_membrk_finalize_mutex( membrk );
// The mutex field of pba is initialized statically above, and
// therefore never destroyed.
bli_pba_set_malloc_fp( NULL, pba );
bli_pba_set_free_fp( NULL, pba );
}
void bli_membrk_acquire_m
void bli_pba_acquire_m
(
rntm_t* rntm,
siz_t req_size,
@@ -95,37 +101,38 @@ void bli_membrk_acquire_m
pool_t* pool;
pblk_t* pblk;
dim_t pi;
err_t r_val;
// If the internal memory pools for packing block allocator are disabled,
// we spoof the buffer type as BLIS_BUFFER_FOR_GEN_USE to induce the
// immediate usage of bli_membrk_malloc().
// immediate usage of bli_pba_malloc().
#ifndef BLIS_ENABLE_PBA_POOLS
buf_type = BLIS_BUFFER_FOR_GEN_USE;
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_membrk_acquire_m(): bli_fmalloc_align(): size %ld\n",
printf( "bli_pba_acquire_m(): bli_fmalloc_align(): size %ld\n",
( long )req_size );
#endif
#endif
// Query the memory broker from the runtime.
membrk_t* membrk = bli_rntm_membrk( rntm );
pba_t* pba = bli_rntm_pba( rntm );
if ( buf_type == BLIS_BUFFER_FOR_GEN_USE )
{
malloc_ft malloc_fp = bli_membrk_malloc_fp( membrk );
siz_t align_size = bli_membrk_align_size( membrk );
malloc_ft malloc_fp = bli_pba_malloc_fp( pba );
siz_t align_size = bli_pba_align_size( pba );
// For general-use buffer requests, dynamically allocating memory
// is assumed to be sufficient.
void* buf = bli_fmalloc_align( malloc_fp, req_size, align_size );
void* buf = bli_fmalloc_align( malloc_fp, req_size, align_size, &r_val );
// Initialize the mem_t object with:
// - the address of the memory block,
// - the buffer type (a packbuf_t value),
// - the size of the requested region,
// - the membrk_t from which the mem_t entry was acquired.
// - the pba_t from which the mem_t entry was acquired.
// NOTE: We initialize the pool field to NULL since this block did not
// come from a memory pool.
bli_mem_set_buffer( buf, mem );
@@ -142,13 +149,13 @@ void bli_membrk_acquire_m
// Map the requested packed buffer type to a zero-based index, which
// we then use to select the corresponding memory pool.
pi = bli_packbuf_index( buf_type );
pool = bli_membrk_pool( pi, membrk );
pool = bli_pba_pool( pi, pba );
// Extract the address of the pblk_t struct within the mem_t.
pblk = bli_mem_pblk( mem );
// Acquire the mutex associated with the membrk object.
bli_membrk_lock( membrk );
// Acquire the mutex associated with the pba object.
bli_pba_lock( pba );
// BEGIN CRITICAL SECTION
{
@@ -166,8 +173,8 @@ void bli_membrk_acquire_m
}
// END CRITICAL SECTION
// Release the mutex associated with the membrk object.
bli_membrk_unlock( membrk );
// Release the mutex associated with the pba object.
bli_pba_unlock( pba );
// Query the block_size from the pblk_t. This will be at least
// req_size, perhaps larger.
@@ -178,7 +185,7 @@ void bli_membrk_acquire_m
// - the address of the memory pool to which it belongs,
// - the size of the contiguous memory block (NOT the size of the
// requested region),
// - the membrk_t from which the mem_t entry was acquired.
// - the pba_t from which the mem_t entry was acquired.
// The actual (aligned) address is already stored in the mem_t
// struct's pblk_t field.
bli_mem_set_buf_type( buf_type, mem );
@@ -188,7 +195,7 @@ void bli_membrk_acquire_m
}
void bli_membrk_release
void bli_pba_release
(
rntm_t* rntm,
mem_t* mem
@@ -199,21 +206,21 @@ void bli_membrk_release
pblk_t* pblk;
// Query the memory broker from the runtime.
membrk_t* membrk = bli_rntm_membrk( rntm );
pba_t* pba = bli_rntm_pba( rntm );
// Extract the buffer type so we know what kind of memory was allocated.
buf_type = bli_mem_buf_type( mem );
#ifndef BLIS_ENABLE_PBA_POOLS
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_membrk_release(): bli_ffree_align(): size %ld\n",
printf( "bli_pba_release(): bli_ffree_align(): size %ld\n",
( long )bli_mem_size( mem ) );
#endif
#endif
if ( buf_type == BLIS_BUFFER_FOR_GEN_USE )
{
free_ft free_fp = bli_membrk_free_fp( membrk );
free_ft free_fp = bli_pba_free_fp( pba );
void* buf = bli_mem_buffer( mem );
// For general-use buffers, we dynamically allocate memory, and so
@@ -229,8 +236,8 @@ void bli_membrk_release
// Extract the address of the pblk_t struct within the mem_t struct.
pblk = bli_mem_pblk( mem );
// Acquire the mutex associated with the membrk object.
bli_membrk_lock( membrk );
// Acquire the mutex associated with the pba object.
bli_pba_lock( pba );
// BEGIN CRITICAL SECTION
{
@@ -241,15 +248,15 @@ void bli_membrk_release
}
// END CRITICAL SECTION
// Release the mutex associated with the membrk object.
bli_membrk_unlock( membrk );
// Release the mutex associated with the pba object.
bli_pba_unlock( pba );
}
// Clear the mem_t object so that it appears unallocated. This clears:
// - the pblk_t struct's fields (ie: the buffer addresses)
// - the pool field
// - the size field
// - the membrk field
// - the pba field
// NOTE: We do not clear the buf_type field since there is no
// "uninitialized" value for packbuf_t.
bli_mem_clear( mem );
@@ -257,35 +264,38 @@ void bli_membrk_release
#if 0
void bli_membrk_acquire_v
void bli_pba_acquire_v
(
membrk_t* membrk,
siz_t req_size,
mem_t* mem
pba_t* pba,
siz_t req_size,
mem_t* mem
)
{
bli_membrk_acquire_m( membrk,
req_size,
BLIS_BUFFER_FOR_GEN_USE,
mem );
bli_pba_acquire_m
(
pba,
req_size,
BLIS_BUFFER_FOR_GEN_USE,
mem
);
}
#endif
void bli_membrk_rntm_set_membrk
void bli_pba_rntm_set_pba
(
rntm_t* rntm
)
{
membrk_t* membrk = bli_membrk_query();
pba_t* pba = bli_pba_query();
bli_rntm_set_membrk( membrk, rntm );
bli_rntm_set_pba( pba, rntm );
}
siz_t bli_membrk_pool_size
siz_t bli_pba_pool_size
(
membrk_t* membrk,
pba_t* pba,
packbuf_t buf_type
)
{
@@ -305,7 +315,7 @@ siz_t bli_membrk_pool_size
// Acquire the pointer to the pool corresponding to the buf_type
// provided.
pool_index = bli_packbuf_index( buf_type );
pool = bli_membrk_pool( pool_index, membrk );
pool = bli_pba_pool( pool_index, pba );
// Compute the pool "size" as the product of the block size
// and the number of blocks in the pool.
@@ -318,10 +328,10 @@ siz_t bli_membrk_pool_size
// -----------------------------------------------------------------------------
void bli_membrk_init_pools
void bli_pba_init_pools
(
cntx_t* cntx,
membrk_t* membrk
cntx_t* cntx,
pba_t* pba
)
{
// Map each of the packbuf_t values to an index starting at zero.
@@ -330,9 +340,9 @@ void bli_membrk_init_pools
const dim_t index_c = bli_packbuf_index( BLIS_BUFFER_FOR_C_PANEL );
// Alias the pool addresses to convenient identifiers.
pool_t* pool_a = bli_membrk_pool( index_a, membrk );
pool_t* pool_b = bli_membrk_pool( index_b, membrk );
pool_t* pool_c = bli_membrk_pool( index_c, membrk );
pool_t* pool_a = bli_pba_pool( index_a, pba );
pool_t* pool_b = bli_pba_pool( index_b, pba );
pool_t* pool_c = bli_pba_pool( index_c, pba );
// Start with empty pools.
const dim_t num_blocks_a = 0;
@@ -364,10 +374,10 @@ void bli_membrk_init_pools
free_ft free_fp = BLIS_FREE_POOL;
// Determine the block size for each memory pool.
bli_membrk_compute_pool_block_sizes( &block_size_a,
&block_size_b,
&block_size_c,
cntx );
bli_pba_compute_pool_block_sizes( &block_size_a,
&block_size_b,
&block_size_c,
cntx );
// Initialize the memory pools for A, B, and C.
bli_pool_init( num_blocks_a, block_ptrs_len_a, block_size_a, align_size_a,
@@ -378,9 +388,9 @@ void bli_membrk_init_pools
offset_size_c, malloc_fp, free_fp, pool_c );
}
void bli_membrk_finalize_pools
void bli_pba_finalize_pools
(
membrk_t* membrk
pba_t* pba
)
{
// Map each of the packbuf_t values to an index starting at zero.
@@ -389,9 +399,9 @@ void bli_membrk_finalize_pools
dim_t index_c = bli_packbuf_index( BLIS_BUFFER_FOR_C_PANEL );
// Alias the pool addresses to convenient identifiers.
pool_t* pool_a = bli_membrk_pool( index_a, membrk );
pool_t* pool_b = bli_membrk_pool( index_b, membrk );
pool_t* pool_c = bli_membrk_pool( index_c, membrk );
pool_t* pool_a = bli_pba_pool( index_a, pba );
pool_t* pool_b = bli_pba_pool( index_b, pba );
pool_t* pool_c = bli_pba_pool( index_c, pba );
// Finalize the memory pools for A, B, and C.
bli_pool_finalize( pool_a );
@@ -401,7 +411,7 @@ void bli_membrk_finalize_pools
// -----------------------------------------------------------------------------
void bli_membrk_compute_pool_block_sizes
void bli_pba_compute_pool_block_sizes
(
siz_t* bs_a,
siz_t* bs_b,
@@ -429,11 +439,11 @@ void bli_membrk_compute_pool_block_sizes
// Avoid considering induced methods for real datatypes.
if ( bli_is_real( dt ) && im != BLIS_NAT ) continue;
bli_membrk_compute_pool_block_sizes_dt( dt,
&bs_dt_a,
&bs_dt_b,
&bs_dt_c,
cntx );
bli_pba_compute_pool_block_sizes_dt( dt,
&bs_dt_a,
&bs_dt_b,
&bs_dt_c,
cntx );
bs_cand_a = bli_max( bs_dt_a, bs_cand_a );
bs_cand_b = bli_max( bs_dt_b, bs_cand_b );
@@ -448,7 +458,7 @@ void bli_membrk_compute_pool_block_sizes
// -----------------------------------------------------------------------------
void bli_membrk_compute_pool_block_sizes_dt
void bli_pba_compute_pool_block_sizes_dt
(
num_t dt,
siz_t* bs_a,

View File

@@ -37,83 +37,100 @@
#ifndef BLIS_MEMBRK_H
#define BLIS_MEMBRK_H
// membrk init
// Packing block allocator (formerly memory broker)
BLIS_INLINE void bli_membrk_init_mutex( membrk_t* membrk )
/*
typedef struct pba_s
{
bli_pthread_mutex_init( &(membrk->mutex), NULL );
pool_t pools[3];
bli_pthread_mutex_t mutex;
// These fields are used for general-purpose allocation.
siz_t align_size;
malloc_ft malloc_fp;
free_ft free_fp;
} pba_t;
*/
// pba init
//BLIS_INLINE void bli_pba_init_mutex( pba_t* pba )
//{
// bli_pthread_mutex_init( &(pba->mutex), NULL );
//}
//BLIS_INLINE void bli_pba_finalize_mutex( pba_t* pba )
//{
// bli_pthread_mutex_destroy( &(pba->mutex) );
//}
// pba query
BLIS_INLINE pool_t* bli_pba_pool( dim_t pool_index, pba_t* pba )
{
return &(pba->pools[ pool_index ]);
}
BLIS_INLINE void bli_membrk_finalize_mutex( membrk_t* membrk )
BLIS_INLINE siz_t bli_pba_align_size( pba_t* pba )
{
bli_pthread_mutex_destroy( &(membrk->mutex) );
return pba->align_size;
}
// membrk query
BLIS_INLINE pool_t* bli_membrk_pool( dim_t pool_index, membrk_t* membrk )
BLIS_INLINE malloc_ft bli_pba_malloc_fp( pba_t* pba )
{
return &(membrk->pools[ pool_index ]);
return pba->malloc_fp;
}
BLIS_INLINE siz_t bli_membrk_align_size( membrk_t* membrk )
BLIS_INLINE free_ft bli_pba_free_fp( pba_t* pba )
{
return membrk->align_size;
return pba->free_fp;
}
BLIS_INLINE malloc_ft bli_membrk_malloc_fp( membrk_t* membrk )
// pba modification
BLIS_INLINE void bli_pba_set_align_size( siz_t align_size, pba_t* pba )
{
return membrk->malloc_fp;
pba->align_size = align_size;
}
BLIS_INLINE free_ft bli_membrk_free_fp( membrk_t* membrk )
BLIS_INLINE void bli_pba_set_malloc_fp( malloc_ft malloc_fp, pba_t* pba )
{
return membrk->free_fp;
pba->malloc_fp = malloc_fp;
}
// membrk modification
BLIS_INLINE void bli_membrk_set_align_size( siz_t align_size, membrk_t* membrk )
BLIS_INLINE void bli_pba_set_free_fp( free_ft free_fp, pba_t* pba )
{
membrk->align_size = align_size;
pba->free_fp = free_fp;
}
BLIS_INLINE void bli_membrk_set_malloc_fp( malloc_ft malloc_fp, membrk_t* membrk )
// pba action
BLIS_INLINE void bli_pba_lock( pba_t* pba )
{
membrk->malloc_fp = malloc_fp;
bli_pthread_mutex_lock( &(pba->mutex) );
}
BLIS_INLINE void bli_membrk_set_free_fp( free_ft free_fp, membrk_t* membrk )
BLIS_INLINE void bli_pba_unlock( pba_t* pba )
{
membrk->free_fp = free_fp;
}
// membrk action
BLIS_INLINE void bli_membrk_lock( membrk_t* membrk )
{
bli_pthread_mutex_lock( &(membrk->mutex) );
}
BLIS_INLINE void bli_membrk_unlock( membrk_t* membrk )
{
bli_pthread_mutex_unlock( &(membrk->mutex) );
bli_pthread_mutex_unlock( &(pba->mutex) );
}
// -----------------------------------------------------------------------------
membrk_t* bli_membrk_query( void );
pba_t* bli_pba_query( void );
void bli_membrk_init
void bli_pba_init
(
cntx_t* cntx
);
void bli_membrk_finalize
void bli_pba_finalize
(
void
);
void bli_membrk_acquire_m
void bli_pba_acquire_m
(
rntm_t* rntm,
siz_t req_size,
@@ -121,43 +138,43 @@ void bli_membrk_acquire_m
mem_t* mem
);
void bli_membrk_release
void bli_pba_release
(
rntm_t* rntm,
mem_t* mem
);
void bli_membrk_rntm_set_membrk
void bli_pba_rntm_set_pba
(
rntm_t* rntm
);
siz_t bli_membrk_pool_size
siz_t bli_pba_pool_size
(
membrk_t* membrk,
pba_t* pba,
packbuf_t buf_type
);
// ----------------------------------------------------------------------------
void bli_membrk_init_pools
void bli_pba_init_pools
(
cntx_t* cntx,
membrk_t* membrk
cntx_t* cntx,
pba_t* pba
);
void bli_membrk_finalize_pools
void bli_pba_finalize_pools
(
membrk_t* membrk
pba_t* pba
);
void bli_membrk_compute_pool_block_sizes
void bli_pba_compute_pool_block_sizes
(
siz_t* bs_a,
siz_t* bs_b,
siz_t* bs_c,
cntx_t* cntx
);
void bli_membrk_compute_pool_block_sizes_dt
void bli_pba_compute_pool_block_sizes_dt
(
num_t dt,
siz_t* bs_a,

View File

@@ -49,6 +49,8 @@ void bli_pool_init
pool_t* restrict pool
)
{
err_t r_val;
// Make sure that block_ptrs_len is at least num_blocks.
block_ptrs_len = bli_max( block_ptrs_len, num_blocks );
@@ -62,7 +64,7 @@ void bli_pool_init
// well as pool blocks? If so, don't forget to s/bli_free_intl/free_fp/g.
pblk_t* restrict block_ptrs
=
bli_malloc_intl( block_ptrs_len * sizeof( pblk_t ) );
bli_malloc_intl( block_ptrs_len * sizeof( pblk_t ), &r_val );
// Allocate and initialize each entry in the block_ptrs array.
for ( dim_t i = 0; i < num_blocks; ++i )
@@ -343,6 +345,8 @@ void bli_pool_grow
pool_t* restrict pool
)
{
err_t r_val;
// If the requested increase is zero, return early.
if ( num_blocks_add == 0 ) return;
@@ -377,7 +381,7 @@ void bli_pool_grow
// well as pool blocks? If so, don't forget to s/bli_free_intl/free_fp/g.
pblk_t* restrict block_ptrs_new
=
bli_malloc_intl( block_ptrs_len_new * sizeof( pblk_t ) );
bli_malloc_intl( block_ptrs_len_new * sizeof( pblk_t ), &r_val );
// Query the top_index of the pool.
const siz_t top_index = bli_pool_top_index( pool );
@@ -503,6 +507,8 @@ void bli_pool_alloc_block
pblk_t* restrict block
)
{
err_t r_val;
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_pool_alloc_block(): calling fmalloc_align(): size %d (align %d, offset %d)\n",
( int )block_size, ( int )align_size, ( int )offset_size );
@@ -516,7 +522,7 @@ void bli_pool_alloc_block
// that many bytes at the beginning of the allocated memory.
void* restrict buf
=
bli_fmalloc_align( malloc_fp, block_size + offset_size, align_size );
bli_fmalloc_align( malloc_fp, block_size + offset_size, align_size, &r_val );
#if 0
// NOTE: This code is disabled because it is not needed, since

View File

@@ -36,6 +36,7 @@
bool bli_obj_equals( obj_t* a, obj_t* b )
{
#if 0
bool r_val = FALSE;
num_t dt_a;
num_t dt_b;
@@ -80,6 +81,18 @@ bool bli_obj_equals( obj_t* a, obj_t* b )
}
return r_val;
#else
bool r_val;
if ( bli_obj_is_1x1( a ) && bli_obj_is_1x1( b ) )
bli_eqsc( a, b, &r_val );
else if ( bli_obj_is_vector( a ) && bli_obj_is_vector( b ) )
bli_eqv( a, b, &r_val );
else
bli_eqm( a, b, &r_val );
return r_val;
#endif
}
bool bli_obj_imag_equals( obj_t* a, obj_t* b )

View File

@@ -52,7 +52,7 @@ typedef struct rntm_s
bool l3_sup;
pool_t* sba_pool;
membrk_t* membrk;
pba_t* pba;
} rntm_t;
*/
@@ -124,9 +124,9 @@ BLIS_INLINE pool_t* bli_rntm_sba_pool( rntm_t* rntm )
return rntm->sba_pool;
}
BLIS_INLINE membrk_t* bli_rntm_membrk( rntm_t* rntm )
BLIS_INLINE pba_t* bli_rntm_pba( rntm_t* rntm )
{
return rntm->membrk;
return rntm->pba;
}
#if 0
@@ -205,9 +205,9 @@ BLIS_INLINE void bli_rntm_set_sba_pool( pool_t* sba_pool, rntm_t* rntm )
rntm->sba_pool = sba_pool;
}
BLIS_INLINE void bli_rntm_set_membrk( membrk_t* membrk, rntm_t* rntm )
BLIS_INLINE void bli_rntm_set_pba( pba_t* pba, rntm_t* rntm )
{
rntm->membrk = membrk;
rntm->pba = pba;
}
BLIS_INLINE void bli_rntm_clear_num_threads_only( rntm_t* rntm )
@@ -222,9 +222,9 @@ BLIS_INLINE void bli_rntm_clear_sba_pool( rntm_t* rntm )
{
bli_rntm_set_sba_pool( NULL, rntm );
}
BLIS_INLINE void bli_rntm_clear_membrk( rntm_t* rntm )
BLIS_INLINE void bli_rntm_clear_pba( rntm_t* rntm )
{
bli_rntm_set_membrk( NULL, rntm );
bli_rntm_set_pba( NULL, rntm );
}
//
@@ -313,7 +313,7 @@ BLIS_INLINE void bli_rntm_clear_l3_sup( rntm_t* rntm )
.pack_b = FALSE, \
.l3_sup = TRUE, \
.sba_pool = NULL, \
.membrk = NULL, \
.pba = NULL, \
} \
BLIS_INLINE void bli_rntm_init( rntm_t* rntm )
@@ -327,7 +327,7 @@ BLIS_INLINE void bli_rntm_init( rntm_t* rntm )
bli_rntm_clear_l3_sup( rntm );
bli_rntm_clear_sba_pool( rntm );
bli_rntm_clear_membrk( rntm );
bli_rntm_clear_pba( rntm );
}
// -- rntm_t total thread calculation ------------------------------------------

View File

@@ -34,8 +34,9 @@
#include "blis.h"
// The small block allocator: an apool_t of array_t of pool_t.
static apool_t sba;
// Statically initialize the mutex within the small block allocator.
// Note that the sba is an apool_t of array_t of pool_t.
static apool_t sba = { .mutex = BLIS_PTHREAD_MUTEX_INITIALIZER };
apool_t* bli_sba_query( void )
{
@@ -61,11 +62,12 @@ void* bli_sba_acquire
)
{
void* block;
err_t r_val;
#ifdef BLIS_ENABLE_SBA_POOLS
if ( rntm == NULL )
{
block = bli_malloc_intl( req_size );
block = bli_malloc_intl( req_size, &r_val );
}
else
{
@@ -95,7 +97,7 @@ void* bli_sba_acquire
}
#else
block = bli_malloc_intl( req_size );
block = bli_malloc_intl( req_size, &r_val );
#endif

View File

@@ -59,9 +59,9 @@ err_t bli_setijm
dim_t cs = bli_obj_col_stride( b );
num_t dt = bli_obj_dt( b );
// Return error if i or j is beyond bounds of matrix/vector.
if ( m <= i ) return BLIS_FAILURE;
if ( n <= j ) return BLIS_FAILURE;
// Return error if i or j is beyond bounds of the matrix/vector.
if ( i < 0 || m <= i ) return BLIS_FAILURE;
if ( j < 0 || n <= j ) return BLIS_FAILURE;
// Don't modify scalar constants.
if ( dt == BLIS_CONSTANT ) return BLIS_FAILURE;
@@ -133,35 +133,15 @@ err_t bli_getijm
dim_t cs = bli_obj_col_stride( b );
num_t dt = bli_obj_dt( b );
// Return error if i or j is beyond bounds of matrix/vector.
if ( m <= i ) return BLIS_FAILURE;
if ( n <= j ) return BLIS_FAILURE;
// Return error if i or j is beyond bounds of the matrix/vector.
if ( i < 0 || m <= i ) return BLIS_FAILURE;
if ( j < 0 || n <= j ) return BLIS_FAILURE;
void* b_p;
#if 0
// Handle scalar constants separately.
if ( dt == BLIS_CONSTANT )
{
if ( i == 0 && j == 0 )
{
dt = BLIS_DCOMPLEX;
b_p = bli_obj_buffer_for_const( dt, b )
}
else return BLIS_FAILURE;
}
else
{
// Query the pointer to the buffer at the adjusted offsets.
b_p = bli_obj_buffer_at_off( b );
}
#else
// Disallow access into scalar constants.
if ( dt == BLIS_CONSTANT ) return BLIS_FAILURE;
// Query the pointer to the buffer at the adjusted offsets.
b_p = bli_obj_buffer_at_off( b );
#endif
void* b_p = bli_obj_buffer_at_off( b );
// Index into the function pointer array.
getijm_fp f = ftypes_getijm[ dt ];

168
frame/base/bli_setgetijv.c Normal file
View File

@@ -0,0 +1,168 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
// Signature shared by the type-specific setijv kernels generated further
// down via GENTFUNC: the value pair (ar, ai), the element index i, and the
// vector buffer x together with its stride incx.
typedef void (*setijv_fp)
(
double ar,
double ai,
dim_t i,
void* restrict x, inc_t incx
);
// Table of type-specific setijv kernels. bli_setijv() indexes it with the
// object's num_t datatype. The contents are produced by GENARRAY
// (presumably one kernel per supported datatype character -- confirm
// against the GENARRAY definition).
static setijv_fp GENARRAY(ftypes_setijv,setijv);
// Sets element i of the vector object x from the value pair (ar, ai) by
// dispatching to the kernel registered for x's datatype.
//
// Returns BLIS_FAILURE, without invoking any kernel, if i lies outside
// [0, n) (n being the vector dimension of x) or if x has datatype
// BLIS_CONSTANT. Returns BLIS_SUCCESS otherwise.
err_t bli_setijv
     (
       double  ar,
       double  ai,
       dim_t   i,
       obj_t*  x
     )
{
	dim_t n    = bli_obj_vector_dim( x );
	// The stride is an increment rather than a dimension, so hold it as
	// inc_t to match the incx parameter of setijv_fp.
	inc_t incx = bli_obj_vector_inc( x );
	num_t dt   = bli_obj_dt( x );

	// Return error if i is beyond bounds of the vector.
	if ( i < 0 || n <= i ) return BLIS_FAILURE;

	// Don't modify scalar constants.
	if ( dt == BLIS_CONSTANT ) return BLIS_FAILURE;

	// Query the pointer to the buffer at the adjusted offsets.
	void* x_p = bli_obj_buffer_at_off( x );

	// Index into the function pointer array.
	// NOTE(review): dt indexes ftypes_setijv directly; this presumably
	// relies on x holding a datatype that has an entry in that table --
	// confirm that objects of other non-constant datatypes cannot reach
	// this point.
	setijv_fp f = ftypes_setijv[ dt ];

	// Invoke the type-specific function.
	f
	(
	  ar,
	  ai,
	  i,
	  x_p, incx
	);

	return BLIS_SUCCESS;
}
// Template for the type-specific setijv kernels. For a given ctype/ch the
// generated function views x as an array of ctype, locates element i at
// x_cast + i*incx, and passes ar, ai and that element to the z-to-ch "sets"
// macro (presumably storing the real/imaginary pair into the element after
// converting from double precision -- confirm against the sets macro
// definitions).
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
( \
double ar, \
double ai, \
dim_t i, \
void* restrict x, inc_t incx \
) \
{ \
ctype* restrict x_cast = ( ctype* )x; \
\
ctype* restrict x_i = x_cast + (i )*incx; \
\
PASTEMAC2(z,ch,sets)( ar, ai, *x_i ); \
}
// Instantiate the template above for the setijv operation (the set of
// datatypes covered is determined by INSERT_GENTFUNC_BASIC0).
INSERT_GENTFUNC_BASIC0( setijv )
// -----------------------------------------------------------------------------
// Signature shared by the type-specific getijv kernels generated further
// down via GENTFUNC: the element index i, the vector buffer x with its
// stride incx, and the two double-precision output locations ar and ai.
typedef void (*getijv_fp)
(
dim_t i,
void* restrict x, inc_t incx,
double* ar,
double* ai
);
// Table of type-specific getijv kernels. bli_getijv() indexes it with the
// object's num_t datatype. The contents are produced by GENARRAY
// (presumably one kernel per supported datatype character -- confirm
// against the GENARRAY definition).
static getijv_fp GENARRAY(ftypes_getijv,getijv);
// Reads element i of the vector object x by dispatching to the kernel
// registered for x's datatype; the kernel receives ar and ai as its output
// locations (presumably for the real and imaginary components).
//
// Returns BLIS_FAILURE, without invoking any kernel and without writing
// through ar or ai, if i lies outside [0, n) (n being the vector dimension
// of x) or if x has datatype BLIS_CONSTANT. Returns BLIS_SUCCESS otherwise.
err_t bli_getijv
      (
        dim_t   i,
        obj_t*  x,
        double* ar,
        double* ai
      )
{
	dim_t n    = bli_obj_vector_dim( x );
	// The stride is an increment rather than a dimension, so hold it as
	// inc_t to match the incx parameter of getijv_fp.
	inc_t incx = bli_obj_vector_inc( x );
	num_t dt   = bli_obj_dt( x );

	// Return error if i is beyond bounds of the vector.
	if ( i < 0 || n <= i ) return BLIS_FAILURE;

	// Disallow access into scalar constants.
	if ( dt == BLIS_CONSTANT ) return BLIS_FAILURE;

	// Query the pointer to the buffer at the adjusted offsets.
	void* x_p = bli_obj_buffer_at_off( x );

	// Index into the function pointer array.
	// NOTE(review): dt indexes ftypes_getijv directly; this presumably
	// relies on x holding a datatype that has an entry in that table --
	// confirm that objects of other non-constant datatypes cannot reach
	// this point.
	getijv_fp f = ftypes_getijv[ dt ];

	// Invoke the type-specific function.
	f
	(
	  i,
	  x_p, incx,
	  ar,
	  ai
	);

	return BLIS_SUCCESS;
}
// Template for the type-specific getijv kernels. For a given ctype/ch the
// generated function views x as an array of ctype, locates element i at
// x_cast + i*incx, and passes that element together with *ar and *ai to the
// ch-to-z "gets" macro (presumably extracting the element's real and
// imaginary components into the two doubles -- confirm against the gets
// macro definitions).
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
( \
dim_t i, \
void* restrict x, inc_t incx, \
double* ar, \
double* ai \
) \
{ \
ctype* restrict x_cast = ( ctype* )x; \
\
ctype* restrict x_i = x_cast + (i )*incx; \
\
PASTEMAC2(ch,z,gets)( *x_i, *ar, *ai ); \
}
// Instantiate the template above for the getijv operation (the set of
// datatypes covered is determined by INSERT_GENTFUNC_BASIC0).
INSERT_GENTFUNC_BASIC0( getijv )

View File

@@ -0,0 +1,78 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Object-based setter for a single vector element: takes the value pair
// (ar, ai), the element index i, and the destination vector object x.
// Returns an err_t status code.
BLIS_EXPORT_BLIS err_t bli_setijv
(
double ar,
double ai,
dim_t i,
obj_t* x
);
// Prototype template for the type-specific setijv kernels, which operate
// on a raw buffer x with stride incx instead of an obj_t.
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
( \
double ar, \
double ai, \
dim_t i, \
void* restrict x, inc_t incx \
);
// Emit the prototypes for the setijv operation (the set of datatypes
// covered is determined by INSERT_GENTPROT_BASIC0).
INSERT_GENTPROT_BASIC0( setijv )
// -----------------------------------------------------------------------------
// Object-based getter for a single vector element: takes the element index
// i, the source vector object x, and the two double-precision output
// locations ar and ai. Returns an err_t status code.
BLIS_EXPORT_BLIS err_t bli_getijv
(
dim_t i,
obj_t* x,
double* ar,
double* ai
);
// Prototype template for the type-specific getijv kernels, which operate
// on a raw buffer x with stride incx instead of an obj_t. The buffer
// parameter is named x (not b) so that it pairs with incx and agrees with
// the setijv prototype template.
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
( \
dim_t i, \
void* restrict x, inc_t incx, \
double* ar, \
double* ai \
);
// Emit the prototypes for the getijv operation (the set of datatypes
// covered is determined by INSERT_GENTPROT_BASIC0).
INSERT_GENTPROT_BASIC0( getijv )

View File

@@ -149,16 +149,6 @@
#define BLIS_RELAX_MCNR_NCMR_CONSTRAINTS
#endif
// Stay initialized after auto-initialization, unless and until the user
// explicitly calls bli_finalize().
#ifdef BLIS_DISABLE_STAY_AUTO_INITIALIZED
#undef BLIS_ENABLE_STAY_AUTO_INITIALIZED
#else
// Default behavior is enabled.
#undef BLIS_ENABLE_STAY_AUTO_INITIALIZED // In case user explicitly enabled.
#define BLIS_ENABLE_STAY_AUTO_INITIALIZED
#endif
// -- BLAS COMPATIBILITY LAYER -------------------------------------------------

View File

@@ -35,7 +35,12 @@
// This file defines macros used to allow the _oapi.c files to produce
// object APIs that omit expert parameters.
// Define the macro to remove the function name suffix (in function
// Define a macro that allows the source code to determine which interface
// (basic or expert) we are compiling.
#undef BLIS_OAPI_BASIC
#define BLIS_OAPI_BASIC
// Define the macro to omit a suffix from the function names (in function
// definitions).
#undef EX_SUF
#define EX_SUF
@@ -45,14 +50,10 @@
#undef BLIS_OAPI_EX_PARAMS
#define BLIS_OAPI_EX_PARAMS
// Define the macro to declare local expert variables that are initialized
// Define the macro to add local expert variables that are initialized
// to NULL. The "( void )" statements are to prevent unused variable
// warnings by the compiler.
#undef BLIS_OAPI_EX_DECLS
#define BLIS_OAPI_EX_DECLS cntx_t* cntx = NULL; ( void )cntx; \
rntm_t* rntm = NULL; ( void )rntm;
// Define the macro to pass the local expert variables to another function.
//#undef BLIS_TAPI_EX_VARS
//#define BLIS_TAPI_EX_VARS

View File

@@ -35,8 +35,13 @@
// This file defines macros used to allow the _oapi.c files to produce
// object APIs that contain context parameters.
// Define the macro to add a suffix to the object API function names
// (in function definitions).
// Define a macro that allows the source code to determine which interface
// (basic or expert) we are compiling.
#undef BLIS_OAPI_EXPERT
#define BLIS_OAPI_EXPERT
// Define the macro to add a suffix to the function names (in function
// definitions).
#undef EX_SUF
#define EX_SUF BLIS_OAPI_EX_SUF
@@ -50,7 +55,3 @@
#undef BLIS_OAPI_EX_DECLS
#define BLIS_OAPI_EX_DECLS
// Define the macro to pass the local expert variables to another function.
//#undef BLIS_TAPI_EX_VARS
//#define BLIS_TAPI_EX_VARS ,cntx, rntm

View File

@@ -261,6 +261,12 @@ BLIS_INLINE trans_t bli_trans_toggled_conj( trans_t trans )
( trans ^ BLIS_CONJ_BIT );
}
// Combine two trans_t values by XOR-ing them: every bit that is set in
// transapp toggles the corresponding bit of trans.
BLIS_INLINE trans_t bli_apply_trans( trans_t transapp, trans_t trans )
{
	const trans_t combined = ( trans_t )( trans ^ transapp );

	return combined;
}
BLIS_INLINE void bli_toggle_trans( trans_t* trans )
{
*trans = bli_trans_toggled( *trans );
@@ -421,6 +427,21 @@ BLIS_INLINE bool bli_is_unit_diag( diag_t diag )
}
// err_t-related
// Returns true exactly when err equals BLIS_SUCCESS.
BLIS_INLINE bool bli_is_success( err_t err )
{
	const bool succeeded = ( bool )( err == BLIS_SUCCESS );

	return succeeded;
}
// Returns true for every err value other than BLIS_SUCCESS.
BLIS_INLINE bool bli_is_failure( err_t err )
{
	const bool failed = ( bool )( err != BLIS_SUCCESS );

	return failed;
}
// dimension-related
BLIS_INLINE bool bli_zero_dim1( dim_t m )

View File

@@ -35,7 +35,12 @@
// This file defines macros used to allow the _tapi.c files to produce
// typed APIs that omit expert parameters.
// Define the macro to remove the function name suffix (in function
// Define a macro that allows the source code to determine which interface
// (basic or expert) we are compiling.
#undef BLIS_TAPI_BASIC
#define BLIS_TAPI_BASIC
// Define the macro to omit a suffix from the function names (in function
// definitions).
#undef EX_SUF
#define EX_SUF
@@ -45,14 +50,10 @@
#undef BLIS_TAPI_EX_PARAMS
#define BLIS_TAPI_EX_PARAMS
// Define the macro to declare local expert variables that are initialized
// Define the macro to add local expert variables that are initialized
// to NULL. The "( void )" statements are to prevent unused variable
// warnings by the compiler.
#undef BLIS_TAPI_EX_DECLS
#define BLIS_TAPI_EX_DECLS cntx_t* cntx = NULL; ( void )cntx; \
rntm_t* rntm = NULL; ( void )rntm;
// Define the macro to pass the local expert variables to another function.
//#undef BLIS_TAPI_EX_VARS
//#define BLIS_TAPI_EX_VARS

View File

@@ -35,8 +35,13 @@
// This file defines macros used to allow the _tapi.c files to produce
// typed APIs that contain context parameters.
// Define the macro to add a suffix to the typed API function names
// (in function definitions).
// Define a macro that allows the source code to determine which interface
// (basic or expert) we are compiling.
#undef BLIS_TAPI_EXPERT
#define BLIS_TAPI_EXPERT
// Define the macro to add a suffix to the function names (in function
// definitions).
#undef EX_SUF
#define EX_SUF BLIS_TAPI_EX_SUF
@@ -50,7 +55,3 @@
#undef BLIS_TAPI_EX_DECLS
#define BLIS_TAPI_EX_DECLS
// Define the macro to pass the local expert variables to another function.
//#undef BLIS_TAPI_EX_VARS
//#define BLIS_TAPI_EX_VARS ,cntx, rntm

View File

@@ -198,16 +198,19 @@ typedef double f77_double;
typedef scomplex f77_scomplex;
typedef dcomplex f77_dcomplex;
// -- Void function pointer types --
// -- Misc. function pointer types --
// Note: This type should be used in any situation where the address of a
// *function* will be conveyed or stored prior to it being typecast back
// to the correct function type. It does not need to be used when conveying
// or storing the address of *data* (such as an array of float or double).
//typedef void (*void_fp)( void );
typedef void* void_fp;
// Typedef function pointer types for malloc() and free() substitutes.
typedef void* (*malloc_ft)( size_t size );
typedef void (*free_ft) ( void* p );
//
// -- BLIS info bit field offsets ----------------------------------------------
@@ -1038,10 +1041,9 @@ typedef enum
// -- BLIS misc. structure types -----------------------------------------------
//
// These headers must be included here (or earlier) because definitions they
// provide are needed in the pool_t and related structs.
// This header must be included here (or earlier) because definitions it
// provides are needed in the pool_t and related structs.
#include "bli_pthread.h"
#include "bli_malloc.h"
// -- Pool block type --
@@ -1099,7 +1101,7 @@ typedef struct
// -- packing block allocator: Locked set of pools type --
typedef struct membrk_s
typedef struct pba_s
{
pool_t pools[3];
bli_pthread_mutex_t mutex;
@@ -1109,7 +1111,7 @@ typedef struct membrk_s
malloc_ft malloc_fp;
free_ft free_fp;
} membrk_t;
} pba_t;
// -- Memory object type --
@@ -1479,7 +1481,7 @@ typedef struct rntm_s
pool_t* sba_pool;
// The packing block allocator, which is attached in the l3 thread decorator.
membrk_t* membrk;
pba_t* pba;
} rntm_t;

View File

@@ -0,0 +1,57 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// This file un-defines the macros used to allow the _oapi.c and _tapi.c files
// to produce object and typed APIs that omit or contain expert parameters.
// Every directive below is an #undef, so including this file is harmless
// regardless of which (if any) of the macros are currently defined.
// Un-define all macros that allow the source code to determine which interface
// (basic or expert) we are compiling.
#undef BLIS_OAPI_BASIC
#undef BLIS_OAPI_EXPERT
#undef BLIS_TAPI_BASIC
#undef BLIS_TAPI_EXPERT
// Un-define the macro that omits or adds the function name suffix (in
// function definitions).
#undef EX_SUF
// Un-define the macros that omit expert arguments from, or add them to,
// function signatures and prototypes.
#undef BLIS_OAPI_EX_PARAMS
#undef BLIS_TAPI_EX_PARAMS
// Un-define the macros that omit or add local expert variables.
#undef BLIS_OAPI_EX_DECLS
#undef BLIS_TAPI_EX_DECLS

View File

@@ -99,6 +99,7 @@ extern "C" {
// -- Base operation prototypes --
#include "bli_init.h"
#include "bli_malloc.h"
#include "bli_const.h"
#include "bli_obj.h"
#include "bli_obj_scalar.h"
@@ -109,7 +110,7 @@ extern "C" {
#include "bli_rntm.h"
#include "bli_gks.h"
#include "bli_ind.h"
#include "bli_membrk.h"
#include "bli_pba.h"
#include "bli_pool.h"
#include "bli_array.h"
#include "bli_apool.h"
@@ -135,7 +136,8 @@ extern "C" {
#include "bli_arch.h"
#include "bli_cpuid.h"
#include "bli_string.h"
#include "bli_setgetij.h"
#include "bli_setgetijm.h"
#include "bli_setgetijv.h"
#include "bli_setri.h"
#include "bli_castm.h"

View File

@@ -73,7 +73,8 @@ void bli_l3_thread_decorator
const dim_t n_threads = bli_rntm_num_threads( rntm );
#ifdef PRINT_THRINFO
thrinfo_t** threads = bli_malloc_intl( n_threads * sizeof( thrinfo_t* ) );
err_t r_val;
thrinfo_t** threads = bli_malloc_intl( n_threads * sizeof( thrinfo_t* ), &r_val );
#endif
// NOTE: The sba was initialized in bli_init().
@@ -92,7 +93,7 @@ void bli_l3_thread_decorator
// Set the packing block allocator field of the rntm. This will be
// inherited by all of the child threads when they make local copies of
// the rntm below.
bli_membrk_rntm_set_membrk( rntm );
bli_pba_rntm_set_pba( rntm );
// Allocate a global communicator for the root thrinfo_t structures.
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );

View File

@@ -146,6 +146,8 @@ void bli_l3_thread_decorator
cntl_t* cntl
)
{
err_t r_val;
// This is part of a hack to support mixed domain in bli_gemm_front().
// Sometimes we need to specify a non-standard schema for A and B, and
// we decided to transmit them via the schema field in the obj_t's
@@ -176,7 +178,7 @@ void bli_l3_thread_decorator
// Set the packing block allocator field of the rntm. This will be
// inherited by all of the child threads when they make local copies of
// the rntm below.
bli_membrk_rntm_set_membrk( rntm );
bli_pba_rntm_set_pba( rntm );
// Allocate a global communicator for the root thrinfo_t structures.
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );
@@ -187,12 +189,12 @@ void bli_l3_thread_decorator
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_l3_thread_decorator().pth: " );
#endif
bli_pthread_t* pthreads = bli_malloc_intl( sizeof( bli_pthread_t ) * n_threads );
bli_pthread_t* pthreads = bli_malloc_intl( sizeof( bli_pthread_t ) * n_threads, &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_l3_thread_decorator().pth: " );
#endif
thread_data_t* datas = bli_malloc_intl( sizeof( thread_data_t ) * n_threads );
thread_data_t* datas = bli_malloc_intl( sizeof( thread_data_t ) * n_threads, &r_val );
// NOTE: We must iterate backwards so that the chief thread (thread id 0)
// can spawn all other threads before proceeding with its own computation.

View File

@@ -78,7 +78,7 @@ void bli_l3_thread_decorator
bli_sba_rntm_set_pool( 0, array, rntm );
// Set the packing block allocator field of the rntm.
bli_membrk_rntm_set_membrk( rntm );
bli_pba_rntm_set_pba( rntm );
// Allocate a global communicator for the root thrinfo_t structures.
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );

View File

@@ -76,7 +76,7 @@ err_t bli_l3_sup_thread_decorator
// Set the packing block allocator field of the rntm. This will be
// inherited by all of the child threads when they make local copies of
// the rntm below.
bli_membrk_rntm_set_membrk( rntm );
bli_pba_rntm_set_pba( rntm );
// Allocate a global communicator for the root thrinfo_t structures.
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );

View File

@@ -122,6 +122,8 @@ err_t bli_l3_sup_thread_decorator
rntm_t* rntm
)
{
err_t r_val;
// Query the total number of threads from the context.
const dim_t n_threads = bli_rntm_num_threads( rntm );
@@ -141,7 +143,7 @@ err_t bli_l3_sup_thread_decorator
// Set the packing block allocator field of the rntm. This will be
// inherited by all of the child threads when they make local copies of
// the rntm below.
bli_membrk_rntm_set_membrk( rntm );
bli_pba_rntm_set_pba( rntm );
// Allocate a global communicator for the root thrinfo_t structures.
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );
@@ -152,12 +154,12 @@ err_t bli_l3_sup_thread_decorator
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_l3_thread_decorator().pth: " );
#endif
bli_pthread_t* pthreads = bli_malloc_intl( sizeof( bli_pthread_t ) * n_threads );
bli_pthread_t* pthreads = bli_malloc_intl( sizeof( bli_pthread_t ) * n_threads, &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_l3_thread_decorator().pth: " );
#endif
thread_data_t* datas = bli_malloc_intl( sizeof( thread_data_t ) * n_threads );
thread_data_t* datas = bli_malloc_intl( sizeof( thread_data_t ) * n_threads, &r_val );
// NOTE: We must iterate backwards so that the chief thread (thread id 0)
// can spawn all other threads before proceeding with its own computation.

View File

@@ -69,7 +69,7 @@ err_t bli_l3_sup_thread_decorator
bli_sba_rntm_set_pool( 0, array, rntm );
// Set the packing block allocator field of the rntm.
bli_membrk_rntm_set_membrk( rntm );
bli_pba_rntm_set_pba( rntm );
#ifndef SKIP_THRINFO_TREE
// Allocate a global communicator for the root thrinfo_t structures.

View File

@@ -111,17 +111,21 @@ void bli_thrcomm_barrier( dim_t t_id, thrcomm_t* comm )
void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm )
{
err_t r_val;
if ( comm == NULL ) return;
comm->sent_object = NULL;
comm->n_threads = n_threads;
comm->barriers = bli_malloc_intl( sizeof( barrier_t* ) * n_threads );
comm->barriers = bli_malloc_intl( sizeof( barrier_t* ) * n_threads, &r_val );
bli_thrcomm_tree_barrier_create( n_threads, BLIS_TREE_BARRIER_ARITY, comm->barriers, 0 );
}
//Tree barrier used for Intel Xeon Phi
barrier_t* bli_thrcomm_tree_barrier_create( int num_threads, int arity, barrier_t** leaves, int leaf_index )
{
barrier_t* me = bli_malloc_intl( sizeof(barrier_t) );
err_t r_val;
barrier_t* me = bli_malloc_intl( sizeof( barrier_t ), &r_val );
me->dad = NULL;
me->signal = 0;

View File

@@ -93,18 +93,10 @@ void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm )
comm->n_threads = n_threads;
comm->barrier_sense = 0;
comm->barrier_threads_arrived = 0;
//#ifdef BLIS_USE_PTHREAD_MUTEX
// bli_pthread_mutex_init( &comm->mutex, NULL );
//#endif
}
void bli_thrcomm_cleanup( thrcomm_t* comm )
{
//#ifdef BLIS_USE_PTHREAD_MUTEX
// if ( comm == NULL ) return;
// bli_pthread_mutex_destroy( &comm->mutex );
//#endif
}
void bli_thrcomm_barrier( dim_t t_id, thrcomm_t* comm )
@@ -114,13 +106,7 @@ void bli_thrcomm_barrier( dim_t t_id, thrcomm_t* comm )
bool my_sense = comm->sense;
dim_t my_threads_arrived;
#ifdef BLIS_USE_PTHREAD_MUTEX
bli_pthread_mutex_lock( &comm->mutex );
my_threads_arrived = ++(comm->threads_arrived);
bli_pthread_mutex_unlock( &comm->mutex );
#else
my_threads_arrived = __sync_add_and_fetch(&(comm->threads_arrived), 1);
#endif
if ( my_threads_arrived == comm->n_threads )
{

View File

@@ -52,10 +52,6 @@ struct thrcomm_s
void* sent_object;
dim_t n_threads;
//#ifdef BLIS_USE_PTHREAD_MUTEX
// bli_pthread_mutex_t mutex;
//#endif
// NOTE: barrier_sense was originally a gint_t-based bool_t, but upon
// redefining bool_t as bool we discovered that some gcc __atomic built-ins
// don't allow the use of bool for the variables being operated upon.

View File

@@ -332,8 +332,10 @@ thrinfo_t* bli_thrinfo_create_for_cntl
// pointers.
if ( bli_thread_am_ochief( thread_par ) )
{
err_t r_val;
if ( parent_n_way > BLIS_NUM_STATIC_COMMS )
new_comms = bli_malloc_intl( parent_n_way * sizeof( thrcomm_t* ) );
new_comms = bli_malloc_intl( parent_n_way * sizeof( thrcomm_t* ), &r_val );
else
new_comms = static_comms;
}

View File

@@ -197,8 +197,10 @@ thrinfo_t* bli_thrinfo_sup_create_for_cntl
// pointers.
if ( bli_thread_am_ochief( thread_par ) )
{
err_t r_val;
if ( parent_n_way > BLIS_NUM_STATIC_COMMS )
new_comms = bli_malloc_intl( parent_n_way * sizeof( thrcomm_t* ) );
new_comms = bli_malloc_intl( parent_n_way * sizeof( thrcomm_t* ), &r_val );
else
new_comms = static_comms;
}

View File

@@ -37,18 +37,22 @@
// Prototype object APIs (expert and non-expert).
#include "bli_oapi_ex.h"
#include "bli_util_oapi.h"
#include "bli_xapi_undef.h"
#include "bli_oapi_ba.h"
#include "bli_util_oapi.h"
#include "bli_xapi_undef.h"
// Prototype typed APIs (expert and non-expert).
#include "bli_tapi_ex.h"
#include "bli_util_tapi.h"
#include "bli_util_ft.h"
#include "bli_xapi_undef.h"
#include "bli_tapi_ba.h"
#include "bli_util_tapi.h"
#include "bli_util_ft.h"
#include "bli_xapi_undef.h"
// Generate function pointer arrays for tapi functions (expert only).
#include "bli_util_fpa.h"

View File

@@ -103,25 +103,6 @@ GENFRONT( normfm )
GENFRONT( normim )
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,_check) \
( \
FILE* file, \
char* s1, \
obj_t* x, \
char* format, \
char* s2 \
) \
{ \
bli_utilm_fprint_check( file, s1, x, format, s2 ); \
}
GENFRONT( fprintv )
GENFRONT( fprintm )
#undef GENFRONT
#define GENFRONT( opname ) \
\
@@ -154,6 +135,73 @@ void PASTEMAC(opname,_check) \
GENFRONT( sumsqv )
// -----------------------------------------------------------------------------
// Argument check for the eqsc object API. The generated
// PASTEMAC(eqsc,_check) forwards all three arguments (chi, psi, is_eq)
// unchanged to bli_l0_xxbsc_check() and returns nothing itself.
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,_check) \
( \
obj_t* chi, \
obj_t* psi, \
bool* is_eq \
) \
{ \
bli_l0_xxbsc_check( chi, psi, is_eq ); \
}
GENFRONT( eqsc )
// Argument check for the eqv object API. Only the two vector operands are
// validated, by forwarding them to bli_l1v_xy_check(); is_eq is part of the
// signature but is not inspected here.
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,_check) \
( \
obj_t* x, \
obj_t* y, \
bool* is_eq \
) \
{ \
bli_l1v_xy_check( x, y ); \
}
GENFRONT( eqv )
// Argument check for the eqm object API. Only the two matrix operands are
// validated, by forwarding them to bli_l1m_xy_check(); is_eq is part of the
// signature but is not inspected here.
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,_check) \
( \
obj_t* x, \
obj_t* y, \
bool* is_eq \
) \
{ \
bli_l1m_xy_check( x, y ); \
}
GENFRONT( eqm )
// Argument checks for the fprintv and fprintm object APIs. Both generated
// functions share one implementation: every argument is forwarded unchanged
// to bli_utilm_fprint_check().
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,_check) \
( \
FILE* file, \
char* s1, \
obj_t* x, \
char* format, \
char* s2 \
) \
{ \
bli_utilm_fprint_check( file, s1, x, format, s2 ); \
}
GENFRONT( fprintv )
GENFRONT( fprintm )
// -----------------------------------------------------------------------------

View File

@@ -90,22 +90,6 @@ GENPROT( normfm )
GENPROT( normim )
#undef GENPROT
#define GENPROT( opname ) \
\
void PASTEMAC(opname,_check) \
( \
FILE* file, \
char* s1, \
obj_t* x, \
char* format, \
char* s2 \
);
GENPROT( fprintv )
GENPROT( fprintm )
#undef GENPROT
#define GENPROT( opname ) \
\
@@ -132,6 +116,49 @@ void PASTEMAC(opname,_check) \
GENPROT( sumsqv )
// -----------------------------------------------------------------------------
// Prototype for the eqsc argument check: PASTEMAC(eqsc,_check) takes the two
// scalar objects (chi, psi) and the caller's result location (is_eq).
// The one-argument template is spelled GENPROT, like the other prototype
// templates in this header, so that no single-parameter GENTPROT definition
// is left behind after this point.
#undef GENPROT
#define GENPROT( opname ) \
\
void PASTEMAC(opname,_check) \
( \
obj_t* chi, \
obj_t* psi, \
bool* is_eq \
);
GENPROT( eqsc )
// Prototypes for the eqv and eqm argument checks. Both take the two operand
// objects (x, y) and the caller's result location (is_eq) and return nothing.
#undef GENPROT
#define GENPROT( opname ) \
\
void PASTEMAC(opname,_check) \
( \
obj_t* x, \
obj_t* y, \
bool* is_eq \
);
GENPROT( eqv )
GENPROT( eqm )
// Prototypes for the fprintv and fprintm argument checks. Each takes the
// same five arguments as the corresponding print operation: the output
// stream, a leading string (s1), the object (x), a format string, and a
// trailing string (s2).
#undef GENPROT
#define GENPROT( opname ) \
\
void PASTEMAC(opname,_check) \
( \
FILE* file, \
char* s1, \
obj_t* x, \
char* format, \
char* s2 \
);
GENPROT( fprintv )
GENPROT( fprintm )
// -----------------------------------------------------------------------------

View File

@@ -66,6 +66,9 @@ GENFRONT( randm )
GENFRONT( randnm )
GENFRONT( sumsqv )
// -----------------------------------------------------------------------------
// Operations with only basic interfaces.
#undef GENFRONT
#define GENFRONT( opname ) \
@@ -83,6 +86,9 @@ PASTEMAC(opname,_qfp)( num_t dt ) \
return PASTECH(opname,_fpa)[ dt ]; \
}
GENFRONT( eqsc )
GENFRONT( eqv )
GENFRONT( eqm )
GENFRONT( fprintv )
GENFRONT( fprintm )
//GENFRONT( printv )

View File

@@ -52,16 +52,13 @@ GENPROT( normiv )
GENPROT( norm1m )
GENPROT( normfm )
GENPROT( normim )
GENPROT( fprintv )
GENPROT( fprintm )
//GENPROT( printv )
//GENPROT( printm )
GENPROT( randv )
GENPROT( randnv )
GENPROT( randm )
GENPROT( randnm )
GENPROT( sumsqv )
// -----------------------------------------------------------------------------
#undef GENPROT
#define GENPROT( opname ) \
@@ -69,6 +66,9 @@ GENPROT( sumsqv )
PASTECH(opname,_vft) \
PASTEMAC(opname,_qfp)( num_t dt );
GENPROT( eqsc )
GENPROT( eqv )
GENPROT( eqm )
GENPROT( fprintv )
GENPROT( fprintm )
//GENPROT( printv )

View File

@@ -191,3 +191,62 @@ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \
INSERT_GENTDEFR( sumsqv )
// -----------------------------------------------------------------------------
// Operations with only basic interfaces.
#ifdef BLIS_TAPI_BASIC
// eqsc
// Function pointer type for the typed eqsc functions: a conj_t for chi, the
// two scalar operands as ctype pointers, and the location (is_eq) through
// which the result is reported. The type is named PASTECH2(ch,opname,tsuf).
// NOTE(review): INSERT_GENTDEF presumably instantiates the typedef once per
// supported datatype -- confirm against its definition.
#undef GENTDEF
#define GENTDEF( ctype, ch, opname, tsuf ) \
\
typedef void (*PASTECH2(ch,opname,tsuf)) \
( \
conj_t conjchi, \
ctype* chi, \
ctype* psi, \
bool* is_eq \
);
INSERT_GENTDEF( eqsc )
// eqv
// Function pointer type for the typed eqv functions: a conj_t for x, the
// vector length (n), each vector as a ctype pointer with its increment, and
// the location (is_eq) through which the result is reported. The type is
// named PASTECH2(ch,opname,tsuf).
#undef GENTDEF
#define GENTDEF( ctype, ch, opname, tsuf ) \
\
typedef void (*PASTECH2(ch,opname,tsuf)) \
( \
conj_t conjx, \
dim_t n, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
bool* is_eq \
);
INSERT_GENTDEF( eqv )
// eqm
// Function pointer type for the typed eqm functions: the structural
// description of x (diagonal offset, diag, uplo, trans), the dimensions
// (m, n), each matrix as a ctype pointer with its row and column strides, and
// the location (is_eq) through which the result is reported. The type is
// named PASTECH2(ch,opname,tsuf).
#undef GENTDEF
#define GENTDEF( ctype, ch, opname, tsuf ) \
\
typedef void (*PASTECH2(ch,opname,tsuf)) \
( \
doff_t diagoffx, \
diag_t diagx, \
uplo_t uplox, \
trans_t transx, \
dim_t m, \
dim_t n, \
ctype* x, inc_t rs_x, inc_t cs_x, \
ctype* y, inc_t rs_y, inc_t cs_y, \
bool* is_eq \
);
INSERT_GENTDEF( eqm )
#endif // #ifdef BLIS_TAPI_BASIC

View File

@@ -72,11 +72,11 @@ void PASTEMAC(opname,EX_SUF) \
\
f \
( \
n, \
buf_x, incx, \
buf_asum, \
cntx, \
rntm \
n, \
buf_x, incx, \
buf_asum, \
cntx, \
rntm \
); \
}
@@ -114,11 +114,11 @@ void PASTEMAC(opname,EX_SUF) \
\
f \
( \
uploa, \
m, \
buf_a, rs_a, cs_a, \
cntx, \
rntm \
uploa, \
m, \
buf_a, rs_a, cs_a, \
cntx, \
rntm \
); \
}
@@ -158,11 +158,11 @@ void PASTEMAC(opname,EX_SUF) \
\
f \
( \
n, \
buf_x, incx, \
buf_norm, \
cntx, \
rntm \
n, \
buf_x, incx, \
buf_norm, \
cntx, \
rntm \
); \
}
@@ -207,15 +207,15 @@ void PASTEMAC(opname,EX_SUF) \
\
f \
( \
diagoffx, \
diagx, \
uplox, \
m, \
n, \
buf_x, rs_x, cs_x, \
buf_norm, \
cntx, \
rntm \
diagoffx, \
diagx, \
uplox, \
m, \
n, \
buf_x, rs_x, cs_x, \
buf_norm, \
cntx, \
rntm \
); \
}
@@ -224,160 +224,6 @@ GENFRONT( normfm )
GENFRONT( normim )
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
( \
FILE* file, \
char* s1, \
obj_t* x, \
char* format, \
char* s2 \
BLIS_OAPI_EX_PARAMS \
) \
{ \
bli_init_once(); \
\
BLIS_OAPI_EX_DECLS \
\
num_t dt = bli_obj_dt( x ); \
\
dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( x ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( file, s1, x, format, s2 ); \
\
/* Handle constants up front. */ \
if ( dt == BLIS_CONSTANT ) \
{ \
bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \
} \
\
/* Query a type-specific function pointer, except one that uses
void* for function arguments instead of typed pointers. */ \
PASTECH(opname,_vft) f = \
PASTEMAC(opname,_qfp)( dt ); \
\
f \
( \
file, \
s1, \
n, \
buf_x, incx, \
format, \
s2 \
); \
}
GENFRONT( fprintv )
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
( \
FILE* file, \
char* s1, \
obj_t* x, \
char* format, \
char* s2 \
BLIS_OAPI_EX_PARAMS \
) \
{ \
bli_init_once(); \
\
BLIS_OAPI_EX_DECLS \
\
num_t dt = bli_obj_dt( x ); \
\
dim_t m = bli_obj_length( x ); \
dim_t n = bli_obj_width( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( x ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( file, s1, x, format, s2 ); \
\
/* Handle constants up front. */ \
if ( dt == BLIS_CONSTANT ) \
{ \
float* sp = bli_obj_buffer_for_const( BLIS_FLOAT, x ); \
double* dp = bli_obj_buffer_for_const( BLIS_DOUBLE, x ); \
scomplex* cp = bli_obj_buffer_for_const( BLIS_SCOMPLEX, x ); \
dcomplex* zp = bli_obj_buffer_for_const( BLIS_DCOMPLEX, x ); \
gint_t* ip = bli_obj_buffer_for_const( BLIS_INT, x ); \
\
fprintf( file, "%s\n", s1 ); \
fprintf( file, " float: %9.2e\n", bli_sreal( *sp ) ); \
fprintf( file, " double: %9.2e\n", bli_dreal( *dp ) ); \
fprintf( file, " scomplex: %9.2e + %9.2e\n", bli_creal( *cp ), \
bli_cimag( *cp ) ); \
fprintf( file, " dcomplex: %9.2e + %9.2e\n", bli_zreal( *zp ), \
bli_zimag( *zp ) ); \
fprintf( file, " int: %ld\n", ( long )(*ip) ); \
fprintf( file, "\n" ); \
return; \
} \
\
/* Query a type-specific function pointer, except one that uses
void* for function arguments instead of typed pointers. */ \
PASTECH(opname,_vft) f = \
PASTEMAC(opname,_qfp)( dt ); \
\
f \
( \
file, \
s1, \
m, \
n, \
buf_x, rs_x, cs_x, \
format, \
s2 \
); \
}
GENFRONT( fprintm )
#undef GENFRONT
#define GENFRONT( opname, varname ) \
\
void PASTEMAC(opname,EX_SUF) \
( \
char* s1, \
obj_t* x, \
char* format, \
char* s2 \
BLIS_OAPI_EX_PARAMS \
) \
{ \
bli_init_once(); \
\
BLIS_OAPI_EX_DECLS \
\
/* Suppress compiler warning about unused variables. */ \
( void )cntx; \
\
/* Invoke the typed function. */ \
PASTEMAC0(varname) \
( \
stdout, \
s1, \
x, \
format, \
s2 \
); \
}
GENFRONT( printv, fprintv )
GENFRONT( printm, fprintm )
#undef GENFRONT
#define GENFRONT( opname ) \
\
@@ -407,10 +253,10 @@ void PASTEMAC(opname,EX_SUF) \
\
f \
( \
n, \
buf_x, incx, \
cntx, \
rntm \
n, \
buf_x, incx, \
cntx, \
rntm \
); \
}
@@ -451,13 +297,13 @@ void PASTEMAC(opname,EX_SUF) \
\
f \
( \
diagoffx, \
uplox, \
m, \
n, \
buf_x, rs_x, cs_x, \
cntx, \
rntm \
diagoffx, \
uplox, \
m, \
n, \
buf_x, rs_x, cs_x, \
cntx, \
rntm \
); \
}
@@ -498,17 +344,330 @@ void PASTEMAC(opname,EX_SUF) \
\
f \
( \
n, \
buf_x, incx, \
buf_scale, \
buf_sumsq, \
cntx, \
rntm \
n, \
buf_x, incx, \
buf_scale, \
buf_sumsq, \
cntx, \
rntm \
); \
}
GENFRONT( sumsqv )
// -----------------------------------------------------------------------------
// Operations with only basic interfaces.
#ifdef BLIS_OAPI_BASIC
// Object API for eqsc (basic interface only): compares the scalar objects chi
// and psi and reports the outcome through *is_eq.
//
// The comparison datatype is psi's datatype unless psi is a constant object,
// in which case chi's datatype is used; if that is also constant, the
// dcomplex representation is compared. Integer data is compared directly with
// bli_ieqa() and the function returns early. Otherwise the conj status of the
// two objects is merged with bli_apply_conj() and the type-specific function
// obtained from PASTEMAC(opname,_qfp) performs the comparison.
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC0(opname) \
( \
obj_t* chi, \
obj_t* psi, \
bool* is_eq \
) \
{ \
bli_init_once(); \
\
num_t dt_chi = bli_obj_dt( chi ); \
num_t dt_psi = bli_obj_dt( psi ); \
num_t dt; \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( chi, psi, is_eq ); \
\
/* Decide which datatype will be used to query the buffer from the
constant object (if there is one). */ \
if ( bli_is_constant( dt_psi ) ) dt = dt_chi; \
else dt = dt_psi; \
\
/* If chi and psi are both constants, then we compare only the dcomplex
fields. */ \
if ( bli_is_constant( dt ) ) dt = BLIS_DCOMPLEX; \
\
void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \
void* buf_psi = bli_obj_buffer_for_1x1( dt, psi ); \
\
/* Integer objects are handled separately. */ \
if ( bli_is_int( dt ) ) \
{ \
*is_eq = bli_ieqa( buf_chi, buf_psi ); \
return; \
} \
\
/* Query the conj status of each object and use the two to come up with a
single "net" conj_t value. */ \
conj_t conjchi = bli_obj_conj_status( chi ); \
conj_t conjpsi = bli_obj_conj_status( psi ); \
conj_t conj = bli_apply_conj( conjchi, conjpsi ); \
\
/* Query a type-specific function pointer, except one that uses
void* for function arguments instead of typed pointers. */ \
PASTECH(opname,_vft) f = \
PASTEMAC(opname,_qfp)( dt ); \
\
f \
( \
conj, \
buf_chi, \
buf_psi, \
is_eq \
); \
}
GENFRONT( eqsc )
// Object API for eqv (basic interface only): compares the vector objects x
// and y and reports the outcome through *is_eq. The datatype and the length
// are read from x; the conj status of both objects is merged into a single
// conj_t that is handed to the type-specific function.
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC0(opname) \
     ( \
       obj_t* x, \
       obj_t* y, \
       bool*  is_eq \
     ) \
{ \
	bli_init_once(); \
\
	/* The datatype and the vector length both come from x. */ \
	num_t  dt_x  = bli_obj_dt( x ); \
	dim_t  n_x   = bli_obj_vector_dim( x ); \
\
	/* Buffer address and increment of each operand. */ \
	void*  x_buf = bli_obj_buffer_at_off( x ); \
	inc_t  x_inc = bli_obj_vector_inc( x ); \
	void*  y_buf = bli_obj_buffer_at_off( y ); \
	inc_t  y_inc = bli_obj_vector_inc( y ); \
\
	if ( bli_error_checking_is_enabled() ) \
	{ \
		PASTEMAC(opname,_check)( x, y, is_eq ); \
	} \
\
	/* Merge the conj status of the two objects into one "net" value. */ \
	conj_t x_conj   = bli_obj_conj_status( x ); \
	conj_t y_conj   = bli_obj_conj_status( y ); \
	conj_t net_conj = bli_apply_conj( x_conj, y_conj ); \
\
	/* Look up the type-specific implementation; it takes void* buffers
	   rather than typed pointers. */ \
	PASTECH(opname,_vft) fp = PASTEMAC(opname,_qfp)( dt_x ); \
\
	fp( net_conj, n_x, x_buf, x_inc, y_buf, y_inc, is_eq ); \
}
GENFRONT( eqv )
// Object API for eqm (basic interface only): compares the matrix objects x
// and y and reports the outcome through *is_eq. The datatype and the
// structural properties (diagonal offset, diag, uplo) are read from x, while
// the dimensions m and n are read from y. The conjtrans status of both
// objects is merged into a single trans_t that is handed to the
// type-specific function.
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC0(opname) \
     ( \
       obj_t* x, \
       obj_t* y, \
       bool*  is_eq \
     ) \
{ \
	bli_init_once(); \
\
	num_t  dt_x    = bli_obj_dt( x ); \
\
	/* Structure of x. */ \
	doff_t x_doff  = bli_obj_diag_offset( x ); \
	diag_t x_diag  = bli_obj_diag( x ); \
	uplo_t x_uplo  = bli_obj_uplo( x ); \
\
	/* The dimensions are those of y. */ \
	dim_t  m_y     = bli_obj_length( y ); \
	dim_t  n_y     = bli_obj_width( y ); \
\
	/* Buffer address and strides of each operand. */ \
	void*  x_buf   = bli_obj_buffer_at_off( x ); \
	inc_t  x_rs    = bli_obj_row_stride( x ); \
	inc_t  x_cs    = bli_obj_col_stride( x ); \
	void*  y_buf   = bli_obj_buffer_at_off( y ); \
	inc_t  y_rs    = bli_obj_row_stride( y ); \
	inc_t  y_cs    = bli_obj_col_stride( y ); \
\
	if ( bli_error_checking_is_enabled() ) \
	{ \
		PASTEMAC(opname,_check)( x, y, is_eq ); \
	} \
\
	/* Merge the conjtrans status of the two objects into one "net" value
	   by applying y's status to x's. */ \
	trans_t x_trans   = bli_obj_conjtrans_status( x ); \
	trans_t y_trans   = bli_obj_conjtrans_status( y ); \
	trans_t net_trans = bli_apply_trans( y_trans, x_trans ); \
\
	/* Look up the type-specific implementation; it takes void* buffers
	   rather than typed pointers. */ \
	PASTECH(opname,_vft) fp = PASTEMAC(opname,_qfp)( dt_x ); \
\
	fp \
	( \
	  x_doff, x_diag, x_uplo, net_trans, \
	  m_y, n_y, \
	  x_buf, x_rs, x_cs, \
	  y_buf, y_rs, y_cs, \
	  is_eq \
	); \
}
GENFRONT( eqm )
// Object API for fprintv (basic interface only): prints the vector object x
// to file. s1, format, and s2 are passed through unchanged to the
// type-specific function, together with the vector length, buffer, and
// increment read from x.
//
// Constant objects are not supported: BLIS_NOT_YET_IMPLEMENTED is reported
// through bli_check_error_code() and the function then returns, so the
// function-pointer lookup below is never attempted with BLIS_CONSTANT.
// NOTE(review): bli_check_error_code() presumably aborts on this code, in
// which case the return is unreachable; it guards the case where it does not.
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC0(opname) \
( \
FILE* file, \
char* s1, \
obj_t* x, \
char* format, \
char* s2 \
) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_dt( x ); \
\
dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( x ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( file, s1, x, format, s2 ); \
\
/* Handle constants up front. */ \
if ( dt == BLIS_CONSTANT ) \
{ \
bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \
/* Never fall through to the type-specific query with a constant
datatype (matches the early return in fprintm). */ \
return; \
} \
\
/* Query a type-specific function pointer, except one that uses
void* for function arguments instead of typed pointers. */ \
PASTECH(opname,_vft) f = \
PASTEMAC(opname,_qfp)( dt ); \
\
f \
( \
file, \
s1, \
n, \
buf_x, incx, \
format, \
s2 \
); \
}
GENFRONT( fprintv )
// Object API for fprintm (basic interface only): prints the matrix object x
// to file.
//
// Constant objects are handled directly in this function: s1 is printed on
// its own line, followed by the float, double, scomplex, dcomplex, and int
// representations of the constant and a blank line; format and s2 are not
// used on that path and the function returns early. For every other datatype
// the dimensions, buffer, and strides read from x are passed, together with
// s1, format, and s2, to the type-specific function.
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC0(opname) \
( \
FILE* file, \
char* s1, \
obj_t* x, \
char* format, \
char* s2 \
) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_dt( x ); \
\
dim_t m = bli_obj_length( x ); \
dim_t n = bli_obj_width( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( x ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( file, s1, x, format, s2 ); \
\
/* Handle constants up front. */ \
if ( dt == BLIS_CONSTANT ) \
{ \
float* sp = bli_obj_buffer_for_const( BLIS_FLOAT, x ); \
double* dp = bli_obj_buffer_for_const( BLIS_DOUBLE, x ); \
scomplex* cp = bli_obj_buffer_for_const( BLIS_SCOMPLEX, x ); \
dcomplex* zp = bli_obj_buffer_for_const( BLIS_DCOMPLEX, x ); \
gint_t* ip = bli_obj_buffer_for_const( BLIS_INT, x ); \
\
fprintf( file, "%s\n", s1 ); \
fprintf( file, " float: %9.2e\n", bli_sreal( *sp ) ); \
fprintf( file, " double: %9.2e\n", bli_dreal( *dp ) ); \
fprintf( file, " scomplex: %9.2e + %9.2e\n", bli_creal( *cp ), \
bli_cimag( *cp ) ); \
fprintf( file, " dcomplex: %9.2e + %9.2e\n", bli_zreal( *zp ), \
bli_zimag( *zp ) ); \
/* The integer value is cast to long to match the %ld conversion. */ \
fprintf( file, " int: %ld\n", ( long )(*ip) ); \
fprintf( file, "\n" ); \
return; \
} \
\
/* Query a type-specific function pointer, except one that uses
void* for function arguments instead of typed pointers. */ \
PASTECH(opname,_vft) f = \
PASTEMAC(opname,_qfp)( dt ); \
\
f \
( \
file, \
s1, \
m, \
n, \
buf_x, rs_x, cs_x, \
format, \
s2 \
); \
}
GENFRONT( fprintm )
#undef GENFRONT
#define GENFRONT( opname, varname ) \
\
void PASTEMAC0(opname) \
     ( \
       char*  s1, \
       obj_t* x, \
       char*  format, \
       char*  s2 \
     ) \
{ \
	bli_init_once(); \
\
	/* Delegate to the FILE*-taking counterpart (varname), with the
	   output stream fixed to stdout. */ \
	PASTEMAC0(varname)( stdout, s1, x, format, s2 ); \
}
GENFRONT( printv, fprintv )
GENFRONT( printm, fprintm )
#endif // #ifdef BLIS_OAPI_BASIC
#endif

View File

@@ -94,39 +94,6 @@ GENPROT( normfm )
GENPROT( normim )
// Prototype template for the EX_SUF-suffixed object-API fprintv/fprintm
// entry points. Each takes an output stream, two caller-supplied strings
// (s1, s2), the object x, and a format string; BLIS_OAPI_EX_PARAMS appends
// any additional parameters defined elsewhere for this interface flavor.
#undef GENPROT
#define GENPROT( opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \
( \
FILE* file, \
char* s1, \
obj_t* x, \
char* format, \
char* s2 \
BLIS_OAPI_EX_PARAMS \
);
GENPROT( fprintv )
GENPROT( fprintm )
// Prototype template for the EX_SUF-suffixed object-API printv/printm entry
// points. Same parameters as the fprintv/fprintm prototypes minus the FILE*
// stream argument; BLIS_OAPI_EX_PARAMS appends any additional parameters
// defined elsewhere for this interface flavor.
#undef GENPROT
#define GENPROT( opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \
( \
char* s1, \
obj_t* x, \
char* format, \
char* s2 \
BLIS_OAPI_EX_PARAMS \
);
GENPROT( printv )
GENPROT( printm )
#undef GENPROT
#define GENPROT( opname ) \
\
@@ -166,3 +133,84 @@ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \
GENPROT( sumsqv )
// -----------------------------------------------------------------------------
// Operations with basic interfaces only.
#ifdef BLIS_OAPI_BASIC
/*
#undef GENPROT
#define GENPROT( opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC0(opname) \
( \
obj_t* chi, \
obj_t* psi, \
bool* is_eq \
);
GENPROT( eqsc )
#undef GENPROT
#define GENPROT( opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC0(opname) \
( \
obj_t* x, \
obj_t* y, \
bool* is_eq \
);
GENPROT( eqv )
*/
// Prototype template shared by the basic object-API equality tests eqsc, eqv
// and eqm. Each takes two objects (x, y) and writes its boolean result
// through is_eq rather than returning it.
#undef GENPROT
#define GENPROT( opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC0(opname) \
( \
obj_t* x, \
obj_t* y, \
bool* is_eq \
);
GENPROT( eqsc )
GENPROT( eqv )
GENPROT( eqm )
// Prototype template for the basic object-API fprintv/fprintm entry points:
// an output stream, two caller-supplied strings (s1, s2), the object x, and
// a format string.
#undef GENPROT
#define GENPROT( opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC0(opname) \
( \
FILE* file, \
char* s1, \
obj_t* x, \
char* format, \
char* s2 \
);
GENPROT( fprintv )
GENPROT( fprintm )
// Prototype template for the basic object-API printv/printm entry points.
// Same parameters as the fprintv/fprintm prototypes minus the FILE* stream
// argument.
#undef GENPROT
#define GENPROT( opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC0(opname) \
( \
char* s1, \
obj_t* x, \
char* format, \
char* s2 \
);
GENPROT( printv )
GENPROT( printm )
#endif // #ifdef BLIS_OAPI_BASIC

View File

@@ -213,64 +213,6 @@ INSERT_GENTFUNCR_BASIC0( normfm )
INSERT_GENTFUNCR_BASIC0( normim )
// Typed-API printv template (EX_SUF-suffixed name). It is a thin wrapper:
// after bli_init_once() it calls the typed function named by varname
// (fprintv in the instantiation below), passing stdout as the stream and
// forwarding every other argument unchanged.
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, varname ) \
\
void PASTEMAC2(ch,opname,EX_SUF) \
( \
char* s1, \
dim_t n, \
void* x, inc_t incx, \
char* format, \
char* s2 \
) \
{ \
bli_init_once(); \
\
/* Forward to the stream-taking variant with the stream fixed to stdout. */ \
PASTEMAC(ch,varname) \
( \
stdout, \
s1, \
n, \
x, incx, \
format, \
s2 \
); \
}
INSERT_GENTFUNC_BASIC_I( printv, fprintv )
// Typed-API printm template (EX_SUF-suffixed name). It is a thin wrapper:
// after bli_init_once() it calls the typed function named by varname
// (fprintm in the instantiation below), passing stdout as the stream and
// forwarding the dimensions, buffer, strides and strings unchanged.
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, varname ) \
\
void PASTEMAC2(ch,opname,EX_SUF) \
( \
char* s1, \
dim_t m, \
dim_t n, \
void* x, inc_t rs_x, inc_t cs_x, \
char* format, \
char* s2 \
) \
{ \
bli_init_once(); \
\
/* Forward to the stream-taking variant with the stream fixed to stdout. */ \
PASTEMAC(ch,varname) \
( \
stdout, \
s1, \
m, \
n, \
x, rs_x, cs_x, \
format, \
s2 \
); \
}
INSERT_GENTFUNC_BASIC_I( printm, fprintm )
#undef GENTFUNCR
#define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \
\
@@ -430,6 +372,168 @@ void PASTEMAC2(ch,opname,EX_SUF) \
INSERT_GENTFUNCR_BASIC0( sumsqv )
// -----------------------------------------------------------------------------
// Operations with only basic interfaces.
#ifdef BLIS_TAPI_BASIC
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
     ( \
       conj_t conjchi, \
       ctype* chi, \
       ctype* psi, \
       bool*  is_eq \
     ) \
{ \
	bli_init_once(); \
\
	/* Scratch scalar filled in by copycjs from *chi and conjchi. */ \
	ctype chi_staged; \
\
	PASTEMAC(ch,copycjs)( conjchi, *chi, chi_staged ); \
\
	/* The result of comparing the staged scalar with *psi is handed
	   back through is_eq. */ \
	*is_eq = PASTEMAC(ch,eq)( chi_staged, *psi ); \
}
INSERT_GENTFUNC_BASIC0( eqsc )
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
     ( \
       conj_t conjx, \
       dim_t  n, \
       ctype* x, inc_t incx, \
       ctype* y, inc_t incy, \
       bool*  is_eq \
     ) \
{ \
	bli_init_once(); \
\
	if ( bli_zero_dim1( n ) ) \
	{ \
		/* A zero-length x yields a result of TRUE. */ \
		*is_eq = TRUE; \
	} \
	else \
	{ \
		/* No context is queried here; the original placeholder was:
		   if ( cntx == NULL ) cntx = bli_gks_query_cntx(); */ \
\
		/* The _unb_var1 variant performs the actual comparison. */ \
		*is_eq = PASTEMAC2(ch,opname,_unb_var1)( conjx, n, x, incx, y, incy ); \
	} \
}
INSERT_GENTFUNC_BASIC0( eqv )
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
     ( \
       doff_t  diagoffx, \
       diag_t  diagx, \
       uplo_t  uplox, \
       trans_t transx, \
       dim_t   m, \
       dim_t   n, \
       ctype*  x, inc_t rs_x, inc_t cs_x, \
       ctype*  y, inc_t rs_y, inc_t cs_y, \
       bool*   is_eq \
     ) \
{ \
	bli_init_once(); \
\
	if ( bli_zero_dim2( m, n ) ) \
	{ \
		/* A zero dimension yields a result of TRUE; the _unb_var()
		   variant explains why TRUE is the answer in this scenario. */ \
		*is_eq = TRUE; \
	} \
	else \
	{ \
		/* No context is queried here; the original placeholder was:
		   if ( cntx == NULL ) cntx = bli_gks_query_cntx(); */ \
\
		/* Hand the comparison to the helper variant. */ \
		*is_eq = PASTEMAC2(ch,opname,_unb_var1)( diagoffx, diagx, uplox, transx, \
		                                         m, n, \
		                                         x, rs_x, cs_x, \
		                                         y, rs_y, cs_y ); \
	} \
}
INSERT_GENTFUNC_BASIC0( eqm )
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, varname ) \
\
void PASTEMAC(ch,opname) \
     ( \
       char* s1, \
       dim_t n, \
       void* x, inc_t incx, \
       char* format, \
       char* s2 \
     ) \
{ \
	bli_init_once(); \
\
	/* Same operation as the typed varname function, with the output
	   stream fixed to stdout. */ \
	PASTEMAC(ch,varname)( stdout, s1, n, x, incx, format, s2 ); \
}
INSERT_GENTFUNC_BASIC_I( printv, fprintv )
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, varname ) \
\
void PASTEMAC(ch,opname) \
     ( \
       char* s1, \
       dim_t m, \
       dim_t n, \
       void* x, inc_t rs_x, inc_t cs_x, \
       char* format, \
       char* s2 \
     ) \
{ \
	bli_init_once(); \
\
	/* Same operation as the typed varname function, with the output
	   stream fixed to stdout. */ \
	PASTEMAC(ch,varname)( stdout, s1, m, n, x, rs_x, cs_x, format, s2 ); \
}
INSERT_GENTFUNC_BASIC_I( printm, fprintm )
#endif // #ifdef BLIS_TAPI_BASIC
#endif

View File

@@ -103,37 +103,6 @@ INSERT_GENTPROTR_BASIC0( normfm )
INSERT_GENTPROTR_BASIC0( normim )
// Prototype template for the EX_SUF-suffixed typed-API printv: two
// caller-supplied strings (s1, s2), a length n, an untyped buffer x with its
// increment, and a format string.
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
( \
char* s1, \
dim_t n, \
void* x, inc_t incx, \
char* format, \
char* s2 \
);
INSERT_GENTPROT_BASIC0_I( printv )
// Prototype template for the EX_SUF-suffixed typed-API printm: two
// caller-supplied strings (s1, s2), dimensions m and n, an untyped buffer x
// with its row and column strides, and a format string.
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
( \
char* s1, \
dim_t m, \
dim_t n, \
void* x, inc_t rs_x, inc_t cs_x, \
char* format, \
char* s2 \
);
INSERT_GENTPROT_BASIC0_I( printm )
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
@@ -179,4 +148,89 @@ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
INSERT_GENTPROTR_BASIC0( sumsqv )
// -----------------------------------------------------------------------------
// Operations with basic interfaces only.
#ifdef BLIS_TAPI_BASIC
// Prototype template for the typed-API eqsc: compares scalar chi (subject to
// conjchi) with scalar psi and writes the boolean result through is_eq.
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
( \
conj_t conjchi, \
ctype* chi, \
ctype* psi, \
bool* is_eq \
);
INSERT_GENTPROT_BASIC0( eqsc )
// Prototype template for the typed-API eqv: compares length-n vectors x
// (subject to conjx) and y, each with its own increment, and writes the
// boolean result through is_eq.
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
( \
conj_t conjx, \
dim_t n, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
bool* is_eq \
);
INSERT_GENTPROT_BASIC0( eqv )
// Prototype template for the typed-API eqm: compares matrices x and y, where
// x is additionally described by a diagonal offset, diag/uplo properties and
// a trans value. The boolean result is written through is_eq.
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
( \
doff_t diagoffx, \
diag_t diagx, \
uplo_t uplox, \
trans_t transx, \
dim_t m, \
dim_t n, \
ctype* x, inc_t rs_x, inc_t cs_x, \
ctype* y, inc_t rs_y, inc_t cs_y, \
bool* is_eq \
);
INSERT_GENTPROT_BASIC0( eqm )
// Prototype template for the basic typed-API printv: two caller-supplied
// strings (s1, s2), a length n, an untyped buffer x with its increment, and
// a format string.
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
( \
char* s1, \
dim_t n, \
void* x, inc_t incx, \
char* format, \
char* s2 \
);
INSERT_GENTPROT_BASIC0_I( printv )
// Prototype template for the basic typed-API printm: two caller-supplied
// strings (s1, s2), dimensions m and n, an untyped buffer x with its row and
// column strides, and a format string.
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
( \
char* s1, \
dim_t m, \
dim_t n, \
void* x, inc_t rs_x, inc_t cs_x, \
char* format, \
char* s2 \
);
INSERT_GENTPROT_BASIC0_I( printm )
#endif // #ifdef BLIS_TAPI_BASIC

View File

@@ -862,85 +862,6 @@ void PASTEMAC(ch,varname) \
INSERT_GENTFUNCR_BASIC( normim_unb_var1, norm1m_unb_var1 )
// Typed fprintv template: writes s1 on its own line, then the n elements of
// x (stepping by incx) one per line using the fprints macro for this
// datatype, then s2 on its own line. A NULL format selects the datatype's
// default specifier obtained from formatspec. The stream is not flushed.
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
( \
FILE* file, \
char* s1, \
dim_t n, \
ctype* x, inc_t incx, \
char* format, \
char* s2 \
) \
{ \
dim_t i; \
ctype* chi1; \
char default_spec[32] = PASTEMAC(ch,formatspec)(); \
\
/* Fall back to the per-datatype specifier when none was supplied. */ \
if ( format == NULL ) format = default_spec; \
\
chi1 = x; \
\
fprintf( file, "%s\n", s1 ); \
\
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,fprints)( file, format, *chi1 ); \
fprintf( file, "\n" ); \
\
/* Advance to the next element of x. */ \
chi1 += incx; \
} \
\
fprintf( file, "%s\n", s2 ); \
}
INSERT_GENTFUNC_BASIC0_I( fprintv )
// Typed fprintm template: writes s1 on its own line, then the m x n matrix x
// one row per output line (each element followed by a space), then s2 on
// its own line, and finally flushes the stream. A NULL format selects the
// datatype's default specifier obtained from formatspec.
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
( \
FILE* file, \
char* s1, \
dim_t m, \
dim_t n, \
ctype* x, inc_t rs_x, inc_t cs_x, \
char* format, \
char* s2 \
) \
{ \
dim_t i, j; \
ctype* chi1; \
char default_spec[32] = PASTEMAC(ch,formatspec)(); \
\
/* Fall back to the per-datatype specifier when none was supplied. */ \
if ( format == NULL ) format = default_spec; \
\
fprintf( file, "%s\n", s1 ); \
\
for ( i = 0; i < m; ++i ) \
{ \
for ( j = 0; j < n; ++j ) \
{ \
/* Address of element (i,j) given row stride rs_x and column stride cs_x. */ \
chi1 = (( ctype* ) x) + i*rs_x + j*cs_x; \
\
PASTEMAC(ch,fprints)( file, format, *chi1 ); \
fprintf( file, " " ); \
} \
\
fprintf( file, "\n" ); \
} \
\
fprintf( file, "%s\n", s2 ); \
fflush( file ); \
}
INSERT_GENTFUNC_BASIC0_I( fprintm )
#undef GENTFUNC
#define GENTFUNC( ctype, ch, varname, randmac ) \
\
@@ -1215,3 +1136,238 @@ void PASTEMAC(ch,varname) \
INSERT_GENTFUNCR_BASIC0( sumsqv_unb_var1 )
// -----------------------------------------------------------------------------
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
bool PASTEMAC(ch,opname) \
     ( \
       conj_t conjx, \
       dim_t  n, \
       ctype* x, inc_t incx, \
       ctype* y, inc_t incy  \
     ) \
{ \
	/* Visit corresponding elements of x and y; the first unequal pair
	   decides the answer. */ \
	for ( dim_t idx = 0; idx < n; ++idx ) \
	{ \
		ctype* x_elem = x + idx * incx; \
		ctype* y_elem = y + idx * incy; \
		ctype  x_val; \
\
		/* Stage the element of x in a temporary, via copyjs when conjx
		   indicates conjugation and via copys otherwise. */ \
		if ( bli_is_conj( conjx ) ) \
		{ \
			PASTEMAC(ch,copyjs)( *x_elem, x_val ); \
		} \
		else \
		{ \
			PASTEMAC(ch,copys)( *x_elem, x_val ); \
		} \
\
		if ( PASTEMAC(ch,eq)( x_val, *y_elem ) ) continue; \
\
		return FALSE; \
	} \
\
	/* Every pair compared equal (trivially so if the loop never ran). */ \
	return TRUE; \
}
INSERT_GENTFUNC_BASIC0( eqv_unb_var1 )
// Typed eqm helper template: returns TRUE when every compared element of x
// (copied via copyjs when the conj component of transx indicates conjugation,
// via copys otherwise) equals the corresponding element of y, and FALSE at
// the first mismatch. Which elements are visited depends on the effective
// uplo value and the loop parameters produced by bli_set_dims_incs_uplo_2m.
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
bool PASTEMAC(ch,opname) \
( \
doff_t diagoffx, \
diag_t diagx, \
uplo_t uplox, \
trans_t transx, \
dim_t m, \
dim_t n, \
ctype* x, inc_t rs_x, inc_t cs_x, \
ctype* y, inc_t rs_y, inc_t cs_y \
) \
{ \
uplo_t uplox_eff; \
conj_t conjx; \
dim_t n_iter; \
dim_t n_elem_max; \
inc_t ldx, incx; \
inc_t ldy, incy; \
dim_t ij0, n_shift; \
\
/* Set various loop parameters. */ \
bli_set_dims_incs_uplo_2m \
( \
diagoffx, diagx, transx, \
uplox, m, n, rs_x, cs_x, rs_y, cs_y, \
&uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, &incy, &ldy, \
&ij0, &n_shift \
); \
\
/* In the odd case where we are comparing against a complete unstored
matrix, we assert equality. Why? We assume the matrices are equal
unless we can find two corresponding elements that are unequal. So
if there are no elements, there is no inequality. Granted, this logic
is strange to think about no matter what, and thankfully it should
never be used under normal usage. */ \
if ( bli_is_zeros( uplox_eff ) ) return TRUE; \
\
/* Extract the conjugation component from the transx parameter. */ \
conjx = bli_extract_conj( transx ); \
\
/* Handle dense and upper/lower storage cases separately. */ \
if ( bli_is_dense( uplox_eff ) ) \
{ \
/* Dense: each of the n_iter outer iterations compares the full
n_elem_max elements. */ \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
const dim_t n_elem = n_elem_max; \
\
ctype* x1 = x + (j )*ldx + (0 )*incx; \
ctype* y1 = y + (j )*ldy + (0 )*incy; \
\
for ( dim_t i = 0; i < n_elem; ++i ) \
{ \
ctype* x11 = x1 + (i )*incx; \
ctype* y11 = y1 + (i )*incy; \
ctype x11c; \
\
if ( bli_is_conj( conjx ) ) { PASTEMAC(ch,copyjs)( *x11, x11c ); } \
else { PASTEMAC(ch,copys)( *x11, x11c ); } \
\
if ( !PASTEMAC(ch,eq)( x11c, *y11 ) ) \
return FALSE; \
} \
} \
} \
else \
{ \
if ( bli_is_upper( uplox_eff ) ) \
{ \
/* Upper: outer iteration j starts at offset ij0+j along the leading
dimension and compares min( n_shift + j + 1, n_elem_max ) elements
counted from inner index 0. */ \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
const dim_t n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
\
ctype* x1 = x + (ij0+j )*ldx + (0 )*incx; \
ctype* y1 = y + (ij0+j )*ldy + (0 )*incy; \
\
for ( dim_t i = 0; i < n_elem; ++i ) \
{ \
ctype* x11 = x1 + (i )*incx; \
ctype* y11 = y1 + (i )*incy; \
ctype x11c; \
\
if ( bli_is_conj( conjx ) ) { PASTEMAC(ch,copyjs)( *x11, x11c ); } \
else { PASTEMAC(ch,copys)( *x11, x11c ); } \
\
if ( !PASTEMAC(ch,eq)( x11c, *y11 ) ) \
return FALSE; \
} \
} \
} \
else if ( bli_is_lower( uplox_eff ) ) \
{ \
/* Lower: outer iteration j skips the first offi = max( 0, j - n_shift )
inner elements (on top of the ij0 offset) and compares the remaining
n_elem_max - offi elements. */ \
for ( dim_t j = 0; j < n_iter; ++j ) \
{ \
const dim_t offi = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
const dim_t n_elem = n_elem_max - offi; \
\
ctype* x1 = x + (j )*ldx + (ij0+offi )*incx; \
ctype* y1 = y + (j )*ldy + (ij0+offi )*incy; \
\
for ( dim_t i = 0; i < n_elem; ++i ) \
{ \
ctype* x11 = x1 + (i )*incx; \
ctype* y11 = y1 + (i )*incy; \
ctype x11c; \
\
if ( bli_is_conj( conjx ) ) { PASTEMAC(ch,copyjs)( *x11, x11c ); } \
else { PASTEMAC(ch,copys)( *x11, x11c ); } \
\
if ( !PASTEMAC(ch,eq)( x11c, *y11 ) ) \
return FALSE; \
} \
} \
} \
} \
\
/* No mismatching pair was found among the elements visited. */ \
return TRUE; \
}
INSERT_GENTFUNC_BASIC0( eqm_unb_var1 )
// Typed fprintv template: writes s1 on its own line, then the n elements of
// x (stepping by incx) one per line using the fprints macro for this
// datatype, then s2 on its own line, and finally flushes the stream.
//
//   file    output stream
//   s1, s2  strings printed before and after the vector
//   n       number of elements to print
//   x, incx first element and the step between consecutive elements
//   format  element format string; NULL selects the datatype's default
//           specifier obtained from formatspec
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
     ( \
       FILE*  file, \
       char*  s1, \
       dim_t  n, \
       ctype* x, inc_t incx, \
       char*  format, \
       char*  s2 \
     ) \
{ \
	dim_t  i; \
	ctype* chi1; \
	char   default_spec[32] = PASTEMAC(ch,formatspec)(); \
\
	/* Fall back to the per-datatype specifier when none was supplied. */ \
	if ( format == NULL ) format = default_spec; \
\
	chi1 = x; \
\
	fprintf( file, "%s\n", s1 ); \
\
	for ( i = 0; i < n; ++i ) \
	{ \
		PASTEMAC(ch,fprints)( file, format, *chi1 ); \
		fprintf( file, "\n" ); \
\
		/* Advance to the next element of x. */ \
		chi1 += incx; \
	} \
\
	fprintf( file, "%s\n", s2 ); \
\
	/* Flush so the output is visible promptly, as the matrix counterpart
	   fprintm already does at the end of its own output. */ \
	fflush( file ); \
}
INSERT_GENTFUNC_BASIC0_I( fprintv )
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
     ( \
       FILE*  file, \
       char*  s1, \
       dim_t  m, \
       dim_t  n, \
       ctype* x, inc_t rs_x, inc_t cs_x, \
       char*  format, \
       char*  s2 \
     ) \
{ \
	char fallback_fmt[32] = PASTEMAC(ch,formatspec)(); \
\
	/* A NULL format selects the datatype's built-in specifier. */ \
	if ( format == NULL ) format = fallback_fmt; \
\
	/* Leading string on its own line. */ \
	fprintf( file, "%s\n", s1 ); \
\
	/* One output line per matrix row; every element is followed by a
	   single space. */ \
	for ( dim_t i_row = 0; i_row < m; ++i_row ) \
	{ \
		for ( dim_t j_col = 0; j_col < n; ++j_col ) \
		{ \
			ctype* elem = ( ( ctype* )x ) + i_row*rs_x + j_col*cs_x; \
\
			PASTEMAC(ch,fprints)( file, format, *elem ); \
			fprintf( file, " " ); \
		} \
\
		fprintf( file, "\n" ); \
	} \
\
	/* Trailing string on its own line, then push everything out. */ \
	fprintf( file, "%s\n", s2 ); \
	fflush( file ); \
}
INSERT_GENTFUNC_BASIC0_I( fprintm )

View File

@@ -107,39 +107,6 @@ INSERT_GENTPROTR_BASIC0( normfm_unb_var1 )
INSERT_GENTPROTR_BASIC0( normim_unb_var1 )
// Prototype template for the typed fprintv: output stream, two
// caller-supplied strings (s1, s2), a length n, a typed buffer x with its
// increment, and a format string.
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
( \
FILE* file, \
char* s1, \
dim_t n, \
ctype* x, inc_t incx, \
char* format, \
char* s2 \
);
INSERT_GENTPROT_BASIC0_I( fprintv )
// Prototype template for the typed fprintm: output stream, two
// caller-supplied strings (s1, s2), dimensions m and n, a typed buffer x
// with its row and column strides, and a format string.
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
( \
FILE* file, \
char* s1, \
dim_t m, \
dim_t n, \
ctype* x, inc_t rs_x, inc_t cs_x, \
char* format, \
char* s2 \
);
INSERT_GENTPROT_BASIC0_I( fprintm )
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
@@ -188,3 +155,70 @@ void PASTEMAC(ch,varname) \
INSERT_GENTPROTR_BASIC0( sumsqv_unb_var1 )
// -----------------------------------------------------------------------------
// Prototype template for the typed eqv_unb_var1 helper. Unlike the eqv entry
// point, it returns its boolean result directly and is declared without
// BLIS_EXPORT_BLIS.
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
bool PASTEMAC(ch,varname) \
( \
conj_t conjx, \
dim_t n, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy \
);
INSERT_GENTPROT_BASIC0( eqv_unb_var1 )
// Prototype template for the typed eqm_unb_var1 helper. Unlike the eqm entry
// point, it returns its boolean result directly and is declared without
// BLIS_EXPORT_BLIS.
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
bool PASTEMAC(ch,varname) \
( \
doff_t diagoffx, \
diag_t diagx, \
uplo_t uplox, \
trans_t transx, \
dim_t m, \
dim_t n, \
ctype* x, inc_t rs_x, inc_t cs_x, \
ctype* y, inc_t rs_y, inc_t cs_y \
);
INSERT_GENTPROT_BASIC0( eqm_unb_var1 )
// Prototype template for the typed fprintv: output stream, two
// caller-supplied strings (s1, s2), a length n, a typed buffer x with its
// increment, and a format string.
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
( \
FILE* file, \
char* s1, \
dim_t n, \
ctype* x, inc_t incx, \
char* format, \
char* s2 \
);
INSERT_GENTPROT_BASIC0_I( fprintv )
// Prototype template for the typed fprintm: output stream, two
// caller-supplied strings (s1, s2), dimensions m and n, a typed buffer x
// with its row and column strides, and a format string.
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
( \
FILE* file, \
char* s1, \
dim_t m, \
dim_t n, \
ctype* x, inc_t rs_x, inc_t cs_x, \
char* format, \
char* s2 \
);
INSERT_GENTPROT_BASIC0_I( fprintm )

View File

@@ -268,12 +268,12 @@ static err_t bli_sgemm_small
bli_rntm_init_from_global( &rntm );
bli_rntm_set_num_threads_only( 1, &rntm );
bli_membrk_rntm_set_membrk( &rntm );
bli_pba_rntm_set_pba( &rntm );
// Get the current size of the buffer pool for A block packing.
// We will use the same size to avoid pool re-initialization
siz_t buffer_size = bli_pool_block_size(bli_membrk_pool(bli_packbuf_index(BLIS_BITVAL_BUFFER_FOR_A_BLOCK),
bli_rntm_membrk(&rntm)));
siz_t buffer_size = bli_pool_block_size(bli_pba_pool(bli_packbuf_index(BLIS_BITVAL_BUFFER_FOR_A_BLOCK),
bli_rntm_pba(&rntm)));
// Based on the available memory in the buffer we will decide if
// we want to do packing or not.
@@ -299,7 +299,7 @@ static err_t bli_sgemm_small
#endif
// Get the buffer from the pool, if there is no pool with
// required size, it will be created.
bli_membrk_acquire_m(&rntm,
bli_pba_acquire_m(&rntm,
buffer_size,
BLIS_BITVAL_BUFFER_FOR_A_BLOCK,
&local_mem_buf_A_s);
@@ -1699,7 +1699,7 @@ static err_t bli_sgemm_small
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_sgemm_small(): releasing mem pool block\n" );
#endif
bli_membrk_release(&rntm,
bli_pba_release(&rntm,
&local_mem_buf_A_s);
}
@@ -1833,13 +1833,13 @@ static err_t bli_dgemm_small
bli_rntm_init_from_global( &rntm );
bli_rntm_set_num_threads_only( 1, &rntm );
bli_membrk_rntm_set_membrk( &rntm );
bli_pba_rntm_set_pba( &rntm );
// Get the current size of the buffer pool for A block packing.
// We will use the same size to avoid pool re-initialization
siz_t buffer_size = bli_pool_block_size(
bli_membrk_pool(bli_packbuf_index(BLIS_BITVAL_BUFFER_FOR_A_BLOCK),
bli_rntm_membrk(&rntm)));
bli_pba_pool(bli_packbuf_index(BLIS_BITVAL_BUFFER_FOR_A_BLOCK),
bli_rntm_pba(&rntm)));
//
// This kernel assumes that "A" will be unpacked if N <= 3.
@@ -1863,7 +1863,7 @@ static err_t bli_dgemm_small
printf( "bli_dgemm_small: Requesting mem pool block of size %lu\n", buffer_size);
#endif
// Get the buffer from the pool.
bli_membrk_acquire_m(&rntm,
bli_pba_acquire_m(&rntm,
buffer_size,
BLIS_BITVAL_BUFFER_FOR_A_BLOCK,
&local_mem_buf_A_s);
@@ -3309,7 +3309,7 @@ static err_t bli_dgemm_small
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "bli_dgemm_small(): releasing mem pool block\n" );
#endif
bli_membrk_release(&rntm,
bli_pba_release(&rntm,
&local_mem_buf_A_s);
}
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO);

View File

@@ -10773,10 +10773,11 @@ static err_t bli_dtrsm_small_XAltB_unitDiag(
k_iter = j / D_NR; //number of GEMM operations to be performed(in block of 4x4)
dim_t iter;
err_t r_val;
if((j+n_remainder) == n)
{
f_temp = bli_malloc_user(4 * sizeof(double));
f_temp = bli_malloc_user(4 * sizeof(double), &r_val);
for(iter = 0; iter < m_remainder; iter++)
f_temp[iter] = (b11 + cs_b * (n_remainder-1))[iter];
}

View File

@@ -593,10 +593,6 @@ void GENBARNAME(cntx_init)
bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS, cntx );
bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS, cntx );
bli_cntx_set_schema_c_panel( BLIS_NOT_PACKED, cntx );
//bli_cntx_set_anti_pref( FALSE, cntx );
//bli_cntx_set_membrk( bli_membrk_query(), cntx );
}
// -----------------------------------------------------------------------------

View File

@@ -45,7 +45,7 @@ void blx_l3_packm
thrinfo_t* thread
)
{
membrk_t* membrk;
pba_t* pba;
packbuf_t pack_buf_type;
mem_t* cntl_mem_p;
siz_t size_needed;
@@ -71,7 +71,7 @@ void blx_l3_packm
if ( size_needed == 0 ) return;
// Query the memory broker from the context.
membrk = bli_cntx_get_membrk( cntx );
pba = bli_cntx_get_pba( cntx );
// Query the pack buffer type from the control tree node.
pack_buf_type = bli_cntl_packm_params_pack_buf_type( cntl );
@@ -91,9 +91,9 @@ void blx_l3_packm
{
// The chief thread acquires a block from the memory broker
// and saves the associated mem_t entry to local_mem_s.
bli_membrk_acquire_m
bli_pba_acquire_m
(
membrk,
pba,
size_needed,
pack_buf_type,
&local_mem_s
@@ -130,10 +130,10 @@ void blx_l3_packm
// The chief thread releases the existing block associated with
// the mem_t entry in the control tree, and then re-acquires a
// new block, saving the associated mem_t entry to local_mem_s.
bli_membrk_release( cntl_mem_p );
bli_membrk_acquire_m
bli_pba_release( cntl_mem_p );
bli_pba_acquire_m
(
membrk,
pba,
size_needed,
pack_buf_type,
&local_mem_s

View File

@@ -147,7 +147,7 @@ void blx_gemm_thread
// Set the packing block allocator field of the rntm. This will be
// inherited by all of the child threads when they make local copies of
// the rntm below.
bli_membrk_rntm_set_membrk( rntm );
bli_pba_rntm_set_pba( rntm );
// Allocate a global communicator for the root thrinfo_t structures.
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );

View File

@@ -1,2 +1,2 @@
3
4
0.0

View File

@@ -121,6 +121,8 @@ void* libblis_test_thread_entry( void* tdata_void )
void libblis_test_thread_decorator( test_params_t* params, test_ops_t* ops )
{
err_t r_val;
// Query the total number of threads to simulate.
size_t nt = ( size_t )params->n_app_threads;
@@ -130,12 +132,12 @@ void libblis_test_thread_decorator( test_params_t* params, test_ops_t* ops )
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "libblis_test_thread_decorator(): " );
#endif
bli_pthread_t* pthread = bli_malloc_user( sizeof( bli_pthread_t ) * nt );
bli_pthread_t* pthread = bli_malloc_user( sizeof( bli_pthread_t ) * nt, &r_val );
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "libblis_test_thread_decorator(): " );
#endif
thread_data_t* tdata = bli_malloc_user( sizeof( thread_data_t ) * nt );
thread_data_t* tdata = bli_malloc_user( sizeof( thread_data_t ) * nt, &r_val );
// Allocate a mutex for the threads to share.
//bli_pthread_mutex_t* mutex = bli_malloc_user( sizeof( bli_pthread_mutex_t ) );
@@ -145,7 +147,7 @@ void libblis_test_thread_decorator( test_params_t* params, test_ops_t* ops )
#ifdef BLIS_ENABLE_MEM_TRACING
printf( "libblis_test_thread_decorator(): " );
#endif
bli_pthread_barrier_t* barrier = bli_malloc_user( sizeof( bli_pthread_barrier_t ) );
bli_pthread_barrier_t* barrier = bli_malloc_user( sizeof( bli_pthread_barrier_t ), &r_val );
// Initialize the mutex.
//bli_pthread_mutex_init( mutex, NULL );