mirror of
https://github.com/amd/blis.git
synced 2026-03-15 23:07:22 +00:00
Merge branch 'dev'
This commit is contained in:
@@ -1297,17 +1297,17 @@ bli_malloc_user
|
||||
bli_mbool_create
|
||||
bli_mbool_free
|
||||
bli_mbool_init
|
||||
bli_membrk_acquire_m
|
||||
bli_membrk_compute_pool_block_sizes
|
||||
bli_membrk_compute_pool_block_sizes_dt
|
||||
bli_membrk_finalize
|
||||
bli_membrk_finalize_pools
|
||||
bli_membrk_init
|
||||
bli_membrk_init_pools
|
||||
bli_membrk_pool_size
|
||||
bli_membrk_query
|
||||
bli_membrk_release
|
||||
bli_membrk_rntm_set_membrk
|
||||
bli_pba_acquire_m
|
||||
bli_pba_compute_pool_block_sizes
|
||||
bli_pba_compute_pool_block_sizes_dt
|
||||
bli_pba_finalize
|
||||
bli_pba_finalize_pools
|
||||
bli_pba_init
|
||||
bli_pba_init_pools
|
||||
bli_pba_pool_size
|
||||
bli_pba_query
|
||||
bli_pba_release
|
||||
bli_pba_rntm_set_pba
|
||||
bli_memsys_finalize
|
||||
bli_memsys_init
|
||||
bli_mkherm
|
||||
|
||||
@@ -53,7 +53,7 @@ This index provides a quick way to jump directly to the description for each ope
|
||||
* **[Level-3](BLISObjectAPI.md#level-3-operations)**: Operations with matrices that are multiplication-like:
|
||||
* [gemm](BLISObjectAPI.md#gemm), [hemm](BLISObjectAPI.md#hemm), [herk](BLISObjectAPI.md#herk), [her2k](BLISObjectAPI.md#her2k), [symm](BLISObjectAPI.md#symm), [syrk](BLISObjectAPI.md#syrk), [syr2k](BLISObjectAPI.md#syr2k), [trmm](BLISObjectAPI.md#trmm), [trmm3](BLISObjectAPI.md#trmm3), [trsm](BLISObjectAPI.md#trsm)
|
||||
* **[Utility](BLISObjectAPI.md#Utility-operations)**: Miscellaneous operations on matrices and vectors:
|
||||
* [asumv](BLISObjectAPI.md#asumv), [norm1v](BLISObjectAPI.md#norm1v), [normfv](BLISObjectAPI.md#normfv), [normiv](BLISObjectAPI.md#normiv), [norm1m](BLISObjectAPI.md#norm1m), [normfm](BLISObjectAPI.md#normfm), [normim](BLISObjectAPI.md#normim), [mkherm](BLISObjectAPI.md#mkherm), [mksymm](BLISObjectAPI.md#mksymm), [mktrim](BLISObjectAPI.md#mktrim), [fprintv](BLISObjectAPI.md#fprintv), [fprintm](BLISObjectAPI.md#fprintm),[printv](BLISObjectAPI.md#printv), [printm](BLISObjectAPI.md#printm), [randv](BLISObjectAPI.md#randv), [randm](BLISObjectAPI.md#randm), [sumsqv](BLISObjectAPI.md#sumsqv), [getijm](BLISObjectAPI.md#getijm), [setijm](BLISObjectAPI.md#setijm)
|
||||
* [asumv](BLISObjectAPI.md#asumv), [norm1v](BLISObjectAPI.md#norm1v), [normfv](BLISObjectAPI.md#normfv), [normiv](BLISObjectAPI.md#normiv), [norm1m](BLISObjectAPI.md#norm1m), [normfm](BLISObjectAPI.md#normfm), [normim](BLISObjectAPI.md#normim), [mkherm](BLISObjectAPI.md#mkherm), [mksymm](BLISObjectAPI.md#mksymm), [mktrim](BLISObjectAPI.md#mktrim), [fprintv](BLISObjectAPI.md#fprintv), [fprintm](BLISObjectAPI.md#fprintm),[printv](BLISObjectAPI.md#printv), [printm](BLISObjectAPI.md#printm), [randv](BLISObjectAPI.md#randv), [randm](BLISObjectAPI.md#randm), [sumsqv](BLISObjectAPI.md#sumsqv), [getsc](BLISObjectAPI.md#getsc), [getijv](BLISObjectAPI.md#getijv), [getijm](BLISObjectAPI.md#getijm), [setsc](BLISObjectAPI.md#setsc), [setijv](BLISObjectAPI.md#setijv), [setijm](BLISObjectAPI.md#setijm), [eqsc](BLISObjectAPI.md#eqsc), [eqv](BLISObjectAPI.md#eqv), [eqm](BLISObjectAPI.md#eqm)
|
||||
|
||||
|
||||
|
||||
@@ -790,6 +790,8 @@ Perform
|
||||
```
|
||||
where `x` and `y` are vectors of length _n_.
|
||||
|
||||
Observed object properties: `conj?(x)`.
|
||||
|
||||
---
|
||||
|
||||
#### dotv
|
||||
@@ -807,6 +809,8 @@ Perform
|
||||
```
|
||||
where `x` and `y` are vectors of length _n_, and `rho` is a scalar.
|
||||
|
||||
Observed object properties: `conj?(x)`, `conj?(y)`.
|
||||
|
||||
---
|
||||
|
||||
#### dotxv
|
||||
@@ -826,6 +830,8 @@ Perform
|
||||
```
|
||||
where `x` and `y` are vectors of length _n_, and `alpha`, `beta`, and `rho` are scalars.
|
||||
|
||||
Observed object properties: `conj?(alpha)`, `conj?(beta)`, `conj?(x)`, `conj?(y)`.
|
||||
|
||||
---
|
||||
|
||||
#### invertv
|
||||
@@ -2125,6 +2131,34 @@ where, on entry, `scale` and `sumsq` contain `scale_old` and `sumsq_old`, respec
|
||||
|
||||
---
|
||||
|
||||
#### getsc
|
||||
```c
|
||||
void bli_getsc
|
||||
(
|
||||
obj_t* chi,
|
||||
double* zeta_r,
|
||||
double* zeta_i
|
||||
)
|
||||
```
|
||||
Copy the real and imaginary values from the scalar object `chi` to `zeta_r` and `zeta_i`. If `chi` is stored as a real type, then `zeta_i` is set to zero. (If `chi` is stored in single precision, the corresponding elements are typecast/promoted during the copy.)
|
||||
|
||||
---
|
||||
|
||||
#### getijv
|
||||
```c
|
||||
err_t bli_getijv
|
||||
(
|
||||
dim_t i,
|
||||
obj_t* x,
|
||||
double* ar,
|
||||
double* ai
|
||||
)
|
||||
```
|
||||
Copy the real and imaginary values at the `i`th element of vector object `x` to `ar` and `ai`. If elements of `x` are stored as real types, then only `ar` is overwritten and `ai` is left unchanged. (If `x` contains elements stored in single precision, the corresponding elements are typecast/promoted during the copy.)
|
||||
If the element offset `i` is beyond the vector dimension of `x` or less than zero, the function returns `BLIS_FAILURE` without taking any action. Similarly, if `x` is a global scalar constant such as `BLIS_ONE`, the function returns `BLIS_FAILURE`.
|
||||
|
||||
---
|
||||
|
||||
#### getijm
|
||||
```c
|
||||
err_t bli_getijm
|
||||
@@ -2136,8 +2170,38 @@ err_t bli_getijm
|
||||
double* ai
|
||||
)
|
||||
```
|
||||
Copy the real and imaginary values at the (`i`,`j`) element of object `b` to `ar` and `ai`. f elements of `b` are stored as real types, then only `ar` is overwritten and `ai` is left unchanged. (If `b` contains elements stored in single precision, the corresponding elements are typecast/promoted during the copy.)
|
||||
If either the row offset `i` is beyond the _m_ dimension of `b`, or column offset `j` is beyond the _n_ dimension of `b`, the function does not perform any copy and returns `BLIS_FAILURE`. Similarly, if `b` is a global scalar constant such as `BLIS_ONE`, `BLIS_FAILURE` is returned.
|
||||
Copy the real and imaginary values at the (`i`,`j`) element of object `b` to `ar` and `ai`. If elements of `b` are stored as real types, then only `ar` is overwritten and `ai` is left unchanged. (If `b` contains elements stored in single precision, the corresponding elements are typecast/promoted during the copy.)
|
||||
If either the row offset `i` is beyond the _m_ dimension of `b` or less than zero, or column offset `j` is beyond the _n_ dimension of `b` or less than zero, the function returns `BLIS_FAILURE` without taking any action. Similarly, if `b` is a global scalar constant such as `BLIS_ONE`, the function returns `BLIS_FAILURE`.
|
||||
|
||||
---
|
||||
|
||||
#### setsc
|
||||
```c
|
||||
void bli_setsc
|
||||
(
|
||||
double* zeta_r,
|
||||
double* zeta_i,
|
||||
obj_t* chi
|
||||
);
|
||||
```
|
||||
Copy real and imaginary values `zeta_r` and `zeta_i` to the scalar object `chi`. If `chi` is stored as a real type, then `zeta_i` is ignored. (If `chi` is stored in single precision, the contents are typecast/demoted during the copy.)
|
||||
|
||||
---
|
||||
|
||||
#### setijv
|
||||
```c
|
||||
err_t bli_setijv
|
||||
(
|
||||
double ar,
|
||||
double ai,
|
||||
dim_t i,
|
||||
obj_t* x
|
||||
);
|
||||
```
|
||||
Copy real and imaginary values `ar` and `ai` to the `i`th element of vector object `x`. If elements of `x` are stored as real types, then only `ar` is copied and `ai` is ignored. (If `x` contains elements stored in single precision, the corresponding elements are typecast/demoted during the copy.)
|
||||
If the element offset `i` is beyond the vector dimension of `x` or less than zero, the function returns `BLIS_FAILURE` without taking any action. Similarly, if `x` is a global scalar constant such as `BLIS_ONE`, the function returns `BLIS_FAILURE`.
|
||||
|
||||
---
|
||||
|
||||
#### setijm
|
||||
```c
|
||||
@@ -2151,7 +2215,59 @@ err_t bli_setijm
|
||||
);
|
||||
```
|
||||
Copy real and imaginary values `ar` and `ai` to the (`i`,`j`) element of object `b`. If elements of `b` are stored as real types, then only `ar` is copied and `ai` is ignored. (If `b` contains elements stored in single precision, the corresponding elements are typecast/demoted during the copy.)
|
||||
If either the row offset `i` is beyond the _m_ dimension of `b`, or column offset `j` is beyond the _n_ dimension of `b`, the function does not perform any copy and returns `BLIS_FAILURE`. Similarly, if `b` is a global scalar constant such as `BLIS_ONE`, `BLIS_FAILURE` is returned.
|
||||
If either the row offset `i` is beyond the _m_ dimension of `b` or less than zero, or column offset `j` is beyond the _n_ dimension of `b` or less than zero, the function returns `BLIS_FAILURE` without taking any action. Similarly, if `b` is a global scalar constant such as `BLIS_ONE`, the function returns `BLIS_FAILURE`.
|
||||
|
||||
---
|
||||
|
||||
#### eqsc
|
||||
```c
|
||||
void bli_eqsc
|
||||
(
|
||||
obj_t* chi,
|
||||
obj_t* psi,
|
||||
bool* is_eq
|
||||
);
|
||||
```
|
||||
Perform an element-wise comparison between scalars `chi` and `psi` and store the boolean result in the `bool` pointed to by `is_eq`.
|
||||
If exactly one of `conj(chi)` or `conj(psi)` (but not both) indicates a conjugation, then one of the scalars will be implicitly conjugated for purposes of the comparison.
|
||||
|
||||
Observed object properties: `conj?(chi)`, `conj?(psi)`.
|
||||
|
||||
---
|
||||
|
||||
#### eqv
|
||||
```c
|
||||
void bli_eqv
|
||||
(
|
||||
obj_t* x,
|
||||
obj_t* y,
|
||||
bool* is_eq
|
||||
);
|
||||
```
|
||||
Perform an element-wise comparison between vectors `x` and `y` and store the boolean result in the `bool` pointed to by `is_eq`.
|
||||
If exactly one of `conj(x)` or `conj(y)` (but not both) indicates a conjugation, then one of the vectors will be implicitly conjugated for purposes of the comparison.
|
||||
|
||||
Observed object properties: `conj?(x)`, `conj?(y)`.
|
||||
|
||||
---
|
||||
|
||||
#### eqm
|
||||
```c
|
||||
void bli_eqm
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* b,
|
||||
bool* is_eq
|
||||
);
|
||||
```
|
||||
Perform an element-wise comparison between matrices `A` and `B` and store the boolean result in the `bool` pointed to by `is_eq`.
|
||||
Here, `A` is stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix with arbitrary diagonal offset and unit or non-unit diagonal.
|
||||
If `diag(A)` indicates a unit diagonal, the diagonals of both matrices will be ignored for purposes of the comparison.
|
||||
If `uplo(A)` indicates lower or upper storage, only that part of both matrices `A` and `B` will be referenced.
|
||||
If exactly one of `trans(A)` or `trans(B)` (but not both) indicates a transposition, then one of the matrices will be transposed for purposes of the comparison.
|
||||
Similarly, if exactly one of `trans(A)` or `trans(B)` (but not both) indicates a conjugation, then one of the matrices will be implicitly conjugated for purposes of the comparison.
|
||||
|
||||
Observed object properties: `diagoff(A)`, `diag(A)`, `uplo(A)`, `trans?(A)`, `trans?(B)`.
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -48,7 +48,7 @@ This index provides a quick way to jump directly to the description for each ope
|
||||
* **[Level-3](BLISTypedAPI.md#level-3-operations)**: Operations with matrices that are multiplication-like:
|
||||
* [gemm](BLISTypedAPI.md#gemm), [hemm](BLISTypedAPI.md#hemm), [herk](BLISTypedAPI.md#herk), [her2k](BLISTypedAPI.md#her2k), [symm](BLISTypedAPI.md#symm), [syrk](BLISTypedAPI.md#syrk), [syr2k](BLISTypedAPI.md#syr2k), [trmm](BLISTypedAPI.md#trmm), [trmm3](BLISTypedAPI.md#trmm3), [trsm](BLISTypedAPI.md#trsm)
|
||||
* **[Utility](BLISTypedAPI.md#Utility-operations)**: Miscellaneous operations on matrices and vectors:
|
||||
* [asumv](BLISTypedAPI.md#asumv), [norm1v](BLISTypedAPI.md#norm1v), [normfv](BLISTypedAPI.md#normfv), [normiv](BLISTypedAPI.md#normiv), [norm1m](BLISTypedAPI.md#norm1m), [normfm](BLISTypedAPI.md#normfm), [normim](BLISTypedAPI.md#normim), [mkherm](BLISTypedAPI.md#mkherm), [mksymm](BLISTypedAPI.md#mksymm), [mktrim](BLISTypedAPI.md#mktrim), [fprintv](BLISTypedAPI.md#fprintv), [fprintm](BLISTypedAPI.md#fprintm),[printv](BLISTypedAPI.md#printv), [printm](BLISTypedAPI.md#printm), [randv](BLISTypedAPI.md#randv), [randm](BLISTypedAPI.md#randm), [sumsqv](BLISTypedAPI.md#sumsqv)
|
||||
* [asumv](BLISTypedAPI.md#asumv), [norm1v](BLISTypedAPI.md#norm1v), [normfv](BLISTypedAPI.md#normfv), [normiv](BLISTypedAPI.md#normiv), [norm1m](BLISTypedAPI.md#norm1m), [normfm](BLISTypedAPI.md#normfm), [normim](BLISTypedAPI.md#normim), [mkherm](BLISTypedAPI.md#mkherm), [mksymm](BLISTypedAPI.md#mksymm), [mktrim](BLISTypedAPI.md#mktrim), [fprintv](BLISTypedAPI.md#fprintv), [fprintm](BLISTypedAPI.md#fprintm),[printv](BLISTypedAPI.md#printv), [printm](BLISTypedAPI.md#printm), [randv](BLISTypedAPI.md#randv), [randm](BLISTypedAPI.md#randm), [sumsqv](BLISTypedAPI.md#sumsqv), [getsc](BLISTypedAPI.md#getsc), [getijv](BLISTypedAPI.md#getijv), [getijm](BLISTypedAPI.md#getijm), [setsc](BLISTypedAPI.md#setsc), [setijv](BLISTypedAPI.md#setijv), [setijm](BLISTypedAPI.md#setijm), [eqsc](BLISTypedAPI.md#eqsc), [eqv](BLISTypedAPI.md#eqv), [eqm](BLISTypedAPI.md#eqm)
|
||||
|
||||
|
||||
|
||||
@@ -1695,6 +1695,149 @@ where, on entry, `scale` and `sumsq` contain `scale_old` and `sumsq_old`, respec
|
||||
|
||||
---
|
||||
|
||||
#### getsc
|
||||
```c
|
||||
void bli_?getsc
|
||||
(
|
||||
ctype* chi,
|
||||
double* zeta_r,
|
||||
double* zeta_i
|
||||
)
|
||||
```
|
||||
Copy the real and imaginary values from the scalar `chi` to `zeta_r` and `zeta_i`. If `chi` is stored as a real type, then `zeta_i` is set to zero. (If `chi` is stored in single precision, the corresponding elements are typecast/promoted during the copy.)
|
||||
|
||||
---
|
||||
|
||||
#### getijv
|
||||
```c
|
||||
err_t bli_?getijv
|
||||
(
|
||||
dim_t i,
|
||||
ctype* x, inc_t incx,
|
||||
double* ar,
|
||||
double* ai
|
||||
)
|
||||
```
|
||||
Copy the real and imaginary values at the `i`th element of vector `x` to `ar` and `ai`. For real domain invocations, only `ar` is overwritten and `ai` is left unchanged. (If `x` contains elements stored in single precision, the corresponding elements are typecast/promoted during the copy.)
|
||||
Note that the object-based analogue of [getijv](BLISObjectAPI.md#getijv) does bounds checking of the vector element offset `i` against the vector length while the typed functions specified above do not (since the vector length is not given).
|
||||
|
||||
---
|
||||
|
||||
#### getijm
|
||||
```c
|
||||
err_t bli_?getijm
|
||||
(
|
||||
dim_t i,
|
||||
dim_t j,
|
||||
ctype* b, inc_t rs_b, inc_t cs_b,
|
||||
double* ar,
|
||||
double* ai
|
||||
)
|
||||
```
|
||||
Copy the real and imaginary values at the (`i`,`j`) element of matrix `b` to `ar` and `ai`. For real domain invocations, only `ar` is overwritten and `ai` is left unchanged. (If `b` contains elements stored in single precision, the corresponding elements are typecast/promoted during the copy.)
|
||||
Note that the object-based analogue of [getijm](BLISObjectAPI.md#getijm) does bounds checking of the matrix element offsets (`i`,`j`) against the matrix dimensions while the typed functions specified above do not (since the matrix dimensions are not given).
|
||||
|
||||
---
|
||||
|
||||
#### setsc
|
||||
```c
|
||||
void bli_?setsc
|
||||
(
|
||||
double* zeta_r,
|
||||
double* zeta_i,
|
||||
ctype* chi
|
||||
);
|
||||
```
|
||||
Copy real and imaginary values `zeta_r` and `zeta_i` to the scalar `chi`. If `chi` is stored as a real type, then `zeta_i` is ignored. (If `chi` is stored in single precision, the contents are typecast/demoted during the copy.)
|
||||
|
||||
---
|
||||
|
||||
#### setijv
|
||||
```c
|
||||
err_t bli_?setijv
|
||||
(
|
||||
double ar,
|
||||
double ai,
|
||||
dim_t i,
|
||||
ctype* x, inc_t incx
|
||||
);
|
||||
```
|
||||
Copy real and imaginary values `ar` and `ai` to the `i`th element of vector `x`. For real domain invocations, only `ar` is copied and `ai` is ignored. (If `x` contains elements stored in single precision, the corresponding elements are typecast/demoted during the copy.)
|
||||
Note that the object-based analogue of [setijv](BLISObjectAPI.md#setijv) does bounds checking of the vector element offset `i` against the vector length while the typed functions specified above do not (since the vector length is not given).
|
||||
|
||||
---
|
||||
|
||||
#### setijm
|
||||
```c
|
||||
err_t bli_?setijm
|
||||
(
|
||||
double ar,
|
||||
double ai,
|
||||
dim_t i,
|
||||
dim_t j,
|
||||
ctype* b, inc_t rs_b, inc_t cs_b
|
||||
);
|
||||
```
|
||||
Copy real and imaginary values `ar` and `ai` to the (`i`,`j`) element of matrix `b`. For real domain invocations, only `ar` is copied and `ai` is ignored. (If `b` contains elements stored in single precision, the corresponding elements are typecast/demoted during the copy.)
|
||||
Note that the object-based analogue of [setijm](BLISObjectAPI.md#setijm) does bounds checking of the matrix element offsets (`i`,`j`) against the matrix dimensions while the typed functions specified above do not (since the matrix dimensions are not given).
|
||||
|
||||
---
|
||||
|
||||
#### eqsc
|
||||
```c
|
||||
void bli_?eqsc
|
||||
(
|
||||
conj_t conjchi,
|
||||
ctype* chi,
|
||||
ctype* psi,
|
||||
bool* is_eq
|
||||
);
|
||||
```
|
||||
Perform an element-wise comparison between scalars `chi` and `psi` and store the boolean result in the `bool` pointed to by `is_eq`.
|
||||
If `conjchi` indicates a conjugation, `chi` will be implicitly conjugated for purposes of the comparison.
|
||||
|
||||
---
|
||||
|
||||
#### eqv
|
||||
```c
|
||||
void bli_?eqv
|
||||
(
|
||||
conj_t conjx,
|
||||
dim_t n,
|
||||
ctype* x, inc_t incx,
|
||||
ctype* y, inc_t incy,
|
||||
bool* is_eq
|
||||
);
|
||||
```
|
||||
Perform an element-wise comparison between length _n_ vectors `x` and `y` and store the boolean result in the `bool` pointed to by `is_eq`.
|
||||
If `conjx` indicates a conjugation, `x` will be implicitly conjugated for purposes of the comparison.
|
||||
|
||||
---
|
||||
|
||||
#### eqm
|
||||
```c
|
||||
void bli_?eqm
|
||||
(
|
||||
doff_t diagoffa,
|
||||
diag_t diaga,
|
||||
uplo_t uploa,
|
||||
trans_t transa,
|
||||
dim_t m,
|
||||
dim_t n,
|
||||
ctype* a, inc_t rs_a, inc_t cs_a,
|
||||
ctype* b, inc_t rs_b, inc_t cs_b,
|
||||
bool* is_eq
|
||||
)
|
||||
```
|
||||
Perform an element-wise comparison between matrices `A` and `B` and store the boolean result in the `bool` pointed to by `is_eq`.
|
||||
Here, `B` is an _m x n_ matrix, and `A` is stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix with arbitrary diagonal offset and unit or non-unit diagonal.
|
||||
If `diaga` indicates a unit diagonal, the diagonals of both matrices will be ignored for purposes of the comparison.
|
||||
If `uploa` indicates lower or upper storage, only that part of matrix `A` will be referenced in the comparison.
|
||||
If `transa` indicates a conjugation and/or transposition, then `A` will be conjugated and/or transposed for purposes of the comparison.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
## Level-3 microkernels
|
||||
|
||||
|
||||
11
docs/FAQ.md
11
docs/FAQ.md
@@ -17,6 +17,7 @@ project, as well as those we think a new user or developer might ask. If you do
|
||||
* [What is a macrokernel?](FAQ.md#what-is-a-macrokernel)
|
||||
* [What is a context?](FAQ.md#what-is-a-context)
|
||||
* [I am used to thinking in terms of column-major/row-major storage and leading dimensions. What is a "row stride" / "column stride"?](FAQ.md#im-used-to-thinking-in-terms-of-column-majorrow-major-storage-and-leading-dimensions-what-is-a-row-stride--column-stride)
|
||||
* [Why does BLIS have vector (level-1v) and matrix (level-1m) variations of most level-1 operations?](FAQ.md#why-does-blis-have-vector-level-1v-and-matrix-level-1m-variations-of-most-level-1-operations)
|
||||
* [What does it mean when a matrix with general stride is column-tilted or row-tilted?](FAQ.md#what-does-it-mean-when-a-matrix-with-general-stride-is-column-tilted-or-row-tilted)
|
||||
* [I am not really interested in all of these newfangled features in BLIS. Can I just use BLIS as a BLAS library?](FAQ.md#im-not-really-interested-in-all-of-these-newfangled-features-in-blis-can-i-just-use-blis-as-a-blas-library)
|
||||
* [What about CBLAS?](FAQ.md#what-about-cblas)
|
||||
@@ -117,6 +118,16 @@ In generalized storage, we have a row stride and a column stride. The row stride
|
||||
|
||||
BLIS also supports situations where both the row stride and column stride are non-unit. We call this situation "general stride".
|
||||
|
||||
### Why does BLIS have vector (level-1v) and matrix (level-1m) variations of most level-1 operations?
|
||||
|
||||
At first glance, it might appear that an element-wise operation such as `copym` or `axpym` would be sufficiently general purpose to cover the cases where the operands are vectors. After all, an *m x 1* matrix can be viewed as a vector of length *m* and vice versa. But in BLIS, operations on vectors are treated slightly differently than operations on matrices.
|
||||
|
||||
If an application wishes to perform an element-wise operation on two objects, and the application calls a level-1m operation, the dimensions of those objects must be conformal, or "match up" (after any transposition implied by the object properties). This includes situations where one of the dimensions is unit.
|
||||
|
||||
However, if an application instead decides to perform an element-wise operation on two objects, and the application calls a level-1v operation, the dimension constraints are slightly relaxed. In this scenario, BLIS only checks that the vector *lengths* are equal. This allows for the vectors to have different orientations (row vs column) while still being considered conformal. So, you could perform a `copyv` operation to copy from an *m x 1* vector to a *1 x m* vector. A `copym` operation on such objects would not be allowed (unless it was executed with the source object containing an implicit transposition).
|
||||
|
||||
Another way to think about level-1v operations is that they will work with any two matrix objects in situations where (a) the corresponding level-1m operation *would have* worked if the input had been transposed, and (b) all operands happen to be vectors (i.e., have one unit dimension).
|
||||
|
||||
### What does it mean when a matrix with general stride is column-tilted or row-tilted?
|
||||
|
||||
When a matrix is stored with general stride, both the row stride and column stride (let's call them `rs` and `cs`) are non-unit. When `rs` < `cs`, we call the general stride matrix "column-tilted" because it is "closer" to being column-stored (than row-stored). Similarly, when `rs` > `cs`, the matrix is "row-tilted" because it is closer to being row-stored.
|
||||
|
||||
@@ -87,6 +87,7 @@ void PASTEMAC(opname,_check) \
|
||||
GENFRONT( absqsc )
|
||||
GENFRONT( normfsc )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void bli_getsc_check
|
||||
(
|
||||
@@ -352,3 +353,37 @@ void bli_l0_xx2sc_check
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
// Run the parameter checks on the two objects chi and psi.
// Three kinds of check are applied, each to chi first and then to psi:
// non-integer datatype, scalar dimensions, and non-NULL buffer. The status
// returned by every check is handed to bli_check_error_code() before the
// next check is made, so the ordering below determines which problem is
// reported first.
// NOTE(review): is_eq is accepted but never examined in this function;
// presumably callers are expected to pass a valid pointer -- confirm.
void bli_l0_xxbsc_check
|
||||
(
|
||||
obj_t* chi,
|
||||
obj_t* psi,
|
||||
bool* is_eq
|
||||
)
|
||||
{
|
||||
err_t e_val;
|
||||
|
||||
// Check object datatypes.
|
||||
|
||||
e_val = bli_check_noninteger_object( chi );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_noninteger_object( psi );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_scalar_object( chi );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_scalar_object( psi );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object buffers (for non-NULLness).
|
||||
|
||||
e_val = bli_check_object_buffer( chi );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_buffer( psi );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
|
||||
@@ -129,7 +129,6 @@ void PASTEMAC(opname,_check) \
|
||||
|
||||
GENTPROT( zipsc )
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void bli_l0_xsc_check
|
||||
@@ -148,3 +147,10 @@ void bli_l0_xx2sc_check
|
||||
obj_t* chi,
|
||||
obj_t* norm
|
||||
);
|
||||
|
||||
// Parameter-check routine taking two objects (chi, psi) and a bool
// pointer (is_eq).
void bli_l0_xxbsc_check
|
||||
(
|
||||
obj_t* chi,
|
||||
obj_t* psi,
|
||||
bool* is_eq
|
||||
);
|
||||
|
||||
@@ -175,4 +175,3 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
|
||||
INSERT_GENTDEFR( zipsc )
|
||||
|
||||
|
||||
|
||||
@@ -69,8 +69,8 @@ void PASTEMAC0(opname) \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
buf_chi, \
|
||||
buf_absq \
|
||||
buf_chi, \
|
||||
buf_absq \
|
||||
); \
|
||||
}
|
||||
|
||||
@@ -105,9 +105,9 @@ void PASTEMAC0(opname) \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
conjchi, \
|
||||
buf_chi, \
|
||||
buf_psi \
|
||||
conjchi, \
|
||||
buf_chi, \
|
||||
buf_psi \
|
||||
); \
|
||||
}
|
||||
|
||||
@@ -142,8 +142,8 @@ void PASTEMAC0(opname) \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
conjchi, \
|
||||
buf_chi \
|
||||
conjchi, \
|
||||
buf_chi \
|
||||
); \
|
||||
}
|
||||
|
||||
@@ -175,8 +175,8 @@ void PASTEMAC0(opname) \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
buf_chi, \
|
||||
buf_psi \
|
||||
buf_chi, \
|
||||
buf_psi \
|
||||
); \
|
||||
}
|
||||
|
||||
@@ -218,9 +218,9 @@ void PASTEMAC0(opname) \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
buf_chi, \
|
||||
zeta_r, \
|
||||
zeta_i \
|
||||
buf_chi, \
|
||||
zeta_r, \
|
||||
zeta_i \
|
||||
); \
|
||||
}
|
||||
|
||||
@@ -252,9 +252,9 @@ void PASTEMAC0(opname) \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
zeta_r, \
|
||||
zeta_i, \
|
||||
buf_chi \
|
||||
zeta_r, \
|
||||
zeta_i, \
|
||||
buf_chi \
|
||||
); \
|
||||
}
|
||||
|
||||
@@ -295,9 +295,9 @@ void PASTEMAC0(opname) \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
buf_chi, \
|
||||
buf_zeta_r, \
|
||||
buf_zeta_i \
|
||||
buf_chi, \
|
||||
buf_zeta_r, \
|
||||
buf_zeta_i \
|
||||
); \
|
||||
}
|
||||
|
||||
@@ -332,9 +332,9 @@ void PASTEMAC0(opname) \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
buf_zeta_i, \
|
||||
buf_zeta_r, \
|
||||
buf_chi \
|
||||
buf_zeta_i, \
|
||||
buf_zeta_r, \
|
||||
buf_chi \
|
||||
); \
|
||||
}
|
||||
|
||||
|
||||
@@ -128,9 +128,3 @@ BLIS_EXPORT_BLIS void PASTEMAC0(opname) \
|
||||
|
||||
GENPROT( zipsc )
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -41,18 +41,22 @@
|
||||
// Prototype object APIs (expert and non-expert).
|
||||
#include "bli_oapi_ex.h"
|
||||
#include "bli_l1v_oapi.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
#include "bli_oapi_ba.h"
|
||||
#include "bli_l1v_oapi.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
// Prototype typed APIs (expert and non-expert).
|
||||
#include "bli_tapi_ex.h"
|
||||
#include "bli_l1v_tapi.h"
|
||||
#include "bli_l1v_ft.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
#include "bli_tapi_ba.h"
|
||||
#include "bli_l1v_tapi.h"
|
||||
#include "bli_l1v_ft.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
// Generate function pointer arrays for tapi functions (expert only).
|
||||
#include "bli_l1v_fpa.h"
|
||||
|
||||
@@ -117,7 +117,7 @@ siz_t bli_packv_init_pack
|
||||
dim_t dim_a = bli_obj_vector_dim( a );
|
||||
dim_t bmult = bli_cntx_get_blksz_def_dt( dt, bmult_id, cntx );
|
||||
|
||||
membrk_t* membrk = bli_cntx_membrk( cntx );
|
||||
pba_t* pba = bli_cntx_pba( cntx );
|
||||
|
||||
#if 0
|
||||
mem_t* mem_p;
|
||||
@@ -156,9 +156,7 @@ siz_t bli_packv_init_pack
|
||||
{
|
||||
// If the mem_t object of p has not yet been allocated, then acquire
|
||||
// a memory block suitable for a vector.
|
||||
bli_membrk_acquire_v( membrk,
|
||||
size_p,
|
||||
mem_p );
|
||||
bli_pba_acquire_v( pba, size_p, mem_p );
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -166,11 +164,9 @@ siz_t bli_packv_init_pack
|
||||
// re-acquire the memory so there is sufficient space.
|
||||
if ( bli_mem_size( mem_p ) < size_p )
|
||||
{
|
||||
bli_membrk_release( mem_p );
|
||||
bli_pba_release( mem_p );
|
||||
|
||||
bli_membrk_acquire_v( membrk,
|
||||
size_p,
|
||||
mem_p );
|
||||
bli_pba_acquire_v( pba, size_p, mem_p );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -37,18 +37,22 @@
|
||||
// Prototype object APIs (expert and non-expert).
|
||||
#include "bli_oapi_ex.h"
|
||||
#include "bli_l1d_oapi.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
#include "bli_oapi_ba.h"
|
||||
#include "bli_l1d_oapi.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
// Prototype typed APIs (expert and non-expert).
|
||||
#include "bli_tapi_ex.h"
|
||||
#include "bli_l1d_tapi.h"
|
||||
#include "bli_l1d_ft.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
#include "bli_tapi_ba.h"
|
||||
#include "bli_l1d_tapi.h"
|
||||
#include "bli_l1d_ft.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
// Generate function pointer arrays for tapi functions (expert only).
|
||||
#include "bli_l1d_fpa.h"
|
||||
|
||||
@@ -40,18 +40,22 @@
|
||||
// Prototype object APIs (expert and non-expert).
|
||||
#include "bli_oapi_ex.h"
|
||||
#include "bli_l1f_oapi.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
#include "bli_oapi_ba.h"
|
||||
#include "bli_l1f_oapi.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
// Prototype typed APIs (expert and non-expert).
|
||||
#include "bli_tapi_ex.h"
|
||||
#include "bli_l1f_tapi.h"
|
||||
#include "bli_l1f_ft.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
#include "bli_tapi_ba.h"
|
||||
#include "bli_l1f_tapi.h"
|
||||
#include "bli_l1f_ft.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
// Generate function pointer arrays for tapi functions (expert only).
|
||||
#include "bli_l1f_fpa.h"
|
||||
|
||||
@@ -43,18 +43,22 @@
|
||||
// Prototype object APIs (expert and non-expert).
|
||||
#include "bli_oapi_ex.h"
|
||||
#include "bli_l1m_oapi.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
#include "bli_oapi_ba.h"
|
||||
#include "bli_l1m_oapi.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
// Prototype typed APIs (expert and non-expert).
|
||||
#include "bli_tapi_ex.h"
|
||||
#include "bli_l1m_tapi.h"
|
||||
#include "bli_l1m_ft.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
#include "bli_tapi_ba.h"
|
||||
#include "bli_l1m_tapi.h"
|
||||
#include "bli_l1m_ft.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
// Generate function pointer arrays for tapi functions (expert only).
|
||||
#include "bli_l1m_fpa.h"
|
||||
|
||||
@@ -57,25 +57,6 @@ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \
|
||||
|
||||
INSERT_GENTDEF( addm )
|
||||
INSERT_GENTDEF( subm )
|
||||
|
||||
// copym
|
||||
|
||||
#undef GENTDEF
|
||||
#define GENTDEF( ctype, ch, opname, tsuf ) \
|
||||
\
|
||||
typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \
|
||||
( \
|
||||
doff_t diagoffx, \
|
||||
diag_t diagx, \
|
||||
uplo_t uplox, \
|
||||
trans_t transx, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t rs_x, inc_t cs_x, \
|
||||
ctype* y, inc_t rs_y, inc_t cs_y \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
);
|
||||
|
||||
INSERT_GENTDEF( copym )
|
||||
|
||||
// axpym
|
||||
|
||||
@@ -78,17 +78,17 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
diagoffx, \
|
||||
diagx, \
|
||||
uplox, \
|
||||
transx, \
|
||||
m, \
|
||||
n, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
buf_y, rs_y, cs_y, \
|
||||
cntx, \
|
||||
rntm \
|
||||
( \
|
||||
diagoffx, \
|
||||
diagx, \
|
||||
uplox, \
|
||||
transx, \
|
||||
m, \
|
||||
n, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
buf_y, rs_y, cs_y, \
|
||||
cntx, \
|
||||
rntm \
|
||||
); \
|
||||
}
|
||||
|
||||
@@ -146,18 +146,18 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
diagoffx, \
|
||||
diagx, \
|
||||
uplox, \
|
||||
transx, \
|
||||
m, \
|
||||
n, \
|
||||
buf_alpha, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
buf_y, rs_y, cs_y, \
|
||||
cntx, \
|
||||
rntm \
|
||||
( \
|
||||
diagoffx, \
|
||||
diagx, \
|
||||
uplox, \
|
||||
transx, \
|
||||
m, \
|
||||
n, \
|
||||
buf_alpha, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
buf_y, rs_y, cs_y, \
|
||||
cntx, \
|
||||
rntm \
|
||||
); \
|
||||
}
|
||||
|
||||
@@ -223,17 +223,17 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
BLIS_NO_CONJUGATE, /* internal conjugation applied during copy-cast. */ \
|
||||
diagoffx, \
|
||||
diagx, \
|
||||
uplox, \
|
||||
m, \
|
||||
n, \
|
||||
buf_alpha, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
cntx, \
|
||||
rntm \
|
||||
( \
|
||||
BLIS_NO_CONJUGATE, /* internal conjugation applied during copy-cast. */ \
|
||||
diagoffx, \
|
||||
diagx, \
|
||||
uplox, \
|
||||
m, \
|
||||
n, \
|
||||
buf_alpha, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
cntx, \
|
||||
rntm \
|
||||
); \
|
||||
}
|
||||
|
||||
@@ -285,17 +285,17 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
BLIS_NO_CONJUGATE, /* internal conjugation applied during copy-cast. */ \
|
||||
diagoffx, \
|
||||
diagx, \
|
||||
uplox, \
|
||||
m, \
|
||||
n, \
|
||||
buf_alpha, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
cntx, \
|
||||
rntm \
|
||||
( \
|
||||
BLIS_NO_CONJUGATE, /* internal conjugation applied during copy-cast. */ \
|
||||
diagoffx, \
|
||||
diagx, \
|
||||
uplox, \
|
||||
m, \
|
||||
n, \
|
||||
buf_alpha, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
cntx, \
|
||||
rntm \
|
||||
); \
|
||||
}
|
||||
|
||||
@@ -354,18 +354,18 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
diagoffx, \
|
||||
diagx, \
|
||||
uplox, \
|
||||
transx, \
|
||||
m, \
|
||||
n, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
buf_beta, \
|
||||
buf_y, rs_y, cs_y, \
|
||||
cntx, \
|
||||
rntm \
|
||||
( \
|
||||
diagoffx, \
|
||||
diagx, \
|
||||
uplox, \
|
||||
transx, \
|
||||
m, \
|
||||
n, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
buf_beta, \
|
||||
buf_y, rs_y, cs_y, \
|
||||
cntx, \
|
||||
rntm \
|
||||
); \
|
||||
}
|
||||
|
||||
@@ -420,17 +420,17 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
diagoffx, \
|
||||
diagx, \
|
||||
uplox, \
|
||||
transx, \
|
||||
m, \
|
||||
n, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
buf_beta, \
|
||||
buf_y, rs_y, cs_y, \
|
||||
cntx, \
|
||||
rntm \
|
||||
diagoffx, \
|
||||
diagx, \
|
||||
uplox, \
|
||||
transx, \
|
||||
m, \
|
||||
n, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
buf_beta, \
|
||||
buf_y, rs_y, cs_y, \
|
||||
cntx, \
|
||||
rntm \
|
||||
); \
|
||||
}
|
||||
|
||||
|
||||
@@ -57,15 +57,12 @@ void PASTEMAC(ch,opname) \
|
||||
{ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
\
|
||||
ctype* x1; \
|
||||
ctype* y1; \
|
||||
uplo_t uplox_eff; \
|
||||
conj_t conjx; \
|
||||
dim_t n_iter; \
|
||||
dim_t n_elem, n_elem_max; \
|
||||
dim_t n_elem_max; \
|
||||
inc_t ldx, incx; \
|
||||
inc_t ldy, incy; \
|
||||
dim_t j, i; \
|
||||
dim_t ij0, n_shift; \
|
||||
\
|
||||
/* Set various loop parameters. */ \
|
||||
@@ -88,62 +85,65 @@ void PASTEMAC(ch,opname) \
|
||||
/* Handle dense and upper/lower storage cases separately. */ \
|
||||
if ( bli_is_dense( uplox_eff ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
n_elem = n_elem_max; \
|
||||
const dim_t n_elem = n_elem_max; \
|
||||
\
|
||||
x1 = x + (j )*ldx + (0 )*incx; \
|
||||
y1 = y + (j )*ldy + (0 )*incy; \
|
||||
ctype* x1 = x + (j )*ldx + (0 )*incx; \
|
||||
ctype* y1 = y + (j )*ldy + (0 )*incy; \
|
||||
\
|
||||
/* Invoke the kernel with the appropriate parameters. */ \
|
||||
f( \
|
||||
conjx, \
|
||||
n_elem, \
|
||||
x1, incx, \
|
||||
y1, incy, \
|
||||
cntx \
|
||||
); \
|
||||
f \
|
||||
( \
|
||||
conjx, \
|
||||
n_elem, \
|
||||
x1, incx, \
|
||||
y1, incy, \
|
||||
cntx \
|
||||
); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
if ( bli_is_upper( uplox_eff ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
|
||||
const dim_t n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
|
||||
\
|
||||
x1 = x + (ij0+j )*ldx + (0 )*incx; \
|
||||
y1 = y + (ij0+j )*ldy + (0 )*incy; \
|
||||
ctype* x1 = x + (ij0+j )*ldx + (0 )*incx; \
|
||||
ctype* y1 = y + (ij0+j )*ldy + (0 )*incy; \
|
||||
\
|
||||
/* Invoke the kernel with the appropriate parameters. */ \
|
||||
f( \
|
||||
conjx, \
|
||||
n_elem, \
|
||||
x1, incx, \
|
||||
y1, incy, \
|
||||
cntx \
|
||||
); \
|
||||
f \
|
||||
( \
|
||||
conjx, \
|
||||
n_elem, \
|
||||
x1, incx, \
|
||||
y1, incy, \
|
||||
cntx \
|
||||
); \
|
||||
} \
|
||||
} \
|
||||
else if ( bli_is_lower( uplox_eff ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
i = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
|
||||
n_elem = n_elem_max - i; \
|
||||
const dim_t offi = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
|
||||
const dim_t n_elem = n_elem_max - offi; \
|
||||
\
|
||||
x1 = x + (j )*ldx + (ij0+i )*incx; \
|
||||
y1 = y + (j )*ldy + (ij0+i )*incy; \
|
||||
ctype* x1 = x + (j )*ldx + (ij0+offi )*incx; \
|
||||
ctype* y1 = y + (j )*ldy + (ij0+offi )*incy; \
|
||||
\
|
||||
/* Invoke the kernel with the appropriate parameters. */ \
|
||||
f( \
|
||||
conjx, \
|
||||
n_elem, \
|
||||
x1, incx, \
|
||||
y1, incy, \
|
||||
cntx \
|
||||
); \
|
||||
f \
|
||||
( \
|
||||
conjx, \
|
||||
n_elem, \
|
||||
x1, incx, \
|
||||
y1, incy, \
|
||||
cntx \
|
||||
); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
@@ -174,15 +174,12 @@ void PASTEMAC(ch,opname) \
|
||||
{ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
\
|
||||
ctype* x1; \
|
||||
ctype* y1; \
|
||||
uplo_t uplox_eff; \
|
||||
conj_t conjx; \
|
||||
dim_t n_iter; \
|
||||
dim_t n_elem, n_elem_max; \
|
||||
dim_t n_elem_max; \
|
||||
inc_t ldx, incx; \
|
||||
inc_t ldy, incy; \
|
||||
dim_t j, i; \
|
||||
dim_t ij0, n_shift; \
|
||||
\
|
||||
/* Set various loop parameters. */ \
|
||||
@@ -205,65 +202,68 @@ void PASTEMAC(ch,opname) \
|
||||
/* Handle dense and upper/lower storage cases separately. */ \
|
||||
if ( bli_is_dense( uplox_eff ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
n_elem = n_elem_max; \
|
||||
const dim_t n_elem = n_elem_max; \
|
||||
\
|
||||
x1 = x + (j )*ldx + (0 )*incx; \
|
||||
y1 = y + (j )*ldy + (0 )*incy; \
|
||||
ctype* x1 = x + (j )*ldx + (0 )*incx; \
|
||||
ctype* y1 = y + (j )*ldy + (0 )*incy; \
|
||||
\
|
||||
/* Invoke the kernel with the appropriate parameters. */ \
|
||||
f( \
|
||||
conjx, \
|
||||
n_elem, \
|
||||
alpha, \
|
||||
x1, incx, \
|
||||
y1, incy, \
|
||||
cntx \
|
||||
); \
|
||||
f \
|
||||
( \
|
||||
conjx, \
|
||||
n_elem, \
|
||||
alpha, \
|
||||
x1, incx, \
|
||||
y1, incy, \
|
||||
cntx \
|
||||
); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
if ( bli_is_upper( uplox_eff ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
|
||||
const dim_t n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
|
||||
\
|
||||
x1 = x + (ij0+j )*ldx + (0 )*incx; \
|
||||
y1 = y + (ij0+j )*ldy + (0 )*incy; \
|
||||
ctype* x1 = x + (ij0+j )*ldx + (0 )*incx; \
|
||||
ctype* y1 = y + (ij0+j )*ldy + (0 )*incy; \
|
||||
\
|
||||
/* Invoke the kernel with the appropriate parameters. */ \
|
||||
f( \
|
||||
conjx, \
|
||||
n_elem, \
|
||||
alpha, \
|
||||
x1, incx, \
|
||||
y1, incy, \
|
||||
cntx \
|
||||
); \
|
||||
f \
|
||||
( \
|
||||
conjx, \
|
||||
n_elem, \
|
||||
alpha, \
|
||||
x1, incx, \
|
||||
y1, incy, \
|
||||
cntx \
|
||||
); \
|
||||
} \
|
||||
} \
|
||||
else if ( bli_is_lower( uplox_eff ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
i = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
|
||||
n_elem = n_elem_max - i; \
|
||||
const dim_t offi = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
|
||||
const dim_t n_elem = n_elem_max - offi; \
|
||||
\
|
||||
x1 = x + (j )*ldx + (ij0+i )*incx; \
|
||||
y1 = y + (j )*ldy + (ij0+i )*incy; \
|
||||
ctype* x1 = x + (j )*ldx + (ij0+offi )*incx; \
|
||||
ctype* y1 = y + (j )*ldy + (ij0+offi )*incy; \
|
||||
\
|
||||
/* Invoke the kernel with the appropriate parameters. */ \
|
||||
f( \
|
||||
conjx, \
|
||||
n_elem, \
|
||||
alpha, \
|
||||
x1, incx, \
|
||||
y1, incy, \
|
||||
cntx \
|
||||
); \
|
||||
f \
|
||||
( \
|
||||
conjx, \
|
||||
n_elem, \
|
||||
alpha, \
|
||||
x1, incx, \
|
||||
y1, incy, \
|
||||
cntx \
|
||||
); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
@@ -292,12 +292,10 @@ void PASTEMAC(ch,opname) \
|
||||
{ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
\
|
||||
ctype* x1; \
|
||||
uplo_t uplox_eff; \
|
||||
dim_t n_iter; \
|
||||
dim_t n_elem, n_elem_max; \
|
||||
dim_t n_elem_max; \
|
||||
inc_t ldx, incx; \
|
||||
dim_t j, i; \
|
||||
dim_t ij0, n_shift; \
|
||||
\
|
||||
/* Set various loop parameters. */ \
|
||||
@@ -317,59 +315,62 @@ void PASTEMAC(ch,opname) \
|
||||
/* Handle dense and upper/lower storage cases separately. */ \
|
||||
if ( bli_is_dense( uplox_eff ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
n_elem = n_elem_max; \
|
||||
const dim_t n_elem = n_elem_max; \
|
||||
\
|
||||
x1 = x + (j )*ldx + (0 )*incx; \
|
||||
ctype* x1 = x + (j )*ldx + (0 )*incx; \
|
||||
\
|
||||
/* Invoke the kernel with the appropriate parameters. */ \
|
||||
f( \
|
||||
conjalpha, \
|
||||
n_elem, \
|
||||
alpha, \
|
||||
x1, incx, \
|
||||
cntx \
|
||||
); \
|
||||
f \
|
||||
( \
|
||||
conjalpha, \
|
||||
n_elem, \
|
||||
alpha, \
|
||||
x1, incx, \
|
||||
cntx \
|
||||
); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
if ( bli_is_upper( uplox_eff ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
|
||||
const dim_t n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
|
||||
\
|
||||
x1 = x + (ij0+j )*ldx + (0 )*incx; \
|
||||
ctype* x1 = x + (ij0+j )*ldx + (0 )*incx; \
|
||||
\
|
||||
/* Invoke the kernel with the appropriate parameters. */ \
|
||||
f( \
|
||||
conjalpha, \
|
||||
n_elem, \
|
||||
alpha, \
|
||||
x1, incx, \
|
||||
cntx \
|
||||
); \
|
||||
f \
|
||||
( \
|
||||
conjalpha, \
|
||||
n_elem, \
|
||||
alpha, \
|
||||
x1, incx, \
|
||||
cntx \
|
||||
); \
|
||||
} \
|
||||
} \
|
||||
else if ( bli_is_lower( uplox_eff ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
i = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
|
||||
n_elem = n_elem_max - i; \
|
||||
const dim_t offi = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
|
||||
const dim_t n_elem = n_elem_max - offi; \
|
||||
\
|
||||
x1 = x + (j )*ldx + (ij0+i )*incx; \
|
||||
ctype* x1 = x + (j )*ldx + (ij0+offi )*incx; \
|
||||
\
|
||||
/* Invoke the kernel with the appropriate parameters. */ \
|
||||
f( \
|
||||
conjalpha, \
|
||||
n_elem, \
|
||||
alpha, \
|
||||
x1, incx, \
|
||||
cntx \
|
||||
); \
|
||||
f \
|
||||
( \
|
||||
conjalpha, \
|
||||
n_elem, \
|
||||
alpha, \
|
||||
x1, incx, \
|
||||
cntx \
|
||||
); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
@@ -399,15 +400,12 @@ void PASTEMAC(ch,opname) \
|
||||
{ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
\
|
||||
ctype* x1; \
|
||||
ctype* y1; \
|
||||
uplo_t uplox_eff; \
|
||||
conj_t conjx; \
|
||||
dim_t n_iter; \
|
||||
dim_t n_elem, n_elem_max; \
|
||||
dim_t n_elem_max; \
|
||||
inc_t ldx, incx; \
|
||||
inc_t ldy, incy; \
|
||||
dim_t j, i; \
|
||||
dim_t ij0, n_shift; \
|
||||
\
|
||||
/* Set various loop parameters. */ \
|
||||
@@ -430,65 +428,68 @@ void PASTEMAC(ch,opname) \
|
||||
/* Handle dense and upper/lower storage cases separately. */ \
|
||||
if ( bli_is_dense( uplox_eff ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
n_elem = n_elem_max; \
|
||||
const dim_t n_elem = n_elem_max; \
|
||||
\
|
||||
x1 = x + (j )*ldx + (0 )*incx; \
|
||||
y1 = y + (j )*ldy + (0 )*incy; \
|
||||
ctype* x1 = x + (j )*ldx + (0 )*incx; \
|
||||
ctype* y1 = y + (j )*ldy + (0 )*incy; \
|
||||
\
|
||||
/* Invoke the kernel with the appropriate parameters. */ \
|
||||
f( \
|
||||
conjx, \
|
||||
n_elem, \
|
||||
x1, incx, \
|
||||
beta, \
|
||||
y1, incy, \
|
||||
cntx \
|
||||
); \
|
||||
f \
|
||||
( \
|
||||
conjx, \
|
||||
n_elem, \
|
||||
x1, incx, \
|
||||
beta, \
|
||||
y1, incy, \
|
||||
cntx \
|
||||
); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
if ( bli_is_upper( uplox_eff ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
|
||||
const dim_t n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
|
||||
\
|
||||
x1 = x + (ij0+j )*ldx + (0 )*incx; \
|
||||
y1 = y + (ij0+j )*ldy + (0 )*incy; \
|
||||
ctype* x1 = x + (ij0+j )*ldx + (0 )*incx; \
|
||||
ctype* y1 = y + (ij0+j )*ldy + (0 )*incy; \
|
||||
\
|
||||
/* Invoke the kernel with the appropriate parameters. */ \
|
||||
f( \
|
||||
conjx, \
|
||||
n_elem, \
|
||||
x1, incx, \
|
||||
beta, \
|
||||
y1, incy, \
|
||||
cntx \
|
||||
); \
|
||||
f \
|
||||
( \
|
||||
conjx, \
|
||||
n_elem, \
|
||||
x1, incx, \
|
||||
beta, \
|
||||
y1, incy, \
|
||||
cntx \
|
||||
); \
|
||||
} \
|
||||
} \
|
||||
else if ( bli_is_lower( uplox_eff ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
i = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
|
||||
n_elem = n_elem_max - i; \
|
||||
const dim_t offi = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
|
||||
const dim_t n_elem = n_elem_max - offi; \
|
||||
\
|
||||
x1 = x + (j )*ldx + (ij0+i )*incx; \
|
||||
y1 = y + (j )*ldy + (ij0+i )*incy; \
|
||||
ctype* x1 = x + (j )*ldx + (ij0+offi )*incx; \
|
||||
ctype* y1 = y + (j )*ldy + (ij0+offi )*incy; \
|
||||
\
|
||||
/* Invoke the kernel with the appropriate parameters. */ \
|
||||
f( \
|
||||
conjx, \
|
||||
n_elem, \
|
||||
x1, incx, \
|
||||
beta, \
|
||||
y1, incy, \
|
||||
cntx \
|
||||
); \
|
||||
f \
|
||||
( \
|
||||
conjx, \
|
||||
n_elem, \
|
||||
x1, incx, \
|
||||
beta, \
|
||||
y1, incy, \
|
||||
cntx \
|
||||
); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
@@ -515,15 +516,12 @@ void PASTEMAC2(chx,chy,opname) \
|
||||
rntm_t* rntm \
|
||||
) \
|
||||
{ \
|
||||
ctype_x* restrict x1; \
|
||||
ctype_y* restrict y1; \
|
||||
uplo_t uplox_eff; \
|
||||
dim_t n_iter; \
|
||||
dim_t n_elem, n_elem_max; \
|
||||
inc_t ldx, incx; \
|
||||
inc_t ldy, incy; \
|
||||
dim_t j, i; \
|
||||
dim_t ij0, n_shift; \
|
||||
uplo_t uplox_eff; \
|
||||
dim_t n_iter; \
|
||||
dim_t n_elem_max; \
|
||||
inc_t ldx, incx; \
|
||||
inc_t ldy, incy; \
|
||||
dim_t ij0, n_shift; \
|
||||
\
|
||||
/* Set various loop parameters. */ \
|
||||
bli_set_dims_incs_uplo_2m \
|
||||
@@ -542,35 +540,32 @@ void PASTEMAC2(chx,chy,opname) \
|
||||
{ \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
n_elem = n_elem_max; \
|
||||
const dim_t n_elem = n_elem_max; \
|
||||
\
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
x1 = x + (j )*ldx + (0 )*incx; \
|
||||
y1 = y + (j )*ldy + (0 )*incy; \
|
||||
ctype_x* restrict x1 = x + (j )*ldx + (0 )*incx; \
|
||||
ctype_y* restrict y1 = y + (j )*ldy + (0 )*incy; \
|
||||
\
|
||||
ctype_x* restrict chi1 = x1; \
|
||||
ctype_y* restrict psi1 = y1; \
|
||||
\
|
||||
for ( i = 0; i < n_elem; ++i ) \
|
||||
for ( dim_t i = 0; i < n_elem; ++i ) \
|
||||
{ \
|
||||
PASTEMAC2(chx,chy,adds)( chi1[i], psi1[i] ); \
|
||||
PASTEMAC2(chx,chy,adds)( x1[i], y1[i] ); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
n_elem = n_elem_max; \
|
||||
const dim_t n_elem = n_elem_max; \
|
||||
\
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
x1 = x + (j )*ldx + (0 )*incx; \
|
||||
y1 = y + (j )*ldy + (0 )*incy; \
|
||||
ctype_x* restrict x1 = x + (j )*ldx + (0 )*incx; \
|
||||
ctype_y* restrict y1 = y + (j )*ldy + (0 )*incy; \
|
||||
\
|
||||
ctype_x* restrict chi1 = x1; \
|
||||
ctype_y* restrict psi1 = y1; \
|
||||
\
|
||||
for ( i = 0; i < n_elem; ++i ) \
|
||||
for ( dim_t i = 0; i < n_elem; ++i ) \
|
||||
{ \
|
||||
PASTEMAC2(chx,chy,adds)( *chi1, *psi1 ); \
|
||||
\
|
||||
@@ -584,35 +579,32 @@ void PASTEMAC2(chx,chy,opname) \
|
||||
{ \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
n_elem = n_elem_max; \
|
||||
const dim_t n_elem = n_elem_max; \
|
||||
\
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
x1 = x + (j )*ldx + (0 )*incx; \
|
||||
y1 = y + (j )*ldy + (0 )*incy; \
|
||||
ctype_x* restrict x1 = x + (j )*ldx + (0 )*incx; \
|
||||
ctype_y* restrict y1 = y + (j )*ldy + (0 )*incy; \
|
||||
\
|
||||
ctype_x* restrict chi1 = x1; \
|
||||
ctype_y* restrict psi1 = y1; \
|
||||
\
|
||||
for ( i = 0; i < n_elem; ++i ) \
|
||||
for ( dim_t i = 0; i < n_elem; ++i ) \
|
||||
{ \
|
||||
PASTEMAC3(chx,chy,chy,xpbys)( chi1[i], *beta, psi1[i] ); \
|
||||
PASTEMAC3(chx,chy,chy,xpbys)( x1[i], *beta, y1[i] ); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
n_elem = n_elem_max; \
|
||||
const dim_t n_elem = n_elem_max; \
|
||||
\
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
x1 = x + (j )*ldx + (0 )*incx; \
|
||||
y1 = y + (j )*ldy + (0 )*incy; \
|
||||
ctype_x* restrict x1 = x + (j )*ldx + (0 )*incx; \
|
||||
ctype_y* restrict y1 = y + (j )*ldy + (0 )*incy; \
|
||||
\
|
||||
ctype_x* restrict chi1 = x1; \
|
||||
ctype_y* restrict psi1 = y1; \
|
||||
\
|
||||
for ( i = 0; i < n_elem; ++i ) \
|
||||
for ( dim_t i = 0; i < n_elem; ++i ) \
|
||||
{ \
|
||||
PASTEMAC3(chx,chy,chy,xpbys)( *chi1, *beta, *psi1 ); \
|
||||
\
|
||||
|
||||
@@ -45,13 +45,14 @@ cntl_t* bli_unpackm_cntl_create_node
|
||||
{
|
||||
cntl_t* cntl;
|
||||
unpackm_params_t* params;
|
||||
err_t r_val;
|
||||
|
||||
// NOTE: If this function is ever called, figure out whether the
|
||||
// bli_malloc_intl() below needs to be changed to bli_sba_acquire().
|
||||
bli_abort();
|
||||
|
||||
// Allocate an unpackm_params_t struct.
|
||||
params = bli_malloc_intl( sizeof( unpackm_params_t ) );
|
||||
params = bli_malloc_intl( sizeof( unpackm_params_t ), &r_val );
|
||||
|
||||
// Initialize the unpackm_params_t struct.
|
||||
params->size = sizeof( unpackm_params_t );
|
||||
|
||||
@@ -40,18 +40,22 @@
|
||||
// Prototype object APIs (expert and non-expert).
|
||||
#include "bli_oapi_ex.h"
|
||||
#include "bli_l2_oapi.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
#include "bli_oapi_ba.h"
|
||||
#include "bli_l2_oapi.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
// Prototype typed APIs (expert and non-expert).
|
||||
#include "bli_tapi_ex.h"
|
||||
#include "bli_l2_tapi.h"
|
||||
#include "bli_l2_ft.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
#include "bli_tapi_ba.h"
|
||||
#include "bli_l2_tapi.h"
|
||||
#include "bli_l2_ft.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
// Generate function pointer arrays for tapi functions (expert only).
|
||||
#include "bli_l2_fpa.h"
|
||||
|
||||
@@ -37,7 +37,7 @@
|
||||
#include "bli_l3_check.h"
|
||||
|
||||
// Define function types.
|
||||
#include "bli_l3_ft_ex.h"
|
||||
//#include "bli_l3_ft_ex.h"
|
||||
#include "bli_l3_ft_ukr.h"
|
||||
#include "bli_l3_oft.h"
|
||||
#include "bli_l3_oft_var.h"
|
||||
@@ -50,16 +50,20 @@
|
||||
// Prototype object APIs (expert and non-expert).
|
||||
#include "bli_oapi_ex.h"
|
||||
#include "bli_l3_oapi.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
#include "bli_oapi_ba.h"
|
||||
#include "bli_l3_oapi.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
// Prototype typed APIs (expert and non-expert).
|
||||
#include "bli_tapi_ex.h"
|
||||
#include "bli_l3_tapi.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
#include "bli_tapi_ba.h"
|
||||
#include "bli_l3_tapi.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
// Define function types for small/unpacked handlers/kernels.
|
||||
#include "bli_l3_sup_oft.h"
|
||||
|
||||
@@ -91,7 +91,7 @@ void bli_l3_packm
|
||||
|
||||
// The chief thread acquires a block from the memory broker
|
||||
// and saves the associated mem_t entry to local_mem_s.
|
||||
bli_membrk_acquire_m
|
||||
bli_pba_acquire_m
|
||||
(
|
||||
rntm,
|
||||
size_needed,
|
||||
@@ -130,12 +130,12 @@ void bli_l3_packm
|
||||
// The chief thread releases the existing block associated with
|
||||
// the mem_t entry in the control tree, and then re-acquires a
|
||||
// new block, saving the associated mem_t entry to local_mem_s.
|
||||
bli_membrk_release
|
||||
bli_pba_release
|
||||
(
|
||||
rntm,
|
||||
cntl_mem_p
|
||||
);
|
||||
bli_membrk_acquire_m
|
||||
bli_pba_acquire_m
|
||||
(
|
||||
rntm,
|
||||
size_needed,
|
||||
|
||||
@@ -86,7 +86,7 @@ void PASTEMAC(ch,opname) \
|
||||
function before the other threads have a chance to copy
|
||||
from it. (A barrier would fix that race condition, but
|
||||
then again, I prefer to keep barriers to a minimum.) */ \
|
||||
bli_membrk_acquire_m \
|
||||
bli_pba_acquire_m \
|
||||
( \
|
||||
rntm, \
|
||||
size_needed, \
|
||||
@@ -130,12 +130,12 @@ void PASTEMAC(ch,opname) \
|
||||
above for why the acquisition needs to be directly to
|
||||
the chief thread's passed-in mem_t and not a local
|
||||
(temporary) mem_t. */ \
|
||||
bli_membrk_release \
|
||||
bli_pba_release \
|
||||
( \
|
||||
rntm, \
|
||||
mem \
|
||||
); \
|
||||
bli_membrk_acquire_m \
|
||||
bli_pba_acquire_m \
|
||||
( \
|
||||
rntm, \
|
||||
size_needed, \
|
||||
@@ -194,7 +194,7 @@ void PASTEMAC(ch,opname) \
|
||||
is allocated, which it should be. */ \
|
||||
if ( bli_mem_is_alloc( mem ) ) \
|
||||
{ \
|
||||
bli_membrk_release \
|
||||
bli_pba_release \
|
||||
( \
|
||||
rntm, \
|
||||
mem \
|
||||
|
||||
@@ -86,7 +86,7 @@ void PASTEMAC(ch,opname) \
|
||||
function before the other threads have a chance to copy
|
||||
from it. (A barrier would fix that race condition, but
|
||||
then again, I prefer to keep barriers to a minimum.) */ \
|
||||
bli_membrk_acquire_m \
|
||||
bli_pba_acquire_m \
|
||||
( \
|
||||
rntm, \
|
||||
size_needed, \
|
||||
@@ -130,12 +130,12 @@ void PASTEMAC(ch,opname) \
|
||||
above for why the acquisition needs to be directly to
|
||||
the chief thread's passed-in mem_t and not a local
|
||||
(temporary) mem_t. */ \
|
||||
bli_membrk_release \
|
||||
bli_pba_release \
|
||||
( \
|
||||
rntm, \
|
||||
mem \
|
||||
); \
|
||||
bli_membrk_acquire_m \
|
||||
bli_pba_acquire_m \
|
||||
( \
|
||||
rntm, \
|
||||
size_needed, \
|
||||
@@ -194,7 +194,7 @@ void PASTEMAC(ch,opname) \
|
||||
is allocated, which it should be. */ \
|
||||
if ( bli_mem_is_alloc( mem ) ) \
|
||||
{ \
|
||||
bli_membrk_release \
|
||||
bli_pba_release \
|
||||
( \
|
||||
rntm, \
|
||||
mem \
|
||||
|
||||
@@ -39,12 +39,19 @@ void bli_apool_init
|
||||
apool_t* restrict apool
|
||||
)
|
||||
{
|
||||
err_t r_val;
|
||||
|
||||
// NOTE: The apool_t is only used in one place; it is the type used to
|
||||
// define the sba. We've switched to static initialization of the mutex
|
||||
// field to remove one more thing that could possibly go wrong during
|
||||
// library initialization.
|
||||
|
||||
// Query the mutex from the apool_t.
|
||||
bli_pthread_mutex_t* restrict mutex = bli_apool_mutex( apool );
|
||||
//bli_pthread_mutex_t* restrict mutex = bli_apool_mutex( apool );
|
||||
|
||||
// Initialize the mutex.
|
||||
//*mutex = BLIS_PTHREAD_MUTEX_INITIALIZER;
|
||||
bli_pthread_mutex_init( mutex, NULL );
|
||||
//bli_pthread_mutex_init( mutex, NULL );
|
||||
|
||||
// We choose to start with:
|
||||
// - an empty pool
|
||||
@@ -87,7 +94,7 @@ void bli_apool_init
|
||||
// Allocate the block_ptrs array.
|
||||
array_t** restrict block_ptrs
|
||||
=
|
||||
bli_malloc_intl( block_ptrs_len * sizeof( array_t* ) );
|
||||
bli_malloc_intl( block_ptrs_len * sizeof( array_t* ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_apool_init(): allocating %d array_t.\n", ( int )num_blocks );
|
||||
@@ -136,6 +143,8 @@ void bli_apool_alloc_block
|
||||
array_t** restrict array_p
|
||||
)
|
||||
{
|
||||
err_t r_val;
|
||||
|
||||
// Since the apool_t is defined as a pool of array_t, we can hard-code
|
||||
// the block_size parameter.
|
||||
const siz_t block_size = sizeof( array_t );
|
||||
@@ -149,7 +158,7 @@ void bli_apool_alloc_block
|
||||
// be recovered when it's time to free the block.
|
||||
array_t* restrict array
|
||||
=
|
||||
bli_malloc_intl( block_size );
|
||||
bli_malloc_intl( block_size, &r_val );
|
||||
|
||||
// Initialize an array_t struct within the newly allocated memory region.
|
||||
bli_array_init( num_elem, sizeof( pool_t* ), array );
|
||||
@@ -212,11 +221,14 @@ void bli_apool_finalize
|
||||
apool_t* restrict apool
|
||||
)
|
||||
{
|
||||
// NOTE: Since the apool_t's mutex is now initialized statically, we no
|
||||
// longer need to explicitly destroy it.
|
||||
|
||||
// Query the mutex from the apool_t.
|
||||
bli_pthread_mutex_t* restrict mutex = bli_apool_mutex( apool );
|
||||
//bli_pthread_mutex_t* restrict mutex = bli_apool_mutex( apool );
|
||||
|
||||
// Destroy the mutex.
|
||||
bli_pthread_mutex_destroy( mutex );
|
||||
//bli_pthread_mutex_destroy( mutex );
|
||||
|
||||
// Query the underlying pool_t and mutex from the apool_t.
|
||||
pool_t* restrict pool = bli_apool_pool( apool );
|
||||
@@ -368,6 +380,8 @@ pool_t* bli_apool_array_elem
|
||||
array_t* restrict array
|
||||
)
|
||||
{
|
||||
err_t r_val;
|
||||
|
||||
// Query the array element corresponding to index.
|
||||
// NOTE: If we knew that the array_t contained elements of size
|
||||
// sizeof( void* ) or sizeof( whatever ), we could return the *value*
|
||||
@@ -417,7 +431,7 @@ pool_t* bli_apool_array_elem
|
||||
#endif
|
||||
|
||||
// Allocate the pool_t.
|
||||
pool = bli_malloc_intl( sizeof( pool_t ) );
|
||||
pool = bli_malloc_intl( sizeof( pool_t ), &r_val );
|
||||
|
||||
// Initialize the pool_t.
|
||||
bli_pool_init
|
||||
@@ -453,6 +467,8 @@ void bli_apool_grow
|
||||
apool_t* restrict apool
|
||||
)
|
||||
{
|
||||
err_t r_val;
|
||||
|
||||
// If the requested increase is zero, return early.
|
||||
if ( num_blocks_add == 0 ) return;
|
||||
|
||||
@@ -493,7 +509,7 @@ void bli_apool_grow
|
||||
// Allocate a new block_ptrs array.
|
||||
array_t** restrict block_ptrs_new
|
||||
=
|
||||
bli_malloc_intl( block_ptrs_len_new * sizeof( array_t* ) );
|
||||
bli_malloc_intl( block_ptrs_len_new * sizeof( array_t* ), &r_val );
|
||||
|
||||
// Query the top_index of the pool.
|
||||
const siz_t top_index = bli_pool_top_index( pool );
|
||||
|
||||
@@ -43,6 +43,8 @@ void bli_array_init
|
||||
array_t* restrict array
|
||||
)
|
||||
{
|
||||
err_t r_val;
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_array_init(): allocating array [%d * %d]: ",
|
||||
( int )num_elem, ( int )elem_size );
|
||||
@@ -52,7 +54,7 @@ void bli_array_init
|
||||
const size_t array_size = num_elem * elem_size;
|
||||
|
||||
// Allocate the array buffer.
|
||||
void* restrict buf = bli_malloc_intl( array_size );
|
||||
void* restrict buf = bli_malloc_intl( array_size, &r_val );
|
||||
|
||||
// Initialize the array elements to zero. THIS IS IMPORTANT because
|
||||
// consumer threads will use the NULL-ness of the array elements to
|
||||
@@ -72,6 +74,8 @@ void bli_array_resize
|
||||
array_t* restrict array
|
||||
)
|
||||
{
|
||||
err_t r_val;
|
||||
|
||||
// Query the number of elements in the array.
|
||||
const siz_t num_elem_prev = bli_array_num_elem( array );
|
||||
|
||||
@@ -98,7 +102,7 @@ void bli_array_resize
|
||||
#endif
|
||||
|
||||
// Allocate a new array buffer.
|
||||
char* restrict buf_new = bli_malloc_intl( array_size_new );
|
||||
char* restrict buf_new = bli_malloc_intl( array_size_new, &r_val );
|
||||
|
||||
// Copy the previous array contents to the new array.
|
||||
memcpy( buf_new, buf_prev, array_size_prev );
|
||||
|
||||
@@ -42,7 +42,9 @@ blksz_t* bli_blksz_create_ed
|
||||
dim_t b_z, dim_t be_z
|
||||
)
|
||||
{
|
||||
blksz_t* b = bli_malloc_intl( sizeof( blksz_t ) );
|
||||
err_t r_val;
|
||||
|
||||
blksz_t* b = bli_malloc_intl( sizeof( blksz_t ), &r_val );
|
||||
|
||||
bli_blksz_init_ed
|
||||
(
|
||||
@@ -62,7 +64,9 @@ blksz_t* bli_blksz_create
|
||||
dim_t be_s, dim_t be_d, dim_t be_c, dim_t be_z
|
||||
)
|
||||
{
|
||||
blksz_t* b = bli_malloc_intl( sizeof( blksz_t ) );
|
||||
err_t r_val;
|
||||
|
||||
blksz_t* b = bli_malloc_intl( sizeof( blksz_t ), &r_val );
|
||||
|
||||
bli_blksz_init
|
||||
(
|
||||
|
||||
@@ -192,7 +192,7 @@ void bli_cntl_free_w_thrinfo
|
||||
printf( "bli_cntl_free_w_thrinfo(): releasing mem pool block.\n" );
|
||||
#endif
|
||||
|
||||
bli_membrk_release( rntm, cntl_pack_mem );
|
||||
bli_pba_release( rntm, cntl_pack_mem );
|
||||
}
|
||||
|
||||
// Free the current node.
|
||||
@@ -236,7 +236,7 @@ void bli_cntl_free_wo_thrinfo
|
||||
// allocated.
|
||||
if ( bli_mem_is_alloc( cntl_pack_mem ) )
|
||||
{
|
||||
bli_membrk_release( rntm, cntl_pack_mem );
|
||||
bli_pba_release( rntm, cntl_pack_mem );
|
||||
}
|
||||
|
||||
// Free the current node.
|
||||
|
||||
@@ -78,33 +78,34 @@ void bli_cntx_set_blkszs( ind_t method, dim_t n_bs, ... )
|
||||
|
||||
va_list args;
|
||||
dim_t i;
|
||||
err_t r_val;
|
||||
|
||||
// Allocate some temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_blkszs(): " );
|
||||
#endif
|
||||
bszid_t* bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ) );
|
||||
bszid_t* bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_blkszs(): " );
|
||||
#endif
|
||||
blksz_t** blkszs = bli_malloc_intl( n_bs * sizeof( blksz_t* ) );
|
||||
blksz_t** blkszs = bli_malloc_intl( n_bs * sizeof( blksz_t* ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_blkszs(): " );
|
||||
#endif
|
||||
bszid_t* bmults = bli_malloc_intl( n_bs * sizeof( bszid_t ) );
|
||||
bszid_t* bmults = bli_malloc_intl( n_bs * sizeof( bszid_t ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_blkszs(): " );
|
||||
#endif
|
||||
double* dsclrs = bli_malloc_intl( n_bs * sizeof( double ) );
|
||||
double* dsclrs = bli_malloc_intl( n_bs * sizeof( double ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_blkszs(): " );
|
||||
#endif
|
||||
double* msclrs = bli_malloc_intl( n_bs * sizeof( double ) );
|
||||
double* msclrs = bli_malloc_intl( n_bs * sizeof( double ), &r_val );
|
||||
|
||||
// -- Begin variable argument section --
|
||||
|
||||
@@ -343,6 +344,7 @@ void bli_cntx_set_ind_blkszs( ind_t method, dim_t n_bs, ... )
|
||||
|
||||
va_list args;
|
||||
dim_t i;
|
||||
err_t r_val;
|
||||
|
||||
// Return early if called with BLIS_NAT.
|
||||
if ( method == BLIS_NAT ) return;
|
||||
@@ -352,17 +354,17 @@ void bli_cntx_set_ind_blkszs( ind_t method, dim_t n_bs, ... )
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_ind_blkszs(): " );
|
||||
#endif
|
||||
bszid_t* bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ) );
|
||||
bszid_t* bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_ind_blkszs(): " );
|
||||
#endif
|
||||
double* dsclrs = bli_malloc_intl( n_bs * sizeof( double ) );
|
||||
double* dsclrs = bli_malloc_intl( n_bs * sizeof( double ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_ind_blkszs(): " );
|
||||
#endif
|
||||
double* msclrs = bli_malloc_intl( n_bs * sizeof( double ) );
|
||||
double* msclrs = bli_malloc_intl( n_bs * sizeof( double ), &r_val );
|
||||
|
||||
// -- Begin variable argument section --
|
||||
|
||||
@@ -523,28 +525,29 @@ void bli_cntx_set_l3_nat_ukrs( dim_t n_ukrs, ... )
|
||||
|
||||
va_list args;
|
||||
dim_t i;
|
||||
err_t r_val;
|
||||
|
||||
// Allocate some temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_nat_ukrs(): " );
|
||||
#endif
|
||||
l3ukr_t* ukr_ids = bli_malloc_intl( n_ukrs * sizeof( l3ukr_t ) );
|
||||
l3ukr_t* ukr_ids = bli_malloc_intl( n_ukrs * sizeof( l3ukr_t ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_nat_ukrs(): " );
|
||||
#endif
|
||||
num_t* ukr_dts = bli_malloc_intl( n_ukrs * sizeof( num_t ) );
|
||||
num_t* ukr_dts = bli_malloc_intl( n_ukrs * sizeof( num_t ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_nat_ukrs(): " );
|
||||
#endif
|
||||
void_fp* ukr_fps = bli_malloc_intl( n_ukrs * sizeof( void_fp ) );
|
||||
void_fp* ukr_fps = bli_malloc_intl( n_ukrs * sizeof( void_fp ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_nat_ukrs(): " );
|
||||
#endif
|
||||
bool* ukr_prefs = bli_malloc_intl( n_ukrs * sizeof( bool ) );
|
||||
bool* ukr_prefs = bli_malloc_intl( n_ukrs * sizeof( bool ), &r_val );
|
||||
|
||||
// -- Begin variable argument section --
|
||||
|
||||
@@ -680,23 +683,24 @@ void bli_cntx_set_l3_vir_ukrs( dim_t n_ukrs, ... )
|
||||
|
||||
va_list args;
|
||||
dim_t i;
|
||||
err_t r_val;
|
||||
|
||||
// Allocate some temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_vir_ukrs(): " );
|
||||
#endif
|
||||
l3ukr_t* ukr_ids = bli_malloc_intl( n_ukrs * sizeof( l3ukr_t ) );
|
||||
l3ukr_t* ukr_ids = bli_malloc_intl( n_ukrs * sizeof( l3ukr_t ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_vir_ukrs(): " );
|
||||
#endif
|
||||
num_t* ukr_dts = bli_malloc_intl( n_ukrs * sizeof( num_t ) );
|
||||
num_t* ukr_dts = bli_malloc_intl( n_ukrs * sizeof( num_t ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_vir_ukrs(): " );
|
||||
#endif
|
||||
void_fp* ukr_fps = bli_malloc_intl( n_ukrs * sizeof( void_fp ) );
|
||||
void_fp* ukr_fps = bli_malloc_intl( n_ukrs * sizeof( void_fp ), &r_val );
|
||||
|
||||
// -- Begin variable argument section --
|
||||
|
||||
@@ -800,20 +804,21 @@ void bli_cntx_set_l3_sup_thresh( dim_t n_thresh, ... )
|
||||
|
||||
*/
|
||||
|
||||
va_list args;
|
||||
dim_t i;
|
||||
va_list args;
|
||||
dim_t i;
|
||||
err_t r_val;
|
||||
|
||||
// Allocate some temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_sup_thresh(): " );
|
||||
#endif
|
||||
threshid_t* threshids = bli_malloc_intl( n_thresh * sizeof( threshid_t ) );
|
||||
threshid_t* threshids = bli_malloc_intl( n_thresh * sizeof( threshid_t ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_sup_thresh(): " );
|
||||
#endif
|
||||
blksz_t** threshs = bli_malloc_intl( n_thresh * sizeof( blksz_t* ) );
|
||||
blksz_t** threshs = bli_malloc_intl( n_thresh * sizeof( blksz_t* ), &r_val );
|
||||
|
||||
// -- Begin variable argument section --
|
||||
|
||||
@@ -907,18 +912,19 @@ void bli_cntx_set_l3_sup_handlers( dim_t n_ops, ... )
|
||||
|
||||
va_list args;
|
||||
dim_t i;
|
||||
err_t r_val;
|
||||
|
||||
// Allocate some temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_sup_handlers(): " );
|
||||
#endif
|
||||
opid_t* op_ids = bli_malloc_intl( n_ops * sizeof( opid_t ) );
|
||||
opid_t* op_ids = bli_malloc_intl( n_ops * sizeof( opid_t ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_sup_handlers(): " );
|
||||
#endif
|
||||
void** op_fps = bli_malloc_intl( n_ops * sizeof( void* ) );
|
||||
void** op_fps = bli_malloc_intl( n_ops * sizeof( void* ), &r_val );
|
||||
|
||||
// -- Begin variable argument section --
|
||||
|
||||
@@ -1005,17 +1011,18 @@ void bli_cntx_set_l3_sup_blkszs( dim_t n_bs, ... )
|
||||
|
||||
va_list args;
|
||||
dim_t i;
|
||||
err_t r_val;
|
||||
|
||||
// Allocate some temporary local arrays.
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_blkszs(): " );
|
||||
#endif
|
||||
bszid_t* bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ) );
|
||||
bszid_t* bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_blkszs(): " );
|
||||
#endif
|
||||
blksz_t** blkszs = bli_malloc_intl( n_bs * sizeof( blksz_t* ) );
|
||||
blksz_t** blkszs = bli_malloc_intl( n_bs * sizeof( blksz_t* ), &r_val );
|
||||
|
||||
// -- Begin variable argument section --
|
||||
|
||||
@@ -1109,28 +1116,29 @@ void bli_cntx_set_l3_sup_kers( dim_t n_ukrs, ... )
|
||||
|
||||
va_list args;
|
||||
dim_t i;
|
||||
err_t r_val;
|
||||
|
||||
// Allocate some temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_sup_kers(): " );
|
||||
#endif
|
||||
stor3_t* st3_ids = bli_malloc_intl( n_ukrs * sizeof( stor3_t ) );
|
||||
stor3_t* st3_ids = bli_malloc_intl( n_ukrs * sizeof( stor3_t ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_sup_kers(): " );
|
||||
#endif
|
||||
num_t* ukr_dts = bli_malloc_intl( n_ukrs * sizeof( num_t ) );
|
||||
num_t* ukr_dts = bli_malloc_intl( n_ukrs * sizeof( num_t ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_sup_kers(): " );
|
||||
#endif
|
||||
void** ukr_fps = bli_malloc_intl( n_ukrs * sizeof( void* ) );
|
||||
void** ukr_fps = bli_malloc_intl( n_ukrs * sizeof( void* ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_sup_kers(): " );
|
||||
#endif
|
||||
bool* ukr_prefs = bli_malloc_intl( n_ukrs * sizeof( bool ) );
|
||||
bool* ukr_prefs = bli_malloc_intl( n_ukrs * sizeof( bool ), &r_val );
|
||||
|
||||
// -- Begin variable argument section --
|
||||
|
||||
@@ -1287,23 +1295,24 @@ void bli_cntx_set_l1f_kers( dim_t n_kers, ... )
|
||||
|
||||
va_list args;
|
||||
dim_t i;
|
||||
err_t r_val;
|
||||
|
||||
// Allocate some temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l1f_kers(): " );
|
||||
#endif
|
||||
l1fkr_t* ker_ids = bli_malloc_intl( n_kers * sizeof( l1fkr_t ) );
|
||||
l1fkr_t* ker_ids = bli_malloc_intl( n_kers * sizeof( l1fkr_t ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l1f_kers(): " );
|
||||
#endif
|
||||
num_t* ker_dts = bli_malloc_intl( n_kers * sizeof( num_t ) );
|
||||
num_t* ker_dts = bli_malloc_intl( n_kers * sizeof( num_t ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l1f_kers(): " );
|
||||
#endif
|
||||
void_fp* ker_fps = bli_malloc_intl( n_kers * sizeof( void_fp ) );
|
||||
void_fp* ker_fps = bli_malloc_intl( n_kers * sizeof( void_fp ), &r_val );
|
||||
|
||||
// -- Begin variable argument section --
|
||||
|
||||
@@ -1405,23 +1414,24 @@ void bli_cntx_set_l1v_kers( dim_t n_kers, ... )
|
||||
|
||||
va_list args;
|
||||
dim_t i;
|
||||
err_t r_val;
|
||||
|
||||
// Allocate some temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l1v_kers(): " );
|
||||
#endif
|
||||
l1vkr_t* ker_ids = bli_malloc_intl( n_kers * sizeof( l1vkr_t ) );
|
||||
l1vkr_t* ker_ids = bli_malloc_intl( n_kers * sizeof( l1vkr_t ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l1v_kers(): " );
|
||||
#endif
|
||||
num_t* ker_dts = bli_malloc_intl( n_kers * sizeof( num_t ) );
|
||||
num_t* ker_dts = bli_malloc_intl( n_kers * sizeof( num_t ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l1v_kers(): " );
|
||||
#endif
|
||||
void_fp* ker_fps = bli_malloc_intl( n_kers * sizeof( void_fp ) );
|
||||
void_fp* ker_fps = bli_malloc_intl( n_kers * sizeof( void_fp ), &r_val );
|
||||
|
||||
// -- Begin variable argument section --
|
||||
|
||||
@@ -1523,23 +1533,24 @@ void bli_cntx_set_packm_kers( dim_t n_kers, ... )
|
||||
|
||||
va_list args;
|
||||
dim_t i;
|
||||
err_t r_val;
|
||||
|
||||
// Allocate some temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_packm_kers(): " );
|
||||
#endif
|
||||
l1mkr_t* ker_ids = bli_malloc_intl( n_kers * sizeof( l1mkr_t ) );
|
||||
l1mkr_t* ker_ids = bli_malloc_intl( n_kers * sizeof( l1mkr_t ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_packm_kers(): " );
|
||||
#endif
|
||||
num_t* ker_dts = bli_malloc_intl( n_kers * sizeof( num_t ) );
|
||||
num_t* ker_dts = bli_malloc_intl( n_kers * sizeof( num_t ), &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_packm_kers(): " );
|
||||
#endif
|
||||
void_fp* ker_fps = bli_malloc_intl( n_kers * sizeof( void_fp ) );
|
||||
void_fp* ker_fps = bli_malloc_intl( n_kers * sizeof( void_fp ), &r_val );
|
||||
|
||||
// -- Begin variable argument section --
|
||||
|
||||
|
||||
@@ -44,8 +44,9 @@ func_t* bli_func_create
|
||||
)
|
||||
{
|
||||
func_t* f;
|
||||
err_t r_val;
|
||||
|
||||
f = ( func_t* ) bli_malloc_intl( sizeof(func_t) );
|
||||
f = ( func_t* )bli_malloc_intl( sizeof( func_t ), &r_val );
|
||||
|
||||
bli_func_init
|
||||
(
|
||||
|
||||
@@ -337,6 +337,8 @@ void bli_gks_register_cntx
|
||||
void_fp ind_fp
|
||||
)
|
||||
{
|
||||
err_t r_val;
|
||||
|
||||
// This function is called by bli_gks_init() for each architecture that
|
||||
// will be supported by BLIS. It takes an architecture id and three
|
||||
// function pointers, one to a function that initializes a native context
|
||||
@@ -385,7 +387,7 @@ void bli_gks_register_cntx
|
||||
// needs to be allocated. Allocate the memory and initialize it to
|
||||
// zeros/NULL, storing the address of the allocated memory at the element
|
||||
// for the current architecture id.
|
||||
gks[ id ] = bli_calloc_intl( sizeof( cntx_t* ) * BLIS_NUM_IND_METHODS );
|
||||
gks[ id ] = bli_calloc_intl( sizeof( cntx_t* ) * BLIS_NUM_IND_METHODS, &r_val );
|
||||
|
||||
// Alias the allocated array for readability.
|
||||
cntx_t** restrict gks_id = gks[ id ];
|
||||
@@ -397,7 +399,7 @@ void bli_gks_register_cntx
|
||||
// Allocate memory for a single context and store the address at
|
||||
// the element in the gks[ id ] array that is reserved for native
|
||||
// execution.
|
||||
gks_id[ BLIS_NAT ] = bli_calloc_intl( sizeof( cntx_t ) );
|
||||
gks_id[ BLIS_NAT ] = bli_calloc_intl( sizeof( cntx_t ), &r_val );
|
||||
|
||||
// Alias the allocated context address for readability.
|
||||
cntx_t* restrict gks_id_nat = gks_id[ BLIS_NAT ];
|
||||
@@ -494,6 +496,7 @@ cntx_t* bli_gks_query_ind_cntx
|
||||
bli_init_once();
|
||||
|
||||
cntx_t* gks_id_ind;
|
||||
err_t r_val;
|
||||
|
||||
// Return the address of a context that will be suited for executing a
|
||||
// level-3 operation via the requested induced method (and datatype) for
|
||||
@@ -552,7 +555,7 @@ cntx_t* bli_gks_query_ind_cntx
|
||||
// If gks_id_ind is NULL, then we know we must allocate and then
|
||||
// initialize the context, storing its address back to
|
||||
// gks_id[ ind ].
|
||||
gks_id_ind = bli_calloc_intl( sizeof( cntx_t ) );
|
||||
gks_id_ind = bli_calloc_intl( sizeof( cntx_t ), &r_val );
|
||||
gks_id[ ind ] = gks_id_ind;
|
||||
|
||||
// Before we can call the induced method context initialization
|
||||
|
||||
@@ -69,14 +69,6 @@ gint_t bli_info_get_pool_addr_offset_size_a( void ) { return BLIS_POOL_ADDR_OF
|
||||
gint_t bli_info_get_pool_addr_offset_size_b( void ) { return BLIS_POOL_ADDR_OFFSET_SIZE_B; }
|
||||
gint_t bli_info_get_pool_addr_offset_size_c( void ) { return BLIS_POOL_ADDR_OFFSET_SIZE_C; }
|
||||
gint_t bli_info_get_pool_addr_offset_size_gen( void ) { return BLIS_POOL_ADDR_OFFSET_SIZE_GEN; }
|
||||
gint_t bli_info_get_enable_stay_auto_init( void )
|
||||
{
|
||||
#ifdef BLIS_ENABLE_STAY_AUTO_INITIALIZED
|
||||
return 1;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
gint_t bli_info_get_enable_blas( void )
|
||||
{
|
||||
#ifdef BLIS_ENABLE_BLAS
|
||||
|
||||
@@ -56,18 +56,10 @@ void bli_init_auto( void )
|
||||
|
||||
void bli_finalize_auto( void )
|
||||
{
|
||||
#ifdef BLIS_ENABLE_STAY_AUTO_INITIALIZED
|
||||
|
||||
// If BLIS was configured to stay initialized after being automatically
|
||||
// initialized, we honor the configuration request and do nothing.
|
||||
// BLIS will remain initialized unless and until the user explicitly
|
||||
// calls bli_finalize().
|
||||
|
||||
#else
|
||||
|
||||
bli_finalize_once();
|
||||
|
||||
#endif
|
||||
// The _auto() functions are used when initializing the BLAS compatibility
|
||||
// layer. It would not make much sense to automatically initialize and
|
||||
// finalize for every BLAS routine call; therefore, we remain initialized
|
||||
// unless and until the application explicitly calls bli_finalize().
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
@@ -71,7 +71,7 @@ void bli_free_pool( void* p )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void* bli_malloc_user( size_t size )
|
||||
void* bli_malloc_user( size_t size, err_t* r_val )
|
||||
{
|
||||
const malloc_ft malloc_fp = BLIS_MALLOC_USER;
|
||||
const size_t align_size = BLIS_HEAP_ADDR_ALIGN_SIZE;
|
||||
@@ -82,7 +82,9 @@ void* bli_malloc_user( size_t size )
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
return bli_fmalloc_align( malloc_fp, size, align_size );
|
||||
void* p = bli_fmalloc_align( malloc_fp, size, align_size, r_val );
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
void bli_free_user( void* p )
|
||||
@@ -97,7 +99,7 @@ void bli_free_user( void* p )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void* bli_malloc_intl( size_t size )
|
||||
void* bli_malloc_intl( size_t size, err_t* r_val )
|
||||
{
|
||||
const malloc_ft malloc_fp = BLIS_MALLOC_INTL;
|
||||
|
||||
@@ -106,18 +108,21 @@ void* bli_malloc_intl( size_t size )
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
return bli_fmalloc_noalign( malloc_fp, size );
|
||||
void* p = bli_fmalloc_noalign( malloc_fp, size, r_val );
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
void* bli_calloc_intl( size_t size )
|
||||
void* bli_calloc_intl( size_t size, err_t* r_val )
|
||||
{
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_calloc_intl(): " );
|
||||
#endif
|
||||
|
||||
void* p = bli_malloc_intl( size );
|
||||
void* p = bli_malloc_intl( size, r_val );
|
||||
|
||||
memset( p, 0, size );
|
||||
if ( bli_is_success( *r_val ) )
|
||||
memset( p, 0, size );
|
||||
|
||||
return p;
|
||||
}
|
||||
@@ -138,7 +143,8 @@ void* bli_fmalloc_align
|
||||
(
|
||||
malloc_ft f,
|
||||
size_t size,
|
||||
size_t align_size
|
||||
size_t align_size,
|
||||
err_t* r_val
|
||||
)
|
||||
{
|
||||
const size_t ptr_size = sizeof( void* );
|
||||
@@ -165,6 +171,9 @@ void* bli_fmalloc_align
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_fmalloc_post_check( p_orig );
|
||||
|
||||
// The pseudo-return value isn't used yet.
|
||||
*r_val = BLIS_SUCCESS;
|
||||
|
||||
// Advance the pointer by one pointer element.
|
||||
p_byte = p_orig;
|
||||
p_byte += ptr_size;
|
||||
@@ -226,7 +235,8 @@ void bli_ffree_align
|
||||
void* bli_fmalloc_noalign
|
||||
(
|
||||
malloc_ft f,
|
||||
size_t size
|
||||
size_t size,
|
||||
err_t* r_val
|
||||
)
|
||||
{
|
||||
void* p = f( size );
|
||||
@@ -235,6 +245,9 @@ void* bli_fmalloc_noalign
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_fmalloc_post_check( p );
|
||||
|
||||
// The pseudo-return value isn't used yet.
|
||||
*r_val = BLIS_SUCCESS;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
@@ -34,8 +34,8 @@
|
||||
*/
|
||||
|
||||
// Typedef function pointer types for malloc() and free() substitutes.
|
||||
typedef void* (*malloc_ft) ( size_t size );
|
||||
typedef void (*free_ft) ( void* p );
|
||||
//typedef void* (*malloc_ft) ( size_t size );
|
||||
//typedef void (*free_ft) ( void* p );
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
@@ -44,19 +44,19 @@ BLIS_EXPORT_BLIS void* bli_malloc_pool( size_t size );
|
||||
BLIS_EXPORT_BLIS void bli_free_pool( void* p );
|
||||
#endif
|
||||
|
||||
void* bli_malloc_intl( size_t size );
|
||||
void* bli_calloc_intl( size_t size );
|
||||
void* bli_malloc_intl( size_t size, err_t* r_val );
|
||||
void* bli_calloc_intl( size_t size, err_t* r_val );
|
||||
void bli_free_intl( void* p );
|
||||
|
||||
BLIS_EXPORT_BLIS void* bli_malloc_user( size_t size );
|
||||
BLIS_EXPORT_BLIS void* bli_malloc_user( size_t size, err_t* r_val );
|
||||
BLIS_EXPORT_BLIS void bli_free_user( void* p );
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void* bli_fmalloc_align( malloc_ft f, size_t size, size_t align_size );
|
||||
void* bli_fmalloc_align( malloc_ft f, size_t size, size_t align_size, err_t* r_val );
|
||||
void bli_ffree_align( free_ft f, void* p );
|
||||
|
||||
void* bli_fmalloc_noalign( malloc_ft f, size_t size );
|
||||
void* bli_fmalloc_noalign( malloc_ft f, size_t size, err_t* r_val );
|
||||
void bli_ffree_noalign( free_ft f, void* p );
|
||||
|
||||
void bli_fmalloc_align_check( malloc_ft f, size_t size, size_t align_size );
|
||||
|
||||
@@ -44,8 +44,9 @@ mbool_t* bli_mbool_create
|
||||
)
|
||||
{
|
||||
mbool_t* b;
|
||||
err_t r_val;
|
||||
|
||||
b = ( mbool_t* ) bli_malloc_intl( sizeof(mbool_t) );
|
||||
b = ( mbool_t* ) bli_malloc_intl( sizeof( mbool_t ), &r_val );
|
||||
|
||||
bli_mbool_init
|
||||
(
|
||||
|
||||
@@ -39,7 +39,7 @@
|
||||
void bli_memsys_init( void )
|
||||
{
|
||||
// Query a native context so we have something to pass into
|
||||
// bli_membrk_init_pools(). We use BLIS_DOUBLE for the datatype,
|
||||
// bli_pba_init_pools(). We use BLIS_DOUBLE for the datatype,
|
||||
// but the dt argument is actually only used when initializing
|
||||
// contexts for induced methods.
|
||||
// NOTE: Instead of calling bli_gks_query_cntx(), we call
|
||||
@@ -47,7 +47,7 @@ void bli_memsys_init( void )
|
||||
cntx_t* cntx_p = bli_gks_query_cntx_noinit();
|
||||
|
||||
// Initialize the packing block allocator and its data structures.
|
||||
bli_membrk_init( cntx_p );
|
||||
bli_pba_init( cntx_p );
|
||||
|
||||
// Initialize the small block allocator and its data structures.
|
||||
bli_sba_init();
|
||||
@@ -58,7 +58,7 @@ void bli_memsys_finalize( void )
|
||||
// Finalize the small block allocator and its data structures.
|
||||
bli_sba_finalize();
|
||||
|
||||
// Finalize the global membrk_t object and its data structures.
|
||||
bli_membrk_finalize();
|
||||
// Finalize the packing block allocator and its data structures.
|
||||
bli_pba_finalize();
|
||||
}
|
||||
|
||||
|
||||
@@ -147,6 +147,7 @@ void bli_obj_alloc_buffer
|
||||
siz_t elem_size;
|
||||
siz_t buffer_size;
|
||||
void* p;
|
||||
err_t r_val;
|
||||
|
||||
bli_init_once();
|
||||
|
||||
@@ -195,7 +196,7 @@ void bli_obj_alloc_buffer
|
||||
buffer_size = ( siz_t )n_elem * elem_size;
|
||||
|
||||
// Allocate the buffer.
|
||||
p = bli_malloc_user( buffer_size );
|
||||
p = bli_malloc_user( buffer_size, &r_val );
|
||||
|
||||
// Set individual fields.
|
||||
bli_obj_set_buffer( p, obj );
|
||||
|
||||
@@ -57,22 +57,22 @@ void bli_pack_finalize( void )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
dim_t bli_pack_get_pack_a( void )
|
||||
void bli_pack_get_pack_a( bool* pack_a )
|
||||
{
|
||||
// We must ensure that global_rntm has been initialized.
|
||||
bli_init_once();
|
||||
|
||||
return bli_rntm_pack_a( &global_rntm );
|
||||
*pack_a = bli_rntm_pack_a( &global_rntm );
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
dim_t bli_pack_get_pack_b( void )
|
||||
void bli_pack_get_pack_b( bool* pack_b )
|
||||
{
|
||||
// We must ensure that global_rntm has been initialized.
|
||||
bli_init_once();
|
||||
|
||||
return bli_rntm_pack_b( &global_rntm );
|
||||
*pack_b = bli_rntm_pack_b( &global_rntm );
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
@@ -101,7 +101,7 @@ void bli_pack_set_pack_b( bool pack_b )
|
||||
// Acquire the mutex protecting global_rntm.
|
||||
bli_pthread_mutex_lock( &global_rntm_mutex );
|
||||
|
||||
bli_rntm_set_pack_a( pack_b, &global_rntm );
|
||||
bli_rntm_set_pack_b( pack_b, &global_rntm );
|
||||
|
||||
// Release the mutex protecting global_rntm.
|
||||
bli_pthread_mutex_unlock( &global_rntm_mutex );
|
||||
|
||||
@@ -38,10 +38,10 @@
|
||||
void bli_pack_init( void );
|
||||
void bli_pack_finalize( void );
|
||||
|
||||
BLIS_EXPORT_BLIS dim_t bli_pack_get_pack_a( void );
|
||||
BLIS_EXPORT_BLIS dim_t bli_pack_get_pack_b( void );
|
||||
BLIS_EXPORT_BLIS void bli_pack_set_pack_a( bool pack_a );
|
||||
BLIS_EXPORT_BLIS void bli_pack_set_pack_b( bool pack_b );
|
||||
BLIS_EXPORT_BLIS void bli_pack_get_pack_a( bool* pack_a );
|
||||
BLIS_EXPORT_BLIS void bli_pack_get_pack_b( bool* pack_b );
|
||||
BLIS_EXPORT_BLIS void bli_pack_set_pack_a( bool pack_a );
|
||||
BLIS_EXPORT_BLIS void bli_pack_set_pack_b( bool pack_b );
|
||||
|
||||
void bli_pack_init_rntm_from_env( rntm_t* rntm );
|
||||
|
||||
|
||||
@@ -36,55 +36,61 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
static membrk_t global_membrk;
|
||||
// Statically initialize the mutex within the packing block allocator object.
|
||||
static pba_t pba = { .mutex = BLIS_PTHREAD_MUTEX_INITIALIZER };
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
membrk_t* bli_membrk_query( void )
|
||||
pba_t* bli_pba_query( void )
|
||||
{
|
||||
return &global_membrk;
|
||||
return &pba;
|
||||
}
|
||||
|
||||
void bli_membrk_init
|
||||
void bli_pba_init
|
||||
(
|
||||
cntx_t* restrict cntx
|
||||
)
|
||||
{
|
||||
membrk_t* restrict membrk = bli_membrk_query();
|
||||
pba_t* restrict pba = bli_pba_query();
|
||||
|
||||
const siz_t align_size = BLIS_POOL_ADDR_ALIGN_SIZE_GEN;
|
||||
malloc_ft malloc_fp = BLIS_MALLOC_POOL;
|
||||
free_ft free_fp = BLIS_FREE_POOL;
|
||||
|
||||
// These fields are used for general-purpose allocation (ie: buf_type
|
||||
// equal to BLIS_BUFFER_FOR_GEN_USE) within bli_membrk_acquire_m().
|
||||
bli_membrk_set_align_size( align_size, membrk );
|
||||
bli_membrk_set_malloc_fp( malloc_fp, membrk );
|
||||
bli_membrk_set_free_fp( free_fp, membrk );
|
||||
// equal to BLIS_BUFFER_FOR_GEN_USE) within bli_pba_acquire_m().
|
||||
bli_pba_set_align_size( align_size, pba );
|
||||
bli_pba_set_malloc_fp( malloc_fp, pba );
|
||||
bli_pba_set_free_fp( free_fp, pba );
|
||||
|
||||
// The mutex field of pba is initialized statically above. This
|
||||
// keeps bli_pba_init() simpler and removes the possibility of
|
||||
// something going wrong during mutex initialization.
|
||||
|
||||
bli_membrk_init_mutex( membrk );
|
||||
#ifdef BLIS_ENABLE_PBA_POOLS
|
||||
bli_membrk_init_pools( cntx, membrk );
|
||||
bli_pba_init_pools( cntx, pba );
|
||||
#endif
|
||||
}
|
||||
|
||||
void bli_membrk_finalize
|
||||
void bli_pba_finalize
|
||||
(
|
||||
void
|
||||
)
|
||||
{
|
||||
membrk_t* restrict membrk = bli_membrk_query();
|
||||
|
||||
bli_membrk_set_malloc_fp( NULL, membrk );
|
||||
bli_membrk_set_free_fp( NULL, membrk );
|
||||
pba_t* restrict pba = bli_pba_query();
|
||||
|
||||
#ifdef BLIS_ENABLE_PBA_POOLS
|
||||
bli_membrk_finalize_pools( membrk );
|
||||
bli_pba_finalize_pools( pba );
|
||||
#endif
|
||||
bli_membrk_finalize_mutex( membrk );
|
||||
|
||||
// The mutex field of pba is initialized statically above, and
|
||||
// therefore never destroyed.
|
||||
|
||||
bli_pba_set_malloc_fp( NULL, pba );
|
||||
bli_pba_set_free_fp( NULL, pba );
|
||||
}
|
||||
|
||||
void bli_membrk_acquire_m
|
||||
void bli_pba_acquire_m
|
||||
(
|
||||
rntm_t* rntm,
|
||||
siz_t req_size,
|
||||
@@ -95,37 +101,38 @@ void bli_membrk_acquire_m
|
||||
pool_t* pool;
|
||||
pblk_t* pblk;
|
||||
dim_t pi;
|
||||
err_t r_val;
|
||||
|
||||
// If the internal memory pools for packing block allocator are disabled,
|
||||
// we spoof the buffer type as BLIS_BUFFER_FOR_GEN_USE to induce the
|
||||
// immediate usage of bli_membrk_malloc().
|
||||
// immediate usage of bli_pba_malloc().
|
||||
#ifndef BLIS_ENABLE_PBA_POOLS
|
||||
buf_type = BLIS_BUFFER_FOR_GEN_USE;
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_membrk_acquire_m(): bli_fmalloc_align(): size %ld\n",
|
||||
printf( "bli_pba_acquire_m(): bli_fmalloc_align(): size %ld\n",
|
||||
( long )req_size );
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Query the packing block allocator from the runtime.
|
||||
membrk_t* membrk = bli_rntm_membrk( rntm );
|
||||
pba_t* pba = bli_rntm_pba( rntm );
|
||||
|
||||
|
||||
if ( buf_type == BLIS_BUFFER_FOR_GEN_USE )
|
||||
{
|
||||
malloc_ft malloc_fp = bli_membrk_malloc_fp( membrk );
|
||||
siz_t align_size = bli_membrk_align_size( membrk );
|
||||
malloc_ft malloc_fp = bli_pba_malloc_fp( pba );
|
||||
siz_t align_size = bli_pba_align_size( pba );
|
||||
|
||||
// For general-use buffer requests, dynamically allocating memory
|
||||
// is assumed to be sufficient.
|
||||
void* buf = bli_fmalloc_align( malloc_fp, req_size, align_size );
|
||||
void* buf = bli_fmalloc_align( malloc_fp, req_size, align_size, &r_val );
|
||||
|
||||
// Initialize the mem_t object with:
|
||||
// - the address of the memory block,
|
||||
// - the buffer type (a packbuf_t value),
|
||||
// - the size of the requested region,
|
||||
// - the membrk_t from which the mem_t entry was acquired.
|
||||
// - the pba_t from which the mem_t entry was acquired.
|
||||
// NOTE: We initialize the pool field to NULL since this block did not
|
||||
// come from a memory pool.
|
||||
bli_mem_set_buffer( buf, mem );
|
||||
@@ -142,13 +149,13 @@ void bli_membrk_acquire_m
|
||||
// Map the requested packed buffer type to a zero-based index, which
|
||||
// we then use to select the corresponding memory pool.
|
||||
pi = bli_packbuf_index( buf_type );
|
||||
pool = bli_membrk_pool( pi, membrk );
|
||||
pool = bli_pba_pool( pi, pba );
|
||||
|
||||
// Extract the address of the pblk_t struct within the mem_t.
|
||||
pblk = bli_mem_pblk( mem );
|
||||
|
||||
// Acquire the mutex associated with the membrk object.
|
||||
bli_membrk_lock( membrk );
|
||||
// Acquire the mutex associated with the pba object.
|
||||
bli_pba_lock( pba );
|
||||
|
||||
// BEGIN CRITICAL SECTION
|
||||
{
|
||||
@@ -166,8 +173,8 @@ void bli_membrk_acquire_m
|
||||
}
|
||||
// END CRITICAL SECTION
|
||||
|
||||
// Release the mutex associated with the membrk object.
|
||||
bli_membrk_unlock( membrk );
|
||||
// Release the mutex associated with the pba object.
|
||||
bli_pba_unlock( pba );
|
||||
|
||||
// Query the block_size from the pblk_t. This will be at least
|
||||
// req_size, perhaps larger.
|
||||
@@ -178,7 +185,7 @@ void bli_membrk_acquire_m
|
||||
// - the address of the memory pool to which it belongs,
|
||||
// - the size of the contiguous memory block (NOT the size of the
|
||||
// requested region),
|
||||
// - the membrk_t from which the mem_t entry was acquired.
|
||||
// - the pba_t from which the mem_t entry was acquired.
|
||||
// The actual (aligned) address is already stored in the mem_t
|
||||
// struct's pblk_t field.
|
||||
bli_mem_set_buf_type( buf_type, mem );
|
||||
@@ -188,7 +195,7 @@ void bli_membrk_acquire_m
|
||||
}
|
||||
|
||||
|
||||
void bli_membrk_release
|
||||
void bli_pba_release
|
||||
(
|
||||
rntm_t* rntm,
|
||||
mem_t* mem
|
||||
@@ -199,21 +206,21 @@ void bli_membrk_release
|
||||
pblk_t* pblk;
|
||||
|
||||
// Query the packing block allocator from the runtime.
|
||||
membrk_t* membrk = bli_rntm_membrk( rntm );
|
||||
pba_t* pba = bli_rntm_pba( rntm );
|
||||
|
||||
// Extract the buffer type so we know what kind of memory was allocated.
|
||||
buf_type = bli_mem_buf_type( mem );
|
||||
|
||||
#ifndef BLIS_ENABLE_PBA_POOLS
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_membrk_release(): bli_ffree_align(): size %ld\n",
|
||||
printf( "bli_pba_release(): bli_ffree_align(): size %ld\n",
|
||||
( long )bli_mem_size( mem ) );
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if ( buf_type == BLIS_BUFFER_FOR_GEN_USE )
|
||||
{
|
||||
free_ft free_fp = bli_membrk_free_fp( membrk );
|
||||
free_ft free_fp = bli_pba_free_fp( pba );
|
||||
void* buf = bli_mem_buffer( mem );
|
||||
|
||||
// For general-use buffers, we dynamically allocate memory, and so
|
||||
@@ -229,8 +236,8 @@ void bli_membrk_release
|
||||
// Extract the address of the pblk_t struct within the mem_t struct.
|
||||
pblk = bli_mem_pblk( mem );
|
||||
|
||||
// Acquire the mutex associated with the membrk object.
|
||||
bli_membrk_lock( membrk );
|
||||
// Acquire the mutex associated with the pba object.
|
||||
bli_pba_lock( pba );
|
||||
|
||||
// BEGIN CRITICAL SECTION
|
||||
{
|
||||
@@ -241,15 +248,15 @@ void bli_membrk_release
|
||||
}
|
||||
// END CRITICAL SECTION
|
||||
|
||||
// Release the mutex associated with the membrk object.
|
||||
bli_membrk_unlock( membrk );
|
||||
// Release the mutex associated with the pba object.
|
||||
bli_pba_unlock( pba );
|
||||
}
|
||||
|
||||
// Clear the mem_t object so that it appears unallocated. This clears:
|
||||
// - the pblk_t struct's fields (ie: the buffer addresses)
|
||||
// - the pool field
|
||||
// - the size field
|
||||
// - the membrk field
|
||||
// - the pba field
|
||||
// NOTE: We do not clear the buf_type field since there is no
|
||||
// "uninitialized" value for packbuf_t.
|
||||
bli_mem_clear( mem );
|
||||
@@ -257,35 +264,38 @@ void bli_membrk_release
|
||||
|
||||
|
||||
#if 0
|
||||
void bli_membrk_acquire_v
|
||||
void bli_pba_acquire_v
|
||||
(
|
||||
membrk_t* membrk,
|
||||
siz_t req_size,
|
||||
mem_t* mem
|
||||
pba_t* pba,
|
||||
siz_t req_size,
|
||||
mem_t* mem
|
||||
)
|
||||
{
|
||||
bli_membrk_acquire_m( membrk,
|
||||
req_size,
|
||||
BLIS_BUFFER_FOR_GEN_USE,
|
||||
mem );
|
||||
bli_pba_acquire_m
|
||||
(
|
||||
pba,
|
||||
req_size,
|
||||
BLIS_BUFFER_FOR_GEN_USE,
|
||||
mem
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void bli_membrk_rntm_set_membrk
|
||||
void bli_pba_rntm_set_pba
|
||||
(
|
||||
rntm_t* rntm
|
||||
)
|
||||
{
|
||||
membrk_t* membrk = bli_membrk_query();
|
||||
pba_t* pba = bli_pba_query();
|
||||
|
||||
bli_rntm_set_membrk( membrk, rntm );
|
||||
bli_rntm_set_pba( pba, rntm );
|
||||
}
|
||||
|
||||
|
||||
siz_t bli_membrk_pool_size
|
||||
siz_t bli_pba_pool_size
|
||||
(
|
||||
membrk_t* membrk,
|
||||
pba_t* pba,
|
||||
packbuf_t buf_type
|
||||
)
|
||||
{
|
||||
@@ -305,7 +315,7 @@ siz_t bli_membrk_pool_size
|
||||
// Acquire the pointer to the pool corresponding to the buf_type
|
||||
// provided.
|
||||
pool_index = bli_packbuf_index( buf_type );
|
||||
pool = bli_membrk_pool( pool_index, membrk );
|
||||
pool = bli_pba_pool( pool_index, pba );
|
||||
|
||||
// Compute the pool "size" as the product of the block size
|
||||
// and the number of blocks in the pool.
|
||||
@@ -318,10 +328,10 @@ siz_t bli_membrk_pool_size
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void bli_membrk_init_pools
|
||||
void bli_pba_init_pools
|
||||
(
|
||||
cntx_t* cntx,
|
||||
membrk_t* membrk
|
||||
cntx_t* cntx,
|
||||
pba_t* pba
|
||||
)
|
||||
{
|
||||
// Map each of the packbuf_t values to an index starting at zero.
|
||||
@@ -330,9 +340,9 @@ void bli_membrk_init_pools
|
||||
const dim_t index_c = bli_packbuf_index( BLIS_BUFFER_FOR_C_PANEL );
|
||||
|
||||
// Alias the pool addresses to convenient identifiers.
|
||||
pool_t* pool_a = bli_membrk_pool( index_a, membrk );
|
||||
pool_t* pool_b = bli_membrk_pool( index_b, membrk );
|
||||
pool_t* pool_c = bli_membrk_pool( index_c, membrk );
|
||||
pool_t* pool_a = bli_pba_pool( index_a, pba );
|
||||
pool_t* pool_b = bli_pba_pool( index_b, pba );
|
||||
pool_t* pool_c = bli_pba_pool( index_c, pba );
|
||||
|
||||
// Start with empty pools.
|
||||
const dim_t num_blocks_a = 0;
|
||||
@@ -364,10 +374,10 @@ void bli_membrk_init_pools
|
||||
free_ft free_fp = BLIS_FREE_POOL;
|
||||
|
||||
// Determine the block size for each memory pool.
|
||||
bli_membrk_compute_pool_block_sizes( &block_size_a,
|
||||
&block_size_b,
|
||||
&block_size_c,
|
||||
cntx );
|
||||
bli_pba_compute_pool_block_sizes( &block_size_a,
|
||||
&block_size_b,
|
||||
&block_size_c,
|
||||
cntx );
|
||||
|
||||
// Initialize the memory pools for A, B, and C.
|
||||
bli_pool_init( num_blocks_a, block_ptrs_len_a, block_size_a, align_size_a,
|
||||
@@ -378,9 +388,9 @@ void bli_membrk_init_pools
|
||||
offset_size_c, malloc_fp, free_fp, pool_c );
|
||||
}
|
||||
|
||||
void bli_membrk_finalize_pools
|
||||
void bli_pba_finalize_pools
|
||||
(
|
||||
membrk_t* membrk
|
||||
pba_t* pba
|
||||
)
|
||||
{
|
||||
// Map each of the packbuf_t values to an index starting at zero.
|
||||
@@ -389,9 +399,9 @@ void bli_membrk_finalize_pools
|
||||
dim_t index_c = bli_packbuf_index( BLIS_BUFFER_FOR_C_PANEL );
|
||||
|
||||
// Alias the pool addresses to convenient identifiers.
|
||||
pool_t* pool_a = bli_membrk_pool( index_a, membrk );
|
||||
pool_t* pool_b = bli_membrk_pool( index_b, membrk );
|
||||
pool_t* pool_c = bli_membrk_pool( index_c, membrk );
|
||||
pool_t* pool_a = bli_pba_pool( index_a, pba );
|
||||
pool_t* pool_b = bli_pba_pool( index_b, pba );
|
||||
pool_t* pool_c = bli_pba_pool( index_c, pba );
|
||||
|
||||
// Finalize the memory pools for A, B, and C.
|
||||
bli_pool_finalize( pool_a );
|
||||
@@ -401,7 +411,7 @@ void bli_membrk_finalize_pools
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void bli_membrk_compute_pool_block_sizes
|
||||
void bli_pba_compute_pool_block_sizes
|
||||
(
|
||||
siz_t* bs_a,
|
||||
siz_t* bs_b,
|
||||
@@ -429,11 +439,11 @@ void bli_membrk_compute_pool_block_sizes
|
||||
// Avoid considering induced methods for real datatypes.
|
||||
if ( bli_is_real( dt ) && im != BLIS_NAT ) continue;
|
||||
|
||||
bli_membrk_compute_pool_block_sizes_dt( dt,
|
||||
&bs_dt_a,
|
||||
&bs_dt_b,
|
||||
&bs_dt_c,
|
||||
cntx );
|
||||
bli_pba_compute_pool_block_sizes_dt( dt,
|
||||
&bs_dt_a,
|
||||
&bs_dt_b,
|
||||
&bs_dt_c,
|
||||
cntx );
|
||||
|
||||
bs_cand_a = bli_max( bs_dt_a, bs_cand_a );
|
||||
bs_cand_b = bli_max( bs_dt_b, bs_cand_b );
|
||||
@@ -448,7 +458,7 @@ void bli_membrk_compute_pool_block_sizes
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void bli_membrk_compute_pool_block_sizes_dt
|
||||
void bli_pba_compute_pool_block_sizes_dt
|
||||
(
|
||||
num_t dt,
|
||||
siz_t* bs_a,
|
||||
@@ -37,83 +37,100 @@
|
||||
#ifndef BLIS_MEMBRK_H
|
||||
#define BLIS_MEMBRK_H
|
||||
|
||||
// membrk init
|
||||
// Packing block allocator (formerly memory broker)
|
||||
|
||||
BLIS_INLINE void bli_membrk_init_mutex( membrk_t* membrk )
|
||||
/*
|
||||
typedef struct pba_s
|
||||
{
|
||||
bli_pthread_mutex_init( &(membrk->mutex), NULL );
|
||||
pool_t pools[3];
|
||||
bli_pthread_mutex_t mutex;
|
||||
|
||||
// These fields are used for general-purpose allocation.
|
||||
siz_t align_size;
|
||||
malloc_ft malloc_fp;
|
||||
free_ft free_fp;
|
||||
|
||||
} pba_t;
|
||||
*/
|
||||
|
||||
|
||||
// pba init
|
||||
|
||||
//BLIS_INLINE void bli_pba_init_mutex( pba_t* pba )
|
||||
//{
|
||||
// bli_pthread_mutex_init( &(pba->mutex), NULL );
|
||||
//}
|
||||
|
||||
//BLIS_INLINE void bli_pba_finalize_mutex( pba_t* pba )
|
||||
//{
|
||||
// bli_pthread_mutex_destroy( &(pba->mutex) );
|
||||
//}
|
||||
|
||||
// pba query
|
||||
|
||||
BLIS_INLINE pool_t* bli_pba_pool( dim_t pool_index, pba_t* pba )
|
||||
{
|
||||
return &(pba->pools[ pool_index ]);
|
||||
}
|
||||
|
||||
BLIS_INLINE void bli_membrk_finalize_mutex( membrk_t* membrk )
|
||||
BLIS_INLINE siz_t bli_pba_align_size( pba_t* pba )
|
||||
{
|
||||
bli_pthread_mutex_destroy( &(membrk->mutex) );
|
||||
return pba->align_size;
|
||||
}
|
||||
|
||||
// membrk query
|
||||
|
||||
BLIS_INLINE pool_t* bli_membrk_pool( dim_t pool_index, membrk_t* membrk )
|
||||
BLIS_INLINE malloc_ft bli_pba_malloc_fp( pba_t* pba )
|
||||
{
|
||||
return &(membrk->pools[ pool_index ]);
|
||||
return pba->malloc_fp;
|
||||
}
|
||||
|
||||
BLIS_INLINE siz_t bli_membrk_align_size( membrk_t* membrk )
|
||||
BLIS_INLINE free_ft bli_pba_free_fp( pba_t* pba )
|
||||
{
|
||||
return membrk->align_size;
|
||||
return pba->free_fp;
|
||||
}
|
||||
|
||||
BLIS_INLINE malloc_ft bli_membrk_malloc_fp( membrk_t* membrk )
|
||||
// pba modification
|
||||
|
||||
BLIS_INLINE void bli_pba_set_align_size( siz_t align_size, pba_t* pba )
|
||||
{
|
||||
return membrk->malloc_fp;
|
||||
pba->align_size = align_size;
|
||||
}
|
||||
|
||||
BLIS_INLINE free_ft bli_membrk_free_fp( membrk_t* membrk )
|
||||
BLIS_INLINE void bli_pba_set_malloc_fp( malloc_ft malloc_fp, pba_t* pba )
|
||||
{
|
||||
return membrk->free_fp;
|
||||
pba->malloc_fp = malloc_fp;
|
||||
}
|
||||
|
||||
// membrk modification
|
||||
|
||||
BLIS_INLINE void bli_membrk_set_align_size( siz_t align_size, membrk_t* membrk )
|
||||
BLIS_INLINE void bli_pba_set_free_fp( free_ft free_fp, pba_t* pba )
|
||||
{
|
||||
membrk->align_size = align_size;
|
||||
pba->free_fp = free_fp;
|
||||
}
|
||||
|
||||
BLIS_INLINE void bli_membrk_set_malloc_fp( malloc_ft malloc_fp, membrk_t* membrk )
|
||||
// pba action
|
||||
|
||||
BLIS_INLINE void bli_pba_lock( pba_t* pba )
|
||||
{
|
||||
membrk->malloc_fp = malloc_fp;
|
||||
bli_pthread_mutex_lock( &(pba->mutex) );
|
||||
}
|
||||
|
||||
BLIS_INLINE void bli_membrk_set_free_fp( free_ft free_fp, membrk_t* membrk )
|
||||
BLIS_INLINE void bli_pba_unlock( pba_t* pba )
|
||||
{
|
||||
membrk->free_fp = free_fp;
|
||||
}
|
||||
|
||||
// membrk action
|
||||
|
||||
BLIS_INLINE void bli_membrk_lock( membrk_t* membrk )
|
||||
{
|
||||
bli_pthread_mutex_lock( &(membrk->mutex) );
|
||||
}
|
||||
|
||||
BLIS_INLINE void bli_membrk_unlock( membrk_t* membrk )
|
||||
{
|
||||
bli_pthread_mutex_unlock( &(membrk->mutex) );
|
||||
bli_pthread_mutex_unlock( &(pba->mutex) );
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
membrk_t* bli_membrk_query( void );
|
||||
pba_t* bli_pba_query( void );
|
||||
|
||||
void bli_membrk_init
|
||||
void bli_pba_init
|
||||
(
|
||||
cntx_t* cntx
|
||||
);
|
||||
void bli_membrk_finalize
|
||||
void bli_pba_finalize
|
||||
(
|
||||
void
|
||||
);
|
||||
|
||||
void bli_membrk_acquire_m
|
||||
void bli_pba_acquire_m
|
||||
(
|
||||
rntm_t* rntm,
|
||||
siz_t req_size,
|
||||
@@ -121,43 +138,43 @@ void bli_membrk_acquire_m
|
||||
mem_t* mem
|
||||
);
|
||||
|
||||
void bli_membrk_release
|
||||
void bli_pba_release
|
||||
(
|
||||
rntm_t* rntm,
|
||||
mem_t* mem
|
||||
);
|
||||
|
||||
void bli_membrk_rntm_set_membrk
|
||||
void bli_pba_rntm_set_pba
|
||||
(
|
||||
rntm_t* rntm
|
||||
);
|
||||
|
||||
siz_t bli_membrk_pool_size
|
||||
siz_t bli_pba_pool_size
|
||||
(
|
||||
membrk_t* membrk,
|
||||
pba_t* pba,
|
||||
packbuf_t buf_type
|
||||
);
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void bli_membrk_init_pools
|
||||
void bli_pba_init_pools
|
||||
(
|
||||
cntx_t* cntx,
|
||||
membrk_t* membrk
|
||||
cntx_t* cntx,
|
||||
pba_t* pba
|
||||
);
|
||||
void bli_membrk_finalize_pools
|
||||
void bli_pba_finalize_pools
|
||||
(
|
||||
membrk_t* membrk
|
||||
pba_t* pba
|
||||
);
|
||||
|
||||
void bli_membrk_compute_pool_block_sizes
|
||||
void bli_pba_compute_pool_block_sizes
|
||||
(
|
||||
siz_t* bs_a,
|
||||
siz_t* bs_b,
|
||||
siz_t* bs_c,
|
||||
cntx_t* cntx
|
||||
);
|
||||
void bli_membrk_compute_pool_block_sizes_dt
|
||||
void bli_pba_compute_pool_block_sizes_dt
|
||||
(
|
||||
num_t dt,
|
||||
siz_t* bs_a,
|
||||
@@ -49,6 +49,8 @@ void bli_pool_init
|
||||
pool_t* restrict pool
|
||||
)
|
||||
{
|
||||
err_t r_val;
|
||||
|
||||
// Make sure that block_ptrs_len is at least num_blocks.
|
||||
block_ptrs_len = bli_max( block_ptrs_len, num_blocks );
|
||||
|
||||
@@ -62,7 +64,7 @@ void bli_pool_init
|
||||
// well as pool blocks? If so, don't forget to s/bli_free_intl/free_fp/g.
|
||||
pblk_t* restrict block_ptrs
|
||||
=
|
||||
bli_malloc_intl( block_ptrs_len * sizeof( pblk_t ) );
|
||||
bli_malloc_intl( block_ptrs_len * sizeof( pblk_t ), &r_val );
|
||||
|
||||
// Allocate and initialize each entry in the block_ptrs array.
|
||||
for ( dim_t i = 0; i < num_blocks; ++i )
|
||||
@@ -343,6 +345,8 @@ void bli_pool_grow
|
||||
pool_t* restrict pool
|
||||
)
|
||||
{
|
||||
err_t r_val;
|
||||
|
||||
// If the requested increase is zero, return early.
|
||||
if ( num_blocks_add == 0 ) return;
|
||||
|
||||
@@ -377,7 +381,7 @@ void bli_pool_grow
|
||||
// well as pool blocks? If so, don't forget to s/bli_free_intl/free_fp/g.
|
||||
pblk_t* restrict block_ptrs_new
|
||||
=
|
||||
bli_malloc_intl( block_ptrs_len_new * sizeof( pblk_t ) );
|
||||
bli_malloc_intl( block_ptrs_len_new * sizeof( pblk_t ), &r_val );
|
||||
|
||||
// Query the top_index of the pool.
|
||||
const siz_t top_index = bli_pool_top_index( pool );
|
||||
@@ -503,6 +507,8 @@ void bli_pool_alloc_block
|
||||
pblk_t* restrict block
|
||||
)
|
||||
{
|
||||
err_t r_val;
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_pool_alloc_block(): calling fmalloc_align(): size %d (align %d, offset %d)\n",
|
||||
( int )block_size, ( int )align_size, ( int )offset_size );
|
||||
@@ -516,7 +522,7 @@ void bli_pool_alloc_block
|
||||
// that many bytes at the beginning of the allocated memory.
|
||||
void* restrict buf
|
||||
=
|
||||
bli_fmalloc_align( malloc_fp, block_size + offset_size, align_size );
|
||||
bli_fmalloc_align( malloc_fp, block_size + offset_size, align_size, &r_val );
|
||||
|
||||
#if 0
|
||||
// NOTE: This code is disabled because it is not needed, since
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
|
||||
bool bli_obj_equals( obj_t* a, obj_t* b )
|
||||
{
|
||||
#if 0
|
||||
bool r_val = FALSE;
|
||||
num_t dt_a;
|
||||
num_t dt_b;
|
||||
@@ -80,6 +81,18 @@ bool bli_obj_equals( obj_t* a, obj_t* b )
|
||||
}
|
||||
|
||||
return r_val;
|
||||
#else
|
||||
bool r_val;
|
||||
|
||||
if ( bli_obj_is_1x1( a ) && bli_obj_is_1x1( b ) )
|
||||
bli_eqsc( a, b, &r_val );
|
||||
else if ( bli_obj_is_vector( a ) && bli_obj_is_vector( b ) )
|
||||
bli_eqv( a, b, &r_val );
|
||||
else
|
||||
bli_eqm( a, b, &r_val );
|
||||
|
||||
return r_val;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool bli_obj_imag_equals( obj_t* a, obj_t* b )
|
||||
|
||||
@@ -52,7 +52,7 @@ typedef struct rntm_s
|
||||
bool l3_sup;
|
||||
|
||||
pool_t* sba_pool;
|
||||
membrk_t* membrk;
|
||||
pba_t* pba;
|
||||
|
||||
} rntm_t;
|
||||
*/
|
||||
@@ -124,9 +124,9 @@ BLIS_INLINE pool_t* bli_rntm_sba_pool( rntm_t* rntm )
|
||||
return rntm->sba_pool;
|
||||
}
|
||||
|
||||
BLIS_INLINE membrk_t* bli_rntm_membrk( rntm_t* rntm )
|
||||
BLIS_INLINE pba_t* bli_rntm_pba( rntm_t* rntm )
|
||||
{
|
||||
return rntm->membrk;
|
||||
return rntm->pba;
|
||||
}
|
||||
|
||||
#if 0
|
||||
@@ -205,9 +205,9 @@ BLIS_INLINE void bli_rntm_set_sba_pool( pool_t* sba_pool, rntm_t* rntm )
|
||||
rntm->sba_pool = sba_pool;
|
||||
}
|
||||
|
||||
BLIS_INLINE void bli_rntm_set_membrk( membrk_t* membrk, rntm_t* rntm )
|
||||
BLIS_INLINE void bli_rntm_set_pba( pba_t* pba, rntm_t* rntm )
|
||||
{
|
||||
rntm->membrk = membrk;
|
||||
rntm->pba = pba;
|
||||
}
|
||||
|
||||
BLIS_INLINE void bli_rntm_clear_num_threads_only( rntm_t* rntm )
|
||||
@@ -222,9 +222,9 @@ BLIS_INLINE void bli_rntm_clear_sba_pool( rntm_t* rntm )
|
||||
{
|
||||
bli_rntm_set_sba_pool( NULL, rntm );
|
||||
}
|
||||
BLIS_INLINE void bli_rntm_clear_membrk( rntm_t* rntm )
|
||||
BLIS_INLINE void bli_rntm_clear_pba( rntm_t* rntm )
|
||||
{
|
||||
bli_rntm_set_membrk( NULL, rntm );
|
||||
bli_rntm_set_pba( NULL, rntm );
|
||||
}
|
||||
|
||||
//
|
||||
@@ -313,7 +313,7 @@ BLIS_INLINE void bli_rntm_clear_l3_sup( rntm_t* rntm )
|
||||
.pack_b = FALSE, \
|
||||
.l3_sup = TRUE, \
|
||||
.sba_pool = NULL, \
|
||||
.membrk = NULL, \
|
||||
.pba = NULL, \
|
||||
} \
|
||||
|
||||
BLIS_INLINE void bli_rntm_init( rntm_t* rntm )
|
||||
@@ -327,7 +327,7 @@ BLIS_INLINE void bli_rntm_init( rntm_t* rntm )
|
||||
bli_rntm_clear_l3_sup( rntm );
|
||||
|
||||
bli_rntm_clear_sba_pool( rntm );
|
||||
bli_rntm_clear_membrk( rntm );
|
||||
bli_rntm_clear_pba( rntm );
|
||||
}
|
||||
|
||||
// -- rntm_t total thread calculation ------------------------------------------
|
||||
|
||||
@@ -34,8 +34,9 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
// The small block allocator: an apool_t of array_t of pool_t.
|
||||
static apool_t sba;
|
||||
// Statically initialize the mutex within the small block allocator.
|
||||
// Note that the sba is an apool_t of array_t of pool_t.
|
||||
static apool_t sba = { .mutex = BLIS_PTHREAD_MUTEX_INITIALIZER };
|
||||
|
||||
apool_t* bli_sba_query( void )
|
||||
{
|
||||
@@ -61,11 +62,12 @@ void* bli_sba_acquire
|
||||
)
|
||||
{
|
||||
void* block;
|
||||
err_t r_val;
|
||||
|
||||
#ifdef BLIS_ENABLE_SBA_POOLS
|
||||
if ( rntm == NULL )
|
||||
{
|
||||
block = bli_malloc_intl( req_size );
|
||||
block = bli_malloc_intl( req_size, &r_val );
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -95,7 +97,7 @@ void* bli_sba_acquire
|
||||
}
|
||||
#else
|
||||
|
||||
block = bli_malloc_intl( req_size );
|
||||
block = bli_malloc_intl( req_size, &r_val );
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -59,9 +59,9 @@ err_t bli_setijm
|
||||
dim_t cs = bli_obj_col_stride( b );
|
||||
num_t dt = bli_obj_dt( b );
|
||||
|
||||
// Return error if i or j is beyond bounds of matrix/vector.
|
||||
if ( m <= i ) return BLIS_FAILURE;
|
||||
if ( n <= j ) return BLIS_FAILURE;
|
||||
// Return error if i or j is beyond bounds of the matrix/vector.
|
||||
if ( i < 0 || m <= i ) return BLIS_FAILURE;
|
||||
if ( j < 0 || n <= j ) return BLIS_FAILURE;
|
||||
|
||||
// Don't modify scalar constants.
|
||||
if ( dt == BLIS_CONSTANT ) return BLIS_FAILURE;
|
||||
@@ -133,35 +133,15 @@ err_t bli_getijm
|
||||
dim_t cs = bli_obj_col_stride( b );
|
||||
num_t dt = bli_obj_dt( b );
|
||||
|
||||
// Return error if i or j is beyond bounds of matrix/vector.
|
||||
if ( m <= i ) return BLIS_FAILURE;
|
||||
if ( n <= j ) return BLIS_FAILURE;
|
||||
// Return error if i or j is beyond bounds of the matrix/vector.
|
||||
if ( i < 0 || m <= i ) return BLIS_FAILURE;
|
||||
if ( j < 0 || n <= j ) return BLIS_FAILURE;
|
||||
|
||||
void* b_p;
|
||||
|
||||
#if 0
|
||||
// Handle scalar constants separately.
|
||||
if ( dt == BLIS_CONSTANT )
|
||||
{
|
||||
if ( i == 0 && j == 0 )
|
||||
{
|
||||
dt = BLIS_DCOMPLEX;
|
||||
b_p = bli_obj_buffer_for_const( dt, b )
|
||||
}
|
||||
else return BLIS_FAILURE;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Query the pointer to the buffer at the adjusted offsets.
|
||||
b_p = bli_obj_buffer_at_off( b );
|
||||
}
|
||||
#else
|
||||
// Disallow access into scalar constants.
|
||||
if ( dt == BLIS_CONSTANT ) return BLIS_FAILURE;
|
||||
|
||||
// Query the pointer to the buffer at the adjusted offsets.
|
||||
b_p = bli_obj_buffer_at_off( b );
|
||||
#endif
|
||||
void* b_p = bli_obj_buffer_at_off( b );
|
||||
|
||||
// Index into the function pointer array.
|
||||
getijm_fp f = ftypes_getijm[ dt ];
|
||||
168
frame/base/bli_setgetijv.c
Normal file
168
frame/base/bli_setgetijv.c
Normal file
@@ -0,0 +1,168 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
typedef void (*setijv_fp)
|
||||
(
|
||||
double ar,
|
||||
double ai,
|
||||
dim_t i,
|
||||
void* restrict x, inc_t incx
|
||||
);
|
||||
static setijv_fp GENARRAY(ftypes_setijv,setijv);
|
||||
|
||||
err_t bli_setijv
|
||||
(
|
||||
double ar,
|
||||
double ai,
|
||||
dim_t i,
|
||||
obj_t* x
|
||||
)
|
||||
{
|
||||
dim_t n = bli_obj_vector_dim( x );
|
||||
dim_t incx = bli_obj_vector_inc( x );
|
||||
num_t dt = bli_obj_dt( x );
|
||||
|
||||
// Return error if i is beyond bounds of the vector.
|
||||
if ( i < 0 || n <= i ) return BLIS_FAILURE;
|
||||
|
||||
// Don't modify scalar constants.
|
||||
if ( dt == BLIS_CONSTANT ) return BLIS_FAILURE;
|
||||
|
||||
// Query the pointer to the buffer at the adjusted offsets.
|
||||
void* x_p = bli_obj_buffer_at_off( x );
|
||||
|
||||
// Index into the function pointer array.
|
||||
setijv_fp f = ftypes_setijv[ dt ];
|
||||
|
||||
// Invoke the type-specific function.
|
||||
f
|
||||
(
|
||||
ar,
|
||||
ai,
|
||||
i,
|
||||
x_p, incx
|
||||
);
|
||||
|
||||
return BLIS_SUCCESS;
|
||||
}
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
double ar, \
|
||||
double ai, \
|
||||
dim_t i, \
|
||||
void* restrict x, inc_t incx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict x_cast = ( ctype* )x; \
|
||||
\
|
||||
ctype* restrict x_i = x_cast + (i )*incx; \
|
||||
\
|
||||
PASTEMAC2(z,ch,sets)( ar, ai, *x_i ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0( setijv )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
typedef void (*getijv_fp)
|
||||
(
|
||||
dim_t i,
|
||||
void* restrict x, inc_t incx,
|
||||
double* ar,
|
||||
double* ai
|
||||
);
|
||||
static getijv_fp GENARRAY(ftypes_getijv,getijv);
|
||||
|
||||
err_t bli_getijv
|
||||
(
|
||||
dim_t i,
|
||||
obj_t* x,
|
||||
double* ar,
|
||||
double* ai
|
||||
)
|
||||
{
|
||||
dim_t n = bli_obj_vector_dim( x );
|
||||
dim_t incx = bli_obj_vector_inc( x );
|
||||
num_t dt = bli_obj_dt( x );
|
||||
|
||||
// Return error if i is beyond bounds of the vector.
|
||||
if ( i < 0 || n <= i ) return BLIS_FAILURE;
|
||||
|
||||
// Disallow access into scalar constants.
|
||||
if ( dt == BLIS_CONSTANT ) return BLIS_FAILURE;
|
||||
|
||||
// Query the pointer to the buffer at the adjusted offsets.
|
||||
void* x_p = bli_obj_buffer_at_off( x );
|
||||
|
||||
// Index into the function pointer array.
|
||||
getijv_fp f = ftypes_getijv[ dt ];
|
||||
|
||||
// Invoke the type-specific function.
|
||||
f
|
||||
(
|
||||
i,
|
||||
x_p, incx,
|
||||
ar,
|
||||
ai
|
||||
);
|
||||
|
||||
return BLIS_SUCCESS;
|
||||
}
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
dim_t i, \
|
||||
void* restrict x, inc_t incx, \
|
||||
double* ar, \
|
||||
double* ai \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict x_cast = ( ctype* )x; \
|
||||
\
|
||||
ctype* restrict x_i = x_cast + (i )*incx; \
|
||||
\
|
||||
PASTEMAC2(ch,z,gets)( *x_i, *ar, *ai ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0( getijv )
|
||||
|
||||
78
frame/base/bli_setgetijv.h
Normal file
78
frame/base/bli_setgetijv.h
Normal file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
BLIS_EXPORT_BLIS err_t bli_setijv
|
||||
(
|
||||
double ar,
|
||||
double ai,
|
||||
dim_t i,
|
||||
obj_t* x
|
||||
);
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
double ar, \
|
||||
double ai, \
|
||||
dim_t i, \
|
||||
void* restrict x, inc_t incx \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC0( setijv )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
BLIS_EXPORT_BLIS err_t bli_getijv
|
||||
(
|
||||
dim_t i,
|
||||
obj_t* x,
|
||||
double* ar,
|
||||
double* ai
|
||||
);
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
dim_t i, \
|
||||
void* restrict b, inc_t incx, \
|
||||
double* ar, \
|
||||
double* ai \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC0( getijv )
|
||||
|
||||
@@ -149,16 +149,6 @@
|
||||
#define BLIS_RELAX_MCNR_NCMR_CONSTRAINTS
|
||||
#endif
|
||||
|
||||
// Stay initialized after auto-initialization, unless and until the user
|
||||
// explicitly calls bli_finalize().
|
||||
#ifdef BLIS_DISABLE_STAY_AUTO_INITIALIZED
|
||||
#undef BLIS_ENABLE_STAY_AUTO_INITIALIZED
|
||||
#else
|
||||
// Default behavior is enabled.
|
||||
#undef BLIS_ENABLE_STAY_AUTO_INITIALIZED // In case user explicitly enabled.
|
||||
#define BLIS_ENABLE_STAY_AUTO_INITIALIZED
|
||||
#endif
|
||||
|
||||
|
||||
// -- BLAS COMPATIBILITY LAYER -------------------------------------------------
|
||||
|
||||
|
||||
@@ -35,7 +35,12 @@
|
||||
// This file defines macros used to allow the _oapi.c files to produce
|
||||
// object APIs that omit expert parameters.
|
||||
|
||||
// Define the macro to remove the function name suffix (in function
|
||||
// Define a macro that allows the source code to determine which interface
|
||||
// (basic or expert) we are compiling.
|
||||
#undef BLIS_OAPI_BASIC
|
||||
#define BLIS_OAPI_BASIC
|
||||
|
||||
// Define the macro to omit a suffix from the function names (in function
|
||||
// definitions).
|
||||
#undef EX_SUF
|
||||
#define EX_SUF
|
||||
@@ -45,14 +50,10 @@
|
||||
#undef BLIS_OAPI_EX_PARAMS
|
||||
#define BLIS_OAPI_EX_PARAMS
|
||||
|
||||
// Define the macro to declare local expert variables that are initialized
|
||||
// Define the macro to add local expert variables that are initialized
|
||||
// to NULL. The "( void )" statements are to prevent unused variable
|
||||
// warnings by the compiler.
|
||||
#undef BLIS_OAPI_EX_DECLS
|
||||
#define BLIS_OAPI_EX_DECLS cntx_t* cntx = NULL; ( void )cntx; \
|
||||
rntm_t* rntm = NULL; ( void )rntm;
|
||||
|
||||
// Define the macro to pass the local expert variables to another function.
|
||||
//#undef BLIS_TAPI_EX_VARS
|
||||
//#define BLIS_TAPI_EX_VARS
|
||||
|
||||
|
||||
@@ -35,8 +35,13 @@
|
||||
// This file defines macros used to allow the _oapi.c files to produce
|
||||
// object APIs that contain context parameters.
|
||||
|
||||
// Define the macro to add a suffix to the object API function names
|
||||
// (in function definitions).
|
||||
// Define a macro that allows the source code to determine which interface
|
||||
// (basic or expert) we are compiling.
|
||||
#undef BLIS_OAPI_EXPERT
|
||||
#define BLIS_OAPI_EXPERT
|
||||
|
||||
// Define the macro to add a suffix to the function names (in function
|
||||
// definitions).
|
||||
#undef EX_SUF
|
||||
#define EX_SUF BLIS_OAPI_EX_SUF
|
||||
|
||||
@@ -50,7 +55,3 @@
|
||||
#undef BLIS_OAPI_EX_DECLS
|
||||
#define BLIS_OAPI_EX_DECLS
|
||||
|
||||
// Define the macro to pass the local expert variables to another function.
|
||||
//#undef BLIS_TAPI_EX_VARS
|
||||
//#define BLIS_TAPI_EX_VARS ,cntx, rntm
|
||||
|
||||
|
||||
@@ -261,6 +261,12 @@ BLIS_INLINE trans_t bli_trans_toggled_conj( trans_t trans )
|
||||
( trans ^ BLIS_CONJ_BIT );
|
||||
}
|
||||
|
||||
// Returns trans with every bit that is set in transapp flipped (bitwise
// XOR of the two values), cast back to trans_t.
BLIS_INLINE trans_t bli_apply_trans( trans_t transapp, trans_t trans )
{
	const trans_t combined = ( trans_t )( trans ^ transapp );

	return combined;
}
|
||||
|
||||
BLIS_INLINE void bli_toggle_trans( trans_t* trans )
|
||||
{
|
||||
*trans = bli_trans_toggled( *trans );
|
||||
@@ -421,6 +427,21 @@ BLIS_INLINE bool bli_is_unit_diag( diag_t diag )
|
||||
}
|
||||
|
||||
|
||||
// err_t-related
|
||||
|
||||
// Returns true exactly when err equals BLIS_SUCCESS.
BLIS_INLINE bool bli_is_success( err_t err )
{
	const bool succeeded = ( bool )( err == BLIS_SUCCESS );

	return succeeded;
}
|
||||
|
||||
// Returns true for any err value other than BLIS_SUCCESS.
BLIS_INLINE bool bli_is_failure( err_t err )
{
	const bool failed = ( bool )( err != BLIS_SUCCESS );

	return failed;
}
|
||||
|
||||
|
||||
// dimension-related
|
||||
|
||||
BLIS_INLINE bool bli_zero_dim1( dim_t m )
|
||||
|
||||
@@ -35,7 +35,12 @@
|
||||
// This file defines macros used to allow the _tapi.c files to produce
|
||||
// typed APIs that omit expert parameters.
|
||||
|
||||
// Define the macro to remove the function name suffix (in function
|
||||
// Define a macro that allows the source code to determine which interface
|
||||
// (basic or expert) we are compiling.
|
||||
#undef BLIS_TAPI_BASIC
|
||||
#define BLIS_TAPI_BASIC
|
||||
|
||||
// Define the macro to omit a suffix from the function names (in function
|
||||
// definitions).
|
||||
#undef EX_SUF
|
||||
#define EX_SUF
|
||||
@@ -45,14 +50,10 @@
|
||||
#undef BLIS_TAPI_EX_PARAMS
|
||||
#define BLIS_TAPI_EX_PARAMS
|
||||
|
||||
// Define the macro to declare local expert variables that are initialized
|
||||
// Define the macro to add local expert variables that are initialized
|
||||
// to NULL. The "( void )" statements are to prevent unused variable
|
||||
// warnings by the compiler.
|
||||
#undef BLIS_TAPI_EX_DECLS
|
||||
#define BLIS_TAPI_EX_DECLS cntx_t* cntx = NULL; ( void )cntx; \
|
||||
rntm_t* rntm = NULL; ( void )rntm;
|
||||
|
||||
// Define the macro to pass the local expert variables to another function.
|
||||
//#undef BLIS_TAPI_EX_VARS
|
||||
//#define BLIS_TAPI_EX_VARS
|
||||
|
||||
|
||||
@@ -35,8 +35,13 @@
|
||||
// This file defines macros used to allow the _tapi.c files to produce
|
||||
// typed APIs that contain context parameters.
|
||||
|
||||
// Define the macro to add a suffix to the typed API function names
|
||||
// (in function definitions).
|
||||
// Define a macro that allows the source code to determine which interface
|
||||
// (basic or expert) we are compiling.
|
||||
#undef BLIS_TAPI_EXPERT
|
||||
#define BLIS_TAPI_EXPERT
|
||||
|
||||
// Define the macro to add a suffix to the function names (in function
|
||||
// definitions).
|
||||
#undef EX_SUF
|
||||
#define EX_SUF BLIS_TAPI_EX_SUF
|
||||
|
||||
@@ -50,7 +55,3 @@
|
||||
#undef BLIS_TAPI_EX_DECLS
|
||||
#define BLIS_TAPI_EX_DECLS
|
||||
|
||||
// Define the macro to pass the local expert variables to another function.
|
||||
//#undef BLIS_TAPI_EX_VARS
|
||||
//#define BLIS_TAPI_EX_VARS ,cntx, rntm
|
||||
|
||||
|
||||
@@ -198,16 +198,19 @@ typedef double f77_double;
|
||||
typedef scomplex f77_scomplex;
|
||||
typedef dcomplex f77_dcomplex;
|
||||
|
||||
// -- Void function pointer types --
|
||||
// -- Misc. function pointer types --
|
||||
|
||||
// Note: This type should be used in any situation where the address of a
|
||||
// *function* will be conveyed or stored prior to it being typecast back
|
||||
// to the correct function type. It does not need to be used when conveying
|
||||
// or storing the address of *data* (such as an array of float or double).
|
||||
|
||||
//typedef void (*void_fp)( void );
|
||||
typedef void* void_fp;
|
||||
|
||||
// Typedef function pointer types for malloc() and free() substitutes.
|
||||
typedef void* (*malloc_ft)( size_t size );
|
||||
typedef void (*free_ft) ( void* p );
|
||||
|
||||
|
||||
//
|
||||
// -- BLIS info bit field offsets ----------------------------------------------
|
||||
@@ -1038,10 +1041,9 @@ typedef enum
|
||||
// -- BLIS misc. structure types -----------------------------------------------
|
||||
//
|
||||
|
||||
// These headers must be included here (or earlier) because definitions they
|
||||
// provide are needed in the pool_t and related structs.
|
||||
// This header must be included here (or earlier) because definitions it
|
||||
// provides are needed in the pool_t and related structs.
|
||||
#include "bli_pthread.h"
|
||||
#include "bli_malloc.h"
|
||||
|
||||
// -- Pool block type --
|
||||
|
||||
@@ -1099,7 +1101,7 @@ typedef struct
|
||||
|
||||
// -- packing block allocator: Locked set of pools type --
|
||||
|
||||
typedef struct membrk_s
|
||||
typedef struct pba_s
|
||||
{
|
||||
pool_t pools[3];
|
||||
bli_pthread_mutex_t mutex;
|
||||
@@ -1109,7 +1111,7 @@ typedef struct membrk_s
|
||||
malloc_ft malloc_fp;
|
||||
free_ft free_fp;
|
||||
|
||||
} membrk_t;
|
||||
} pba_t;
|
||||
|
||||
|
||||
// -- Memory object type --
|
||||
@@ -1479,7 +1481,7 @@ typedef struct rntm_s
|
||||
pool_t* sba_pool;
|
||||
|
||||
// The packing block allocator, which is attached in the l3 thread decorator.
|
||||
membrk_t* membrk;
|
||||
pba_t* pba;
|
||||
|
||||
} rntm_t;
|
||||
|
||||
|
||||
57
frame/include/bli_xapi_undef.h
Normal file
57
frame/include/bli_xapi_undef.h
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
// This file un-defines macros used to allow the _oapi.c and _tapi.c files to
|
||||
// produce object and typed APIs that omit or contain expert parameters.
|
||||
|
||||
// Un-define all macros that allow the source code to determine which interface
|
||||
// (basic or expert) we are compiling.
|
||||
#undef BLIS_OAPI_BASIC
|
||||
#undef BLIS_OAPI_EXPERT
|
||||
#undef BLIS_TAPI_BASIC
|
||||
#undef BLIS_TAPI_EXPERT
|
||||
|
||||
// Un-define the macro to omit or add the function name suffix (in function
|
||||
// definitions).
|
||||
#undef EX_SUF
|
||||
|
||||
// Un-define the macro to omit or add expert arguments from function signatures
|
||||
// and prototypes.
|
||||
#undef BLIS_OAPI_EX_PARAMS
|
||||
#undef BLIS_TAPI_EX_PARAMS
|
||||
|
||||
// Un-define the macro to omit or add local expert variables.
|
||||
#undef BLIS_OAPI_EX_DECLS
|
||||
#undef BLIS_TAPI_EX_DECLS
|
||||
|
||||
@@ -99,6 +99,7 @@ extern "C" {
|
||||
// -- Base operation prototypes --
|
||||
|
||||
#include "bli_init.h"
|
||||
#include "bli_malloc.h"
|
||||
#include "bli_const.h"
|
||||
#include "bli_obj.h"
|
||||
#include "bli_obj_scalar.h"
|
||||
@@ -109,7 +110,7 @@ extern "C" {
|
||||
#include "bli_rntm.h"
|
||||
#include "bli_gks.h"
|
||||
#include "bli_ind.h"
|
||||
#include "bli_membrk.h"
|
||||
#include "bli_pba.h"
|
||||
#include "bli_pool.h"
|
||||
#include "bli_array.h"
|
||||
#include "bli_apool.h"
|
||||
@@ -135,7 +136,8 @@ extern "C" {
|
||||
#include "bli_arch.h"
|
||||
#include "bli_cpuid.h"
|
||||
#include "bli_string.h"
|
||||
#include "bli_setgetij.h"
|
||||
#include "bli_setgetijm.h"
|
||||
#include "bli_setgetijv.h"
|
||||
#include "bli_setri.h"
|
||||
|
||||
#include "bli_castm.h"
|
||||
|
||||
@@ -73,7 +73,8 @@ void bli_l3_thread_decorator
|
||||
const dim_t n_threads = bli_rntm_num_threads( rntm );
|
||||
|
||||
#ifdef PRINT_THRINFO
|
||||
thrinfo_t** threads = bli_malloc_intl( n_threads * sizeof( thrinfo_t* ) );
|
||||
err_t r_val;
|
||||
thrinfo_t** threads = bli_malloc_intl( n_threads * sizeof( thrinfo_t* ), &r_val );
|
||||
#endif
|
||||
|
||||
// NOTE: The sba was initialized in bli_init().
|
||||
@@ -92,7 +93,7 @@ void bli_l3_thread_decorator
|
||||
// Set the packing block allocator field of the rntm. This will be
|
||||
// inherited by all of the child threads when they make local copies of
|
||||
// the rntm below.
|
||||
bli_membrk_rntm_set_membrk( rntm );
|
||||
bli_pba_rntm_set_pba( rntm );
|
||||
|
||||
// Allocate a global communicator for the root thrinfo_t structures.
|
||||
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );
|
||||
|
||||
@@ -146,6 +146,8 @@ void bli_l3_thread_decorator
|
||||
cntl_t* cntl
|
||||
)
|
||||
{
|
||||
err_t r_val;
|
||||
|
||||
// This is part of a hack to support mixed domain in bli_gemm_front().
|
||||
// Sometimes we need to specify a non-standard schema for A and B, and
|
||||
// we decided to transmit them via the schema field in the obj_t's
|
||||
@@ -176,7 +178,7 @@ void bli_l3_thread_decorator
|
||||
// Set the packing block allocator field of the rntm. This will be
|
||||
// inherited by all of the child threads when they make local copies of
|
||||
// the rntm below.
|
||||
bli_membrk_rntm_set_membrk( rntm );
|
||||
bli_pba_rntm_set_pba( rntm );
|
||||
|
||||
// Allocate a global communicator for the root thrinfo_t structures.
|
||||
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );
|
||||
@@ -187,12 +189,12 @@ void bli_l3_thread_decorator
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_l3_thread_decorator().pth: " );
|
||||
#endif
|
||||
bli_pthread_t* pthreads = bli_malloc_intl( sizeof( bli_pthread_t ) * n_threads );
|
||||
bli_pthread_t* pthreads = bli_malloc_intl( sizeof( bli_pthread_t ) * n_threads, &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_l3_thread_decorator().pth: " );
|
||||
#endif
|
||||
thread_data_t* datas = bli_malloc_intl( sizeof( thread_data_t ) * n_threads );
|
||||
thread_data_t* datas = bli_malloc_intl( sizeof( thread_data_t ) * n_threads, &r_val );
|
||||
|
||||
// NOTE: We must iterate backwards so that the chief thread (thread id 0)
|
||||
// can spawn all other threads before proceeding with its own computation.
|
||||
|
||||
@@ -78,7 +78,7 @@ void bli_l3_thread_decorator
|
||||
bli_sba_rntm_set_pool( 0, array, rntm );
|
||||
|
||||
// Set the packing block allocator field of the rntm.
|
||||
bli_membrk_rntm_set_membrk( rntm );
|
||||
bli_pba_rntm_set_pba( rntm );
|
||||
|
||||
// Allocate a global communicator for the root thrinfo_t structures.
|
||||
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );
|
||||
|
||||
@@ -76,7 +76,7 @@ err_t bli_l3_sup_thread_decorator
|
||||
// Set the packing block allocator field of the rntm. This will be
|
||||
// inherited by all of the child threads when they make local copies of
|
||||
// the rntm below.
|
||||
bli_membrk_rntm_set_membrk( rntm );
|
||||
bli_pba_rntm_set_pba( rntm );
|
||||
|
||||
// Allocate a global communicator for the root thrinfo_t structures.
|
||||
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );
|
||||
|
||||
@@ -122,6 +122,8 @@ err_t bli_l3_sup_thread_decorator
|
||||
rntm_t* rntm
|
||||
)
|
||||
{
|
||||
err_t r_val;
|
||||
|
||||
// Query the total number of threads from the rntm_t object.
|
||||
const dim_t n_threads = bli_rntm_num_threads( rntm );
|
||||
|
||||
@@ -141,7 +143,7 @@ err_t bli_l3_sup_thread_decorator
|
||||
// Set the packing block allocator field of the rntm. This will be
|
||||
// inherited by all of the child threads when they make local copies of
|
||||
// the rntm below.
|
||||
bli_membrk_rntm_set_membrk( rntm );
|
||||
bli_pba_rntm_set_pba( rntm );
|
||||
|
||||
// Allocate a global communicator for the root thrinfo_t structures.
|
||||
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );
|
||||
@@ -152,12 +154,12 @@ err_t bli_l3_sup_thread_decorator
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_l3_thread_decorator().pth: " );
|
||||
#endif
|
||||
bli_pthread_t* pthreads = bli_malloc_intl( sizeof( bli_pthread_t ) * n_threads );
|
||||
bli_pthread_t* pthreads = bli_malloc_intl( sizeof( bli_pthread_t ) * n_threads, &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_l3_thread_decorator().pth: " );
|
||||
#endif
|
||||
thread_data_t* datas = bli_malloc_intl( sizeof( thread_data_t ) * n_threads );
|
||||
thread_data_t* datas = bli_malloc_intl( sizeof( thread_data_t ) * n_threads, &r_val );
|
||||
|
||||
// NOTE: We must iterate backwards so that the chief thread (thread id 0)
|
||||
// can spawn all other threads before proceeding with its own computation.
|
||||
|
||||
@@ -69,7 +69,7 @@ err_t bli_l3_sup_thread_decorator
|
||||
bli_sba_rntm_set_pool( 0, array, rntm );
|
||||
|
||||
// Set the packing block allocator field of the rntm.
|
||||
bli_membrk_rntm_set_membrk( rntm );
|
||||
bli_pba_rntm_set_pba( rntm );
|
||||
|
||||
#ifndef SKIP_THRINFO_TREE
|
||||
// Allocate a global communicator for the root thrinfo_t structures.
|
||||
|
||||
@@ -111,17 +111,21 @@ void bli_thrcomm_barrier( dim_t t_id, thrcomm_t* comm )
|
||||
|
||||
void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm )
|
||||
{
|
||||
err_t r_val;
|
||||
|
||||
if ( comm == NULL ) return;
|
||||
comm->sent_object = NULL;
|
||||
comm->n_threads = n_threads;
|
||||
comm->barriers = bli_malloc_intl( sizeof( barrier_t* ) * n_threads );
|
||||
comm->barriers = bli_malloc_intl( sizeof( barrier_t* ) * n_threads, &r_val );
|
||||
bli_thrcomm_tree_barrier_create( n_threads, BLIS_TREE_BARRIER_ARITY, comm->barriers, 0 );
|
||||
}
|
||||
|
||||
//Tree barrier used for Intel Xeon Phi
|
||||
barrier_t* bli_thrcomm_tree_barrier_create( int num_threads, int arity, barrier_t** leaves, int leaf_index )
|
||||
{
|
||||
barrier_t* me = bli_malloc_intl( sizeof(barrier_t) );
|
||||
err_t r_val;
|
||||
|
||||
barrier_t* me = bli_malloc_intl( sizeof( barrier_t ), &r_val );
|
||||
|
||||
me->dad = NULL;
|
||||
me->signal = 0;
|
||||
|
||||
@@ -93,18 +93,10 @@ void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm )
|
||||
comm->n_threads = n_threads;
|
||||
comm->barrier_sense = 0;
|
||||
comm->barrier_threads_arrived = 0;
|
||||
|
||||
//#ifdef BLIS_USE_PTHREAD_MUTEX
|
||||
// bli_pthread_mutex_init( &comm->mutex, NULL );
|
||||
//#endif
|
||||
}
|
||||
|
||||
void bli_thrcomm_cleanup( thrcomm_t* comm )
|
||||
{
|
||||
//#ifdef BLIS_USE_PTHREAD_MUTEX
|
||||
// if ( comm == NULL ) return;
|
||||
// bli_pthread_mutex_destroy( &comm->mutex );
|
||||
//#endif
|
||||
}
|
||||
|
||||
void bli_thrcomm_barrier( dim_t t_id, thrcomm_t* comm )
|
||||
@@ -114,13 +106,7 @@ void bli_thrcomm_barrier( dim_t t_id, thrcomm_t* comm )
|
||||
bool my_sense = comm->sense;
|
||||
dim_t my_threads_arrived;
|
||||
|
||||
#ifdef BLIS_USE_PTHREAD_MUTEX
|
||||
bli_pthread_mutex_lock( &comm->mutex );
|
||||
my_threads_arrived = ++(comm->threads_arrived);
|
||||
bli_pthread_mutex_unlock( &comm->mutex );
|
||||
#else
|
||||
my_threads_arrived = __sync_add_and_fetch(&(comm->threads_arrived), 1);
|
||||
#endif
|
||||
|
||||
if ( my_threads_arrived == comm->n_threads )
|
||||
{
|
||||
|
||||
@@ -52,10 +52,6 @@ struct thrcomm_s
|
||||
void* sent_object;
|
||||
dim_t n_threads;
|
||||
|
||||
//#ifdef BLIS_USE_PTHREAD_MUTEX
|
||||
// bli_pthread_mutex_t mutex;
|
||||
//#endif
|
||||
|
||||
// NOTE: barrier_sense was originally a gint_t-based bool_t, but upon
|
||||
// redefining bool_t as bool we discovered that some gcc __atomic built-ins
|
||||
// don't allow the use of bool for the variables being operated upon.
|
||||
|
||||
@@ -332,8 +332,10 @@ thrinfo_t* bli_thrinfo_create_for_cntl
|
||||
// pointers.
|
||||
if ( bli_thread_am_ochief( thread_par ) )
|
||||
{
|
||||
err_t r_val;
|
||||
|
||||
if ( parent_n_way > BLIS_NUM_STATIC_COMMS )
|
||||
new_comms = bli_malloc_intl( parent_n_way * sizeof( thrcomm_t* ) );
|
||||
new_comms = bli_malloc_intl( parent_n_way * sizeof( thrcomm_t* ), &r_val );
|
||||
else
|
||||
new_comms = static_comms;
|
||||
}
|
||||
|
||||
@@ -197,8 +197,10 @@ thrinfo_t* bli_thrinfo_sup_create_for_cntl
|
||||
// pointers.
|
||||
if ( bli_thread_am_ochief( thread_par ) )
|
||||
{
|
||||
err_t r_val;
|
||||
|
||||
if ( parent_n_way > BLIS_NUM_STATIC_COMMS )
|
||||
new_comms = bli_malloc_intl( parent_n_way * sizeof( thrcomm_t* ) );
|
||||
new_comms = bli_malloc_intl( parent_n_way * sizeof( thrcomm_t* ), &r_val );
|
||||
else
|
||||
new_comms = static_comms;
|
||||
}
|
||||
|
||||
@@ -37,18 +37,22 @@
|
||||
// Prototype object APIs (expert and non-expert).
|
||||
#include "bli_oapi_ex.h"
|
||||
#include "bli_util_oapi.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
#include "bli_oapi_ba.h"
|
||||
#include "bli_util_oapi.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
// Prototype typed APIs (expert and non-expert).
|
||||
#include "bli_tapi_ex.h"
|
||||
#include "bli_util_tapi.h"
|
||||
#include "bli_util_ft.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
#include "bli_tapi_ba.h"
|
||||
#include "bli_util_tapi.h"
|
||||
#include "bli_util_ft.h"
|
||||
#include "bli_xapi_undef.h"
|
||||
|
||||
// Generate function pointer arrays for tapi functions (expert only).
|
||||
#include "bli_util_fpa.h"
|
||||
|
||||
@@ -103,25 +103,6 @@ GENFRONT( normfm )
|
||||
GENFRONT( normim )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,_check) \
|
||||
( \
|
||||
FILE* file, \
|
||||
char* s1, \
|
||||
obj_t* x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
) \
|
||||
{ \
|
||||
bli_utilm_fprint_check( file, s1, x, format, s2 ); \
|
||||
}
|
||||
|
||||
GENFRONT( fprintv )
|
||||
GENFRONT( fprintm )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
@@ -154,6 +135,73 @@ void PASTEMAC(opname,_check) \
|
||||
|
||||
GENFRONT( sumsqv )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,_check) \
|
||||
( \
|
||||
obj_t* chi, \
|
||||
obj_t* psi, \
|
||||
bool* is_eq \
|
||||
) \
|
||||
{ \
|
||||
bli_l0_xxbsc_check( chi, psi, is_eq ); \
|
||||
}
|
||||
|
||||
GENFRONT( eqsc )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,_check) \
|
||||
( \
|
||||
obj_t* x, \
|
||||
obj_t* y, \
|
||||
bool* is_eq \
|
||||
) \
|
||||
{ \
|
||||
bli_l1v_xy_check( x, y ); \
|
||||
}
|
||||
|
||||
GENFRONT( eqv )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,_check) \
|
||||
( \
|
||||
obj_t* x, \
|
||||
obj_t* y, \
|
||||
bool* is_eq \
|
||||
) \
|
||||
{ \
|
||||
bli_l1m_xy_check( x, y ); \
|
||||
}
|
||||
|
||||
GENFRONT( eqm )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,_check) \
|
||||
( \
|
||||
FILE* file, \
|
||||
char* s1, \
|
||||
obj_t* x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
) \
|
||||
{ \
|
||||
bli_utilm_fprint_check( file, s1, x, format, s2 ); \
|
||||
}
|
||||
|
||||
GENFRONT( fprintv )
|
||||
GENFRONT( fprintm )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -90,22 +90,6 @@ GENPROT( normfm )
|
||||
GENPROT( normim )
|
||||
|
||||
|
||||
#undef GENPROT
|
||||
#define GENPROT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,_check) \
|
||||
( \
|
||||
FILE* file, \
|
||||
char* s1, \
|
||||
obj_t* x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
);
|
||||
|
||||
GENPROT( fprintv )
|
||||
GENPROT( fprintm )
|
||||
|
||||
|
||||
#undef GENPROT
|
||||
#define GENPROT( opname ) \
|
||||
\
|
||||
@@ -132,6 +116,49 @@ void PASTEMAC(opname,_check) \
|
||||
|
||||
GENPROT( sumsqv )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,_check) \
|
||||
( \
|
||||
obj_t* chi, \
|
||||
obj_t* psi, \
|
||||
bool* is_eq \
|
||||
);
|
||||
|
||||
GENTPROT( eqsc )
|
||||
|
||||
|
||||
#undef GENPROT
|
||||
#define GENPROT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,_check) \
|
||||
( \
|
||||
obj_t* x, \
|
||||
obj_t* y, \
|
||||
bool* is_eq \
|
||||
);
|
||||
|
||||
GENPROT( eqv )
|
||||
GENPROT( eqm )
|
||||
|
||||
|
||||
#undef GENPROT
|
||||
#define GENPROT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,_check) \
|
||||
( \
|
||||
FILE* file, \
|
||||
char* s1, \
|
||||
obj_t* x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
);
|
||||
|
||||
GENPROT( fprintv )
|
||||
GENPROT( fprintm )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -66,6 +66,9 @@ GENFRONT( randm )
|
||||
GENFRONT( randnm )
|
||||
GENFRONT( sumsqv )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// Operations with only basic interfaces.
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
@@ -83,6 +86,9 @@ PASTEMAC(opname,_qfp)( num_t dt ) \
|
||||
return PASTECH(opname,_fpa)[ dt ]; \
|
||||
}
|
||||
|
||||
GENFRONT( eqsc )
|
||||
GENFRONT( eqv )
|
||||
GENFRONT( eqm )
|
||||
GENFRONT( fprintv )
|
||||
GENFRONT( fprintm )
|
||||
//GENFRONT( printv )
|
||||
|
||||
@@ -52,16 +52,13 @@ GENPROT( normiv )
|
||||
GENPROT( norm1m )
|
||||
GENPROT( normfm )
|
||||
GENPROT( normim )
|
||||
GENPROT( fprintv )
|
||||
GENPROT( fprintm )
|
||||
//GENPROT( printv )
|
||||
//GENPROT( printm )
|
||||
GENPROT( randv )
|
||||
GENPROT( randnv )
|
||||
GENPROT( randm )
|
||||
GENPROT( randnm )
|
||||
GENPROT( sumsqv )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
#undef GENPROT
|
||||
#define GENPROT( opname ) \
|
||||
@@ -69,6 +66,9 @@ GENPROT( sumsqv )
|
||||
PASTECH(opname,_vft) \
|
||||
PASTEMAC(opname,_qfp)( num_t dt );
|
||||
|
||||
GENPROT( eqsc )
|
||||
GENPROT( eqv )
|
||||
GENPROT( eqm )
|
||||
GENPROT( fprintv )
|
||||
GENPROT( fprintm )
|
||||
//GENPROT( printv )
|
||||
|
||||
@@ -191,3 +191,62 @@ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \
|
||||
|
||||
INSERT_GENTDEFR( sumsqv )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// Operations with only basic interfaces.
|
||||
|
||||
#ifdef BLIS_TAPI_BASIC
|
||||
|
||||
// eqsc
|
||||
|
||||
#undef GENTDEF
|
||||
#define GENTDEF( ctype, ch, opname, tsuf ) \
|
||||
\
|
||||
typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
( \
|
||||
conj_t conjchi, \
|
||||
ctype* chi, \
|
||||
ctype* psi, \
|
||||
bool* is_eq \
|
||||
);
|
||||
|
||||
INSERT_GENTDEF( eqsc )
|
||||
|
||||
// eqv
|
||||
|
||||
#undef GENTDEF
|
||||
#define GENTDEF( ctype, ch, opname, tsuf ) \
|
||||
\
|
||||
typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
bool* is_eq \
|
||||
);
|
||||
|
||||
INSERT_GENTDEF( eqv )
|
||||
|
||||
// eqm
|
||||
|
||||
#undef GENTDEF
|
||||
#define GENTDEF( ctype, ch, opname, tsuf ) \
|
||||
\
|
||||
typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
( \
|
||||
doff_t diagoffx, \
|
||||
diag_t diagx, \
|
||||
uplo_t uplox, \
|
||||
trans_t transx, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t rs_x, inc_t cs_x, \
|
||||
ctype* y, inc_t rs_y, inc_t cs_y, \
|
||||
bool* is_eq \
|
||||
);
|
||||
|
||||
INSERT_GENTDEF( eqm )
|
||||
|
||||
#endif // #ifdef BLIS_TAPI_BASIC
|
||||
|
||||
|
||||
@@ -72,11 +72,11 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
n, \
|
||||
buf_x, incx, \
|
||||
buf_asum, \
|
||||
cntx, \
|
||||
rntm \
|
||||
n, \
|
||||
buf_x, incx, \
|
||||
buf_asum, \
|
||||
cntx, \
|
||||
rntm \
|
||||
); \
|
||||
}
|
||||
|
||||
@@ -114,11 +114,11 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
uploa, \
|
||||
m, \
|
||||
buf_a, rs_a, cs_a, \
|
||||
cntx, \
|
||||
rntm \
|
||||
uploa, \
|
||||
m, \
|
||||
buf_a, rs_a, cs_a, \
|
||||
cntx, \
|
||||
rntm \
|
||||
); \
|
||||
}
|
||||
|
||||
@@ -158,11 +158,11 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
n, \
|
||||
buf_x, incx, \
|
||||
buf_norm, \
|
||||
cntx, \
|
||||
rntm \
|
||||
n, \
|
||||
buf_x, incx, \
|
||||
buf_norm, \
|
||||
cntx, \
|
||||
rntm \
|
||||
); \
|
||||
}
|
||||
|
||||
@@ -207,15 +207,15 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
diagoffx, \
|
||||
diagx, \
|
||||
uplox, \
|
||||
m, \
|
||||
n, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
buf_norm, \
|
||||
cntx, \
|
||||
rntm \
|
||||
diagoffx, \
|
||||
diagx, \
|
||||
uplox, \
|
||||
m, \
|
||||
n, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
buf_norm, \
|
||||
cntx, \
|
||||
rntm \
|
||||
); \
|
||||
}
|
||||
|
||||
@@ -224,160 +224,6 @@ GENFRONT( normfm )
|
||||
GENFRONT( normim )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,EX_SUF) \
|
||||
( \
|
||||
FILE* file, \
|
||||
char* s1, \
|
||||
obj_t* x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
BLIS_OAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_OAPI_EX_DECLS \
|
||||
\
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
dim_t n = bli_obj_vector_dim( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t incx = bli_obj_vector_inc( x ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( file, s1, x, format, s2 ); \
|
||||
\
|
||||
/* Handle constants up front. */ \
|
||||
if ( dt == BLIS_CONSTANT ) \
|
||||
{ \
|
||||
bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \
|
||||
} \
|
||||
\
|
||||
/* Query a type-specific function pointer, except one that uses
|
||||
void* for function arguments instead of typed pointers. */ \
|
||||
PASTECH(opname,_vft) f = \
|
||||
PASTEMAC(opname,_qfp)( dt ); \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
file, \
|
||||
s1, \
|
||||
n, \
|
||||
buf_x, incx, \
|
||||
format, \
|
||||
s2 \
|
||||
); \
|
||||
}
|
||||
|
||||
GENFRONT( fprintv )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,EX_SUF) \
|
||||
( \
|
||||
FILE* file, \
|
||||
char* s1, \
|
||||
obj_t* x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
BLIS_OAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_OAPI_EX_DECLS \
|
||||
\
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
dim_t m = bli_obj_length( x ); \
|
||||
dim_t n = bli_obj_width( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( x ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( file, s1, x, format, s2 ); \
|
||||
\
|
||||
/* Handle constants up front. */ \
|
||||
if ( dt == BLIS_CONSTANT ) \
|
||||
{ \
|
||||
float* sp = bli_obj_buffer_for_const( BLIS_FLOAT, x ); \
|
||||
double* dp = bli_obj_buffer_for_const( BLIS_DOUBLE, x ); \
|
||||
scomplex* cp = bli_obj_buffer_for_const( BLIS_SCOMPLEX, x ); \
|
||||
dcomplex* zp = bli_obj_buffer_for_const( BLIS_DCOMPLEX, x ); \
|
||||
gint_t* ip = bli_obj_buffer_for_const( BLIS_INT, x ); \
|
||||
\
|
||||
fprintf( file, "%s\n", s1 ); \
|
||||
fprintf( file, " float: %9.2e\n", bli_sreal( *sp ) ); \
|
||||
fprintf( file, " double: %9.2e\n", bli_dreal( *dp ) ); \
|
||||
fprintf( file, " scomplex: %9.2e + %9.2e\n", bli_creal( *cp ), \
|
||||
bli_cimag( *cp ) ); \
|
||||
fprintf( file, " dcomplex: %9.2e + %9.2e\n", bli_zreal( *zp ), \
|
||||
bli_zimag( *zp ) ); \
|
||||
fprintf( file, " int: %ld\n", ( long )(*ip) ); \
|
||||
fprintf( file, "\n" ); \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
/* Query a type-specific function pointer, except one that uses
|
||||
void* for function arguments instead of typed pointers. */ \
|
||||
PASTECH(opname,_vft) f = \
|
||||
PASTEMAC(opname,_qfp)( dt ); \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
file, \
|
||||
s1, \
|
||||
m, \
|
||||
n, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
format, \
|
||||
s2 \
|
||||
); \
|
||||
}
|
||||
|
||||
GENFRONT( fprintm )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname, varname ) \
|
||||
\
|
||||
void PASTEMAC(opname,EX_SUF) \
|
||||
( \
|
||||
char* s1, \
|
||||
obj_t* x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
BLIS_OAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_OAPI_EX_DECLS \
|
||||
\
|
||||
/* Suppress compiler warning about unused variables. */ \
|
||||
( void )cntx; \
|
||||
\
|
||||
/* Invoke the typed function. */ \
|
||||
PASTEMAC0(varname) \
|
||||
( \
|
||||
stdout, \
|
||||
s1, \
|
||||
x, \
|
||||
format, \
|
||||
s2 \
|
||||
); \
|
||||
}
|
||||
|
||||
GENFRONT( printv, fprintv )
|
||||
GENFRONT( printm, fprintm )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
@@ -407,10 +253,10 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
n, \
|
||||
buf_x, incx, \
|
||||
cntx, \
|
||||
rntm \
|
||||
n, \
|
||||
buf_x, incx, \
|
||||
cntx, \
|
||||
rntm \
|
||||
); \
|
||||
}
|
||||
|
||||
@@ -451,13 +297,13 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
diagoffx, \
|
||||
uplox, \
|
||||
m, \
|
||||
n, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
cntx, \
|
||||
rntm \
|
||||
diagoffx, \
|
||||
uplox, \
|
||||
m, \
|
||||
n, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
cntx, \
|
||||
rntm \
|
||||
); \
|
||||
}
|
||||
|
||||
@@ -498,17 +344,330 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
n, \
|
||||
buf_x, incx, \
|
||||
buf_scale, \
|
||||
buf_sumsq, \
|
||||
cntx, \
|
||||
rntm \
|
||||
n, \
|
||||
buf_x, incx, \
|
||||
buf_scale, \
|
||||
buf_sumsq, \
|
||||
cntx, \
|
||||
rntm \
|
||||
); \
|
||||
}
|
||||
|
||||
GENFRONT( sumsqv )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// Operations with only basic interfaces.
|
||||
|
||||
#ifdef BLIS_OAPI_BASIC
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC0(opname) \
|
||||
( \
|
||||
obj_t* chi, \
|
||||
obj_t* psi, \
|
||||
bool* is_eq \
|
||||
) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt_chi = bli_obj_dt( chi ); \
|
||||
num_t dt_psi = bli_obj_dt( psi ); \
|
||||
num_t dt; \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( chi, psi, is_eq ); \
|
||||
\
|
||||
/* Decide which datatype will be used to query the buffer from the
|
||||
constant object (if there is one). */ \
|
||||
if ( bli_is_constant( dt_psi ) ) dt = dt_chi; \
|
||||
else dt = dt_psi; \
|
||||
\
|
||||
/* If chi and psi are both constants, then we compare only the dcomplex
|
||||
fields. */ \
|
||||
if ( bli_is_constant( dt ) ) dt = BLIS_DCOMPLEX; \
|
||||
\
|
||||
void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \
|
||||
void* buf_psi = bli_obj_buffer_for_1x1( dt, psi ); \
|
||||
\
|
||||
/* Integer objects are handled separately. */ \
|
||||
if ( bli_is_int( dt ) ) \
|
||||
{ \
|
||||
*is_eq = bli_ieqa( buf_chi, buf_psi ); \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
/* Query the conj status of each object and use the two to come up with a
|
||||
single "net" conj_t value. */ \
|
||||
conj_t conjchi = bli_obj_conj_status( chi ); \
|
||||
conj_t conjpsi = bli_obj_conj_status( psi ); \
|
||||
conj_t conj = bli_apply_conj( conjchi, conjpsi ); \
|
||||
\
|
||||
/* Query a type-specific function pointer, except one that uses
|
||||
void* for function arguments instead of typed pointers. */ \
|
||||
PASTECH(opname,_vft) f = \
|
||||
PASTEMAC(opname,_qfp)( dt ); \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
conj, \
|
||||
buf_chi, \
|
||||
buf_psi, \
|
||||
is_eq \
|
||||
); \
|
||||
}
|
||||
|
||||
GENFRONT( eqsc )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC0(opname) \
|
||||
( \
|
||||
obj_t* x, \
|
||||
obj_t* y, \
|
||||
bool* is_eq \
|
||||
) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
dim_t n = bli_obj_vector_dim( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( y ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( x, y, is_eq ); \
|
||||
\
|
||||
/* Query the conj status of each object and use the two to come up with a
|
||||
single "net" conj_t value. */ \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
conj_t conjy = bli_obj_conj_status( y ); \
|
||||
conj_t conj = bli_apply_conj( conjx, conjy ); \
|
||||
\
|
||||
/* Query a type-specific function pointer, except one that uses
|
||||
void* for function arguments instead of typed pointers. */ \
|
||||
PASTECH(opname,_vft) f = \
|
||||
PASTEMAC(opname,_qfp)( dt ); \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
conj, \
|
||||
n, \
|
||||
buf_x, inc_x, \
|
||||
buf_y, inc_y, \
|
||||
is_eq \
|
||||
); \
|
||||
}
|
||||
|
||||
GENFRONT( eqv )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC0(opname) \
|
||||
( \
|
||||
obj_t* x, \
|
||||
obj_t* y, \
|
||||
bool* is_eq \
|
||||
) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
doff_t diagoffx = bli_obj_diag_offset( x ); \
|
||||
diag_t diagx = bli_obj_diag( x ); \
|
||||
uplo_t uplox = bli_obj_uplo( x ); \
|
||||
dim_t m = bli_obj_length( y ); \
|
||||
dim_t n = bli_obj_width( y ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t rs_y = bli_obj_row_stride( y ); \
|
||||
inc_t cs_y = bli_obj_col_stride( y ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( x, y, is_eq ); \
|
||||
\
|
||||
/* Query the combined trans and conj status of each object and use the two
|
||||
to come up with a single "net" trans_t value. */ \
|
||||
trans_t transx = bli_obj_conjtrans_status( x ); \
|
||||
trans_t transy = bli_obj_conjtrans_status( y ); \
|
||||
trans_t trans = bli_apply_trans( transy, transx ); \
|
||||
\
|
||||
/* Query a type-specific function pointer, except one that uses
|
||||
void* for function arguments instead of typed pointers. */ \
|
||||
PASTECH(opname,_vft) f = \
|
||||
PASTEMAC(opname,_qfp)( dt ); \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
diagoffx, \
|
||||
diagx, \
|
||||
uplox, \
|
||||
trans, \
|
||||
m, \
|
||||
n, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
buf_y, rs_y, cs_y, \
|
||||
is_eq \
|
||||
); \
|
||||
}
|
||||
|
||||
GENFRONT( eqm )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC0(opname) \
|
||||
( \
|
||||
FILE* file, \
|
||||
char* s1, \
|
||||
obj_t* x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
dim_t n = bli_obj_vector_dim( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t incx = bli_obj_vector_inc( x ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( file, s1, x, format, s2 ); \
|
||||
\
|
||||
/* Handle constants up front. */ \
|
||||
if ( dt == BLIS_CONSTANT ) \
|
||||
{ \
|
||||
bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \
|
||||
} \
|
||||
\
|
||||
/* Query a type-specific function pointer, except one that uses
|
||||
void* for function arguments instead of typed pointers. */ \
|
||||
PASTECH(opname,_vft) f = \
|
||||
PASTEMAC(opname,_qfp)( dt ); \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
file, \
|
||||
s1, \
|
||||
n, \
|
||||
buf_x, incx, \
|
||||
format, \
|
||||
s2 \
|
||||
); \
|
||||
}
|
||||
|
||||
GENFRONT( fprintv )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC0(opname) \
|
||||
( \
|
||||
FILE* file, \
|
||||
char* s1, \
|
||||
obj_t* x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
dim_t m = bli_obj_length( x ); \
|
||||
dim_t n = bli_obj_width( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( x ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( file, s1, x, format, s2 ); \
|
||||
\
|
||||
/* Handle constants up front. */ \
|
||||
if ( dt == BLIS_CONSTANT ) \
|
||||
{ \
|
||||
float* sp = bli_obj_buffer_for_const( BLIS_FLOAT, x ); \
|
||||
double* dp = bli_obj_buffer_for_const( BLIS_DOUBLE, x ); \
|
||||
scomplex* cp = bli_obj_buffer_for_const( BLIS_SCOMPLEX, x ); \
|
||||
dcomplex* zp = bli_obj_buffer_for_const( BLIS_DCOMPLEX, x ); \
|
||||
gint_t* ip = bli_obj_buffer_for_const( BLIS_INT, x ); \
|
||||
\
|
||||
fprintf( file, "%s\n", s1 ); \
|
||||
fprintf( file, " float: %9.2e\n", bli_sreal( *sp ) ); \
|
||||
fprintf( file, " double: %9.2e\n", bli_dreal( *dp ) ); \
|
||||
fprintf( file, " scomplex: %9.2e + %9.2e\n", bli_creal( *cp ), \
|
||||
bli_cimag( *cp ) ); \
|
||||
fprintf( file, " dcomplex: %9.2e + %9.2e\n", bli_zreal( *zp ), \
|
||||
bli_zimag( *zp ) ); \
|
||||
fprintf( file, " int: %ld\n", ( long )(*ip) ); \
|
||||
fprintf( file, "\n" ); \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
/* Query a type-specific function pointer, except one that uses
|
||||
void* for function arguments instead of typed pointers. */ \
|
||||
PASTECH(opname,_vft) f = \
|
||||
PASTEMAC(opname,_qfp)( dt ); \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
file, \
|
||||
s1, \
|
||||
m, \
|
||||
n, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
format, \
|
||||
s2 \
|
||||
); \
|
||||
}
|
||||
|
||||
GENFRONT( fprintm )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname, varname ) \
|
||||
\
|
||||
void PASTEMAC0(opname) \
|
||||
( \
|
||||
char* s1, \
|
||||
obj_t* x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
/* Invoke the typed function. */ \
|
||||
PASTEMAC0(varname) \
|
||||
( \
|
||||
stdout, \
|
||||
s1, \
|
||||
x, \
|
||||
format, \
|
||||
s2 \
|
||||
); \
|
||||
}
|
||||
|
||||
GENFRONT( printv, fprintv )
|
||||
GENFRONT( printm, fprintm )
|
||||
|
||||
#endif // #ifdef BLIS_OAPI_BASIC
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -94,39 +94,6 @@ GENPROT( normfm )
|
||||
GENPROT( normim )
|
||||
|
||||
|
||||
#undef GENPROT
|
||||
#define GENPROT( opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \
|
||||
( \
|
||||
FILE* file, \
|
||||
char* s1, \
|
||||
obj_t* x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
BLIS_OAPI_EX_PARAMS \
|
||||
);
|
||||
|
||||
GENPROT( fprintv )
|
||||
GENPROT( fprintm )
|
||||
|
||||
|
||||
#undef GENPROT
|
||||
#define GENPROT( opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \
|
||||
( \
|
||||
char* s1, \
|
||||
obj_t* x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
BLIS_OAPI_EX_PARAMS \
|
||||
);
|
||||
|
||||
GENPROT( printv )
|
||||
GENPROT( printm )
|
||||
|
||||
|
||||
#undef GENPROT
|
||||
#define GENPROT( opname ) \
|
||||
\
|
||||
@@ -166,3 +133,84 @@ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \
|
||||
|
||||
GENPROT( sumsqv )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// Operations with basic interfaces only.
|
||||
|
||||
#ifdef BLIS_OAPI_BASIC
|
||||
|
||||
/*
|
||||
#undef GENPROT
|
||||
#define GENPROT( opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC0(opname) \
|
||||
( \
|
||||
obj_t* chi, \
|
||||
obj_t* psi, \
|
||||
bool* is_eq \
|
||||
);
|
||||
|
||||
GENPROT( eqsc )
|
||||
|
||||
|
||||
#undef GENPROT
|
||||
#define GENPROT( opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC0(opname) \
|
||||
( \
|
||||
obj_t* x, \
|
||||
obj_t* y, \
|
||||
bool* is_eq \
|
||||
);
|
||||
|
||||
GENPROT( eqv )
|
||||
*/
|
||||
|
||||
|
||||
#undef GENPROT
|
||||
#define GENPROT( opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC0(opname) \
|
||||
( \
|
||||
obj_t* x, \
|
||||
obj_t* y, \
|
||||
bool* is_eq \
|
||||
);
|
||||
|
||||
GENPROT( eqsc )
|
||||
GENPROT( eqv )
|
||||
GENPROT( eqm )
|
||||
|
||||
|
||||
#undef GENPROT
|
||||
#define GENPROT( opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC0(opname) \
|
||||
( \
|
||||
FILE* file, \
|
||||
char* s1, \
|
||||
obj_t* x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
);
|
||||
|
||||
GENPROT( fprintv )
|
||||
GENPROT( fprintm )
|
||||
|
||||
|
||||
#undef GENPROT
|
||||
#define GENPROT( opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC0(opname) \
|
||||
( \
|
||||
char* s1, \
|
||||
obj_t* x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
);
|
||||
|
||||
GENPROT( printv )
|
||||
GENPROT( printm )
|
||||
|
||||
#endif // #ifdef BLIS_OAPI_BASIC
|
||||
|
||||
|
||||
@@ -213,64 +213,6 @@ INSERT_GENTFUNCR_BASIC0( normfm )
|
||||
INSERT_GENTFUNCR_BASIC0( normim )
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname, varname ) \
|
||||
\
|
||||
void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
( \
|
||||
char* s1, \
|
||||
dim_t n, \
|
||||
void* x, inc_t incx, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
PASTEMAC(ch,varname) \
|
||||
( \
|
||||
stdout, \
|
||||
s1, \
|
||||
n, \
|
||||
x, incx, \
|
||||
format, \
|
||||
s2 \
|
||||
); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC_I( printv, fprintv )
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname, varname ) \
|
||||
\
|
||||
void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
( \
|
||||
char* s1, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
void* x, inc_t rs_x, inc_t cs_x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
PASTEMAC(ch,varname) \
|
||||
( \
|
||||
stdout, \
|
||||
s1, \
|
||||
m, \
|
||||
n, \
|
||||
x, rs_x, cs_x, \
|
||||
format, \
|
||||
s2 \
|
||||
); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC_I( printm, fprintm )
|
||||
|
||||
|
||||
#undef GENTFUNCR
|
||||
#define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \
|
||||
\
|
||||
@@ -430,6 +372,168 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
|
||||
INSERT_GENTFUNCR_BASIC0( sumsqv )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// Operations with only basic interfaces.
|
||||
|
||||
#ifdef BLIS_TAPI_BASIC
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjchi, \
|
||||
ctype* chi, \
|
||||
ctype* psi, \
|
||||
bool* is_eq \
|
||||
) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
ctype chi_conj; \
|
||||
\
|
||||
PASTEMAC(ch,copycjs)( conjchi, *chi, chi_conj ); \
|
||||
\
|
||||
*is_eq = PASTEMAC(ch,eq)( chi_conj, *psi ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0( eqsc )
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
bool* is_eq \
|
||||
) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
/* If x is zero length, return with a result of TRUE. */ \
|
||||
if ( bli_zero_dim1( n ) ) { *is_eq = TRUE; return; } \
|
||||
\
|
||||
/* Obtain a valid context from the gks if necessary. */ \
|
||||
/*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \
|
||||
\
|
||||
*is_eq = PASTEMAC2(ch,opname,_unb_var1) \
|
||||
( \
|
||||
conjx, \
|
||||
n, \
|
||||
x, incx, \
|
||||
y, incy \
|
||||
); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0( eqv )
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
doff_t diagoffx, \
|
||||
diag_t diagx, \
|
||||
uplo_t uplox, \
|
||||
trans_t transx, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t rs_x, inc_t cs_x, \
|
||||
ctype* y, inc_t rs_y, inc_t cs_y, \
|
||||
bool* is_eq \
|
||||
) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
/* If x has a zero dimension, return with a result of TRUE. See the
|
||||
_unb_var() variant for why we return TRUE in this scenario. */ \
|
||||
if ( bli_zero_dim2( m, n ) ) { *is_eq = TRUE; return; } \
|
||||
\
|
||||
/* Obtain a valid context from the gks if necessary. */ \
|
||||
/*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \
|
||||
\
|
||||
/* Invoke the helper variant. */ \
|
||||
*is_eq = PASTEMAC2(ch,opname,_unb_var1) \
|
||||
( \
|
||||
diagoffx, \
|
||||
diagx, \
|
||||
uplox, \
|
||||
transx, \
|
||||
m, \
|
||||
n, \
|
||||
x, rs_x, cs_x, \
|
||||
y, rs_y, cs_y \
|
||||
); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0( eqm )
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
char* s1, \
|
||||
dim_t n, \
|
||||
void* x, inc_t incx, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
PASTEMAC(ch,varname) \
|
||||
( \
|
||||
stdout, \
|
||||
s1, \
|
||||
n, \
|
||||
x, incx, \
|
||||
format, \
|
||||
s2 \
|
||||
); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC_I( printv, fprintv )
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
char* s1, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
void* x, inc_t rs_x, inc_t cs_x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
PASTEMAC(ch,varname) \
|
||||
( \
|
||||
stdout, \
|
||||
s1, \
|
||||
m, \
|
||||
n, \
|
||||
x, rs_x, cs_x, \
|
||||
format, \
|
||||
s2 \
|
||||
); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC_I( printm, fprintm )
|
||||
|
||||
#endif // #ifdef BLIS_TAPI_BASIC
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -103,37 +103,6 @@ INSERT_GENTPROTR_BASIC0( normfm )
|
||||
INSERT_GENTPROTR_BASIC0( normim )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
( \
|
||||
char* s1, \
|
||||
dim_t n, \
|
||||
void* x, inc_t incx, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC0_I( printv )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
( \
|
||||
char* s1, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
void* x, inc_t rs_x, inc_t cs_x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC0_I( printm )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
@@ -179,4 +148,89 @@ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
|
||||
INSERT_GENTPROTR_BASIC0( sumsqv )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// Operations with basic interfaces only.
|
||||
|
||||
#ifdef BLIS_TAPI_BASIC
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjchi, \
|
||||
ctype* chi, \
|
||||
ctype* psi, \
|
||||
bool* is_eq \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC0( eqsc )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
bool* is_eq \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC0( eqv )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
doff_t diagoffx, \
|
||||
diag_t diagx, \
|
||||
uplo_t uplox, \
|
||||
trans_t transx, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t rs_x, inc_t cs_x, \
|
||||
ctype* y, inc_t rs_y, inc_t cs_y, \
|
||||
bool* is_eq \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC0( eqm )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
char* s1, \
|
||||
dim_t n, \
|
||||
void* x, inc_t incx, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC0_I( printv )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
char* s1, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
void* x, inc_t rs_x, inc_t cs_x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC0_I( printm )
|
||||
|
||||
#endif // #ifdef BLIS_TAPI_BASIC
|
||||
|
||||
|
||||
@@ -862,85 +862,6 @@ void PASTEMAC(ch,varname) \
|
||||
INSERT_GENTFUNCR_BASIC( normim_unb_var1, norm1m_unb_var1 )
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
FILE* file, \
|
||||
char* s1, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
) \
|
||||
{ \
|
||||
dim_t i; \
|
||||
ctype* chi1; \
|
||||
char default_spec[32] = PASTEMAC(ch,formatspec)(); \
|
||||
\
|
||||
if ( format == NULL ) format = default_spec; \
|
||||
\
|
||||
chi1 = x; \
|
||||
\
|
||||
fprintf( file, "%s\n", s1 ); \
|
||||
\
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,fprints)( file, format, *chi1 ); \
|
||||
fprintf( file, "\n" ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
} \
|
||||
\
|
||||
fprintf( file, "%s\n", s2 ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0_I( fprintv )
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
FILE* file, \
|
||||
char* s1, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t rs_x, inc_t cs_x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
) \
|
||||
{ \
|
||||
dim_t i, j; \
|
||||
ctype* chi1; \
|
||||
char default_spec[32] = PASTEMAC(ch,formatspec)(); \
|
||||
\
|
||||
if ( format == NULL ) format = default_spec; \
|
||||
\
|
||||
fprintf( file, "%s\n", s1 ); \
|
||||
\
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
{ \
|
||||
chi1 = (( ctype* ) x) + i*rs_x + j*cs_x; \
|
||||
\
|
||||
PASTEMAC(ch,fprints)( file, format, *chi1 ); \
|
||||
fprintf( file, " " ); \
|
||||
} \
|
||||
\
|
||||
fprintf( file, "\n" ); \
|
||||
} \
|
||||
\
|
||||
fprintf( file, "%s\n", s2 ); \
|
||||
fflush( file ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0_I( fprintm )
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, varname, randmac ) \
|
||||
\
|
||||
@@ -1215,3 +1136,238 @@ void PASTEMAC(ch,varname) \
|
||||
|
||||
INSERT_GENTFUNCR_BASIC0( sumsqv_unb_var1 )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname ) \
|
||||
\
|
||||
bool PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy \
|
||||
) \
|
||||
{ \
|
||||
for ( dim_t i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
ctype* chi1 = x + (i )*incx; \
|
||||
ctype* psi1 = y + (i )*incy; \
|
||||
\
|
||||
ctype chi1c; \
|
||||
\
|
||||
if ( bli_is_conj( conjx ) ) { PASTEMAC(ch,copyjs)( *chi1, chi1c ); } \
|
||||
else { PASTEMAC(ch,copys)( *chi1, chi1c ); } \
|
||||
\
|
||||
if ( !PASTEMAC(ch,eq)( chi1c, *psi1 ) ) \
|
||||
return FALSE; \
|
||||
} \
|
||||
\
|
||||
return TRUE; \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0( eqv_unb_var1 )
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname ) \
|
||||
\
|
||||
bool PASTEMAC(ch,opname) \
|
||||
( \
|
||||
doff_t diagoffx, \
|
||||
diag_t diagx, \
|
||||
uplo_t uplox, \
|
||||
trans_t transx, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t rs_x, inc_t cs_x, \
|
||||
ctype* y, inc_t rs_y, inc_t cs_y \
|
||||
) \
|
||||
{ \
|
||||
uplo_t uplox_eff; \
|
||||
conj_t conjx; \
|
||||
dim_t n_iter; \
|
||||
dim_t n_elem_max; \
|
||||
inc_t ldx, incx; \
|
||||
inc_t ldy, incy; \
|
||||
dim_t ij0, n_shift; \
|
||||
\
|
||||
/* Set various loop parameters. */ \
|
||||
bli_set_dims_incs_uplo_2m \
|
||||
( \
|
||||
diagoffx, diagx, transx, \
|
||||
uplox, m, n, rs_x, cs_x, rs_y, cs_y, \
|
||||
&uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, &incy, &ldy, \
|
||||
&ij0, &n_shift \
|
||||
); \
|
||||
\
|
||||
/* In the odd case where we are comparing against a complete unstored
|
||||
matrix, we assert equality. Why? We assume the matrices are equal
|
||||
unless we can find two corresponding elements that are unequal. So
|
||||
if there are no elements, there is no inequality. Granted, this logic
|
||||
is strange to think about no matter what, and thankfully it should
|
||||
never be used under normal usage. */ \
|
||||
if ( bli_is_zeros( uplox_eff ) ) return TRUE; \
|
||||
\
|
||||
/* Extract the conjugation component from the transx parameter. */ \
|
||||
conjx = bli_extract_conj( transx ); \
|
||||
\
|
||||
/* Handle dense and upper/lower storage cases separately. */ \
|
||||
if ( bli_is_dense( uplox_eff ) ) \
|
||||
{ \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
const dim_t n_elem = n_elem_max; \
|
||||
\
|
||||
ctype* x1 = x + (j )*ldx + (0 )*incx; \
|
||||
ctype* y1 = y + (j )*ldy + (0 )*incy; \
|
||||
\
|
||||
for ( dim_t i = 0; i < n_elem; ++i ) \
|
||||
{ \
|
||||
ctype* x11 = x1 + (i )*incx; \
|
||||
ctype* y11 = y1 + (i )*incy; \
|
||||
ctype x11c; \
|
||||
\
|
||||
if ( bli_is_conj( conjx ) ) { PASTEMAC(ch,copyjs)( *x11, x11c ); } \
|
||||
else { PASTEMAC(ch,copys)( *x11, x11c ); } \
|
||||
\
|
||||
if ( !PASTEMAC(ch,eq)( x11c, *y11 ) ) \
|
||||
return FALSE; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
if ( bli_is_upper( uplox_eff ) ) \
|
||||
{ \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
const dim_t n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
|
||||
\
|
||||
ctype* x1 = x + (ij0+j )*ldx + (0 )*incx; \
|
||||
ctype* y1 = y + (ij0+j )*ldy + (0 )*incy; \
|
||||
\
|
||||
for ( dim_t i = 0; i < n_elem; ++i ) \
|
||||
{ \
|
||||
ctype* x11 = x1 + (i )*incx; \
|
||||
ctype* y11 = y1 + (i )*incy; \
|
||||
ctype x11c; \
|
||||
\
|
||||
if ( bli_is_conj( conjx ) ) { PASTEMAC(ch,copyjs)( *x11, x11c ); } \
|
||||
else { PASTEMAC(ch,copys)( *x11, x11c ); } \
|
||||
\
|
||||
if ( !PASTEMAC(ch,eq)( x11c, *y11 ) ) \
|
||||
return FALSE; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else if ( bli_is_lower( uplox_eff ) ) \
|
||||
{ \
|
||||
for ( dim_t j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
const dim_t offi = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
|
||||
const dim_t n_elem = n_elem_max - offi; \
|
||||
\
|
||||
ctype* x1 = x + (j )*ldx + (ij0+offi )*incx; \
|
||||
ctype* y1 = y + (j )*ldy + (ij0+offi )*incy; \
|
||||
\
|
||||
for ( dim_t i = 0; i < n_elem; ++i ) \
|
||||
{ \
|
||||
ctype* x11 = x1 + (i )*incx; \
|
||||
ctype* y11 = y1 + (i )*incy; \
|
||||
ctype x11c; \
|
||||
\
|
||||
if ( bli_is_conj( conjx ) ) { PASTEMAC(ch,copyjs)( *x11, x11c ); } \
|
||||
else { PASTEMAC(ch,copys)( *x11, x11c ); } \
|
||||
\
|
||||
if ( !PASTEMAC(ch,eq)( x11c, *y11 ) ) \
|
||||
return FALSE; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
return TRUE; \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0( eqm_unb_var1 )
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
FILE* file, \
|
||||
char* s1, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
) \
|
||||
{ \
|
||||
dim_t i; \
|
||||
ctype* chi1; \
|
||||
char default_spec[32] = PASTEMAC(ch,formatspec)(); \
|
||||
\
|
||||
if ( format == NULL ) format = default_spec; \
|
||||
\
|
||||
chi1 = x; \
|
||||
\
|
||||
fprintf( file, "%s\n", s1 ); \
|
||||
\
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,fprints)( file, format, *chi1 ); \
|
||||
fprintf( file, "\n" ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
} \
|
||||
\
|
||||
fprintf( file, "%s\n", s2 ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0_I( fprintv )
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
FILE* file, \
|
||||
char* s1, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t rs_x, inc_t cs_x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
) \
|
||||
{ \
|
||||
dim_t i, j; \
|
||||
ctype* chi1; \
|
||||
char default_spec[32] = PASTEMAC(ch,formatspec)(); \
|
||||
\
|
||||
if ( format == NULL ) format = default_spec; \
|
||||
\
|
||||
fprintf( file, "%s\n", s1 ); \
|
||||
\
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
{ \
|
||||
chi1 = (( ctype* ) x) + i*rs_x + j*cs_x; \
|
||||
\
|
||||
PASTEMAC(ch,fprints)( file, format, *chi1 ); \
|
||||
fprintf( file, " " ); \
|
||||
} \
|
||||
\
|
||||
fprintf( file, "\n" ); \
|
||||
} \
|
||||
\
|
||||
fprintf( file, "%s\n", s2 ); \
|
||||
fflush( file ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0_I( fprintm )
|
||||
|
||||
|
||||
@@ -107,39 +107,6 @@ INSERT_GENTPROTR_BASIC0( normfm_unb_var1 )
|
||||
INSERT_GENTPROTR_BASIC0( normim_unb_var1 )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
FILE* file, \
|
||||
char* s1, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC0_I( fprintv )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
FILE* file, \
|
||||
char* s1, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t rs_x, inc_t cs_x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC0_I( fprintm )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
@@ -188,3 +155,70 @@ void PASTEMAC(ch,varname) \
|
||||
|
||||
INSERT_GENTPROTR_BASIC0( sumsqv_unb_var1 )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
bool PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC0( eqv_unb_var1 )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
bool PASTEMAC(ch,varname) \
|
||||
( \
|
||||
doff_t diagoffx, \
|
||||
diag_t diagx, \
|
||||
uplo_t uplox, \
|
||||
trans_t transx, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t rs_x, inc_t cs_x, \
|
||||
ctype* y, inc_t rs_y, inc_t cs_y \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC0( eqm_unb_var1 )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
FILE* file, \
|
||||
char* s1, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC0_I( fprintv )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
FILE* file, \
|
||||
char* s1, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t rs_x, inc_t cs_x, \
|
||||
char* format, \
|
||||
char* s2 \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC0_I( fprintm )
|
||||
|
||||
|
||||
|
||||
@@ -268,12 +268,12 @@ static err_t bli_sgemm_small
|
||||
|
||||
bli_rntm_init_from_global( &rntm );
|
||||
bli_rntm_set_num_threads_only( 1, &rntm );
|
||||
bli_membrk_rntm_set_membrk( &rntm );
|
||||
bli_pba_rntm_set_pba( &rntm );
|
||||
|
||||
// Get the current size of the buffer pool for A block packing.
|
||||
// We will use the same size to avoid pool re-initialization
|
||||
siz_t buffer_size = bli_pool_block_size(bli_membrk_pool(bli_packbuf_index(BLIS_BITVAL_BUFFER_FOR_A_BLOCK),
|
||||
bli_rntm_membrk(&rntm)));
|
||||
siz_t buffer_size = bli_pool_block_size(bli_pba_pool(bli_packbuf_index(BLIS_BITVAL_BUFFER_FOR_A_BLOCK),
|
||||
bli_rntm_pba(&rntm)));
|
||||
|
||||
// Based on the available memory in the buffer we will decide if
|
||||
// we want to do packing or not.
|
||||
@@ -299,7 +299,7 @@ static err_t bli_sgemm_small
|
||||
#endif
|
||||
// Get the buffer from the pool, if there is no pool with
|
||||
// required size, it will be created.
|
||||
bli_membrk_acquire_m(&rntm,
|
||||
bli_pba_acquire_m(&rntm,
|
||||
buffer_size,
|
||||
BLIS_BITVAL_BUFFER_FOR_A_BLOCK,
|
||||
&local_mem_buf_A_s);
|
||||
@@ -1699,7 +1699,7 @@ static err_t bli_sgemm_small
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_sgemm_small(): releasing mem pool block\n" );
|
||||
#endif
|
||||
bli_membrk_release(&rntm,
|
||||
bli_pba_release(&rntm,
|
||||
&local_mem_buf_A_s);
|
||||
}
|
||||
|
||||
@@ -1833,13 +1833,13 @@ static err_t bli_dgemm_small
|
||||
|
||||
bli_rntm_init_from_global( &rntm );
|
||||
bli_rntm_set_num_threads_only( 1, &rntm );
|
||||
bli_membrk_rntm_set_membrk( &rntm );
|
||||
bli_pba_rntm_set_pba( &rntm );
|
||||
|
||||
// Get the current size of the buffer pool for A block packing.
|
||||
// We will use the same size to avoid pool re-initialization
|
||||
siz_t buffer_size = bli_pool_block_size(
|
||||
bli_membrk_pool(bli_packbuf_index(BLIS_BITVAL_BUFFER_FOR_A_BLOCK),
|
||||
bli_rntm_membrk(&rntm)));
|
||||
bli_pba_pool(bli_packbuf_index(BLIS_BITVAL_BUFFER_FOR_A_BLOCK),
|
||||
bli_rntm_pba(&rntm)));
|
||||
|
||||
//
|
||||
// This kernel assumes that "A" will be unpacked if N <= 3.
|
||||
@@ -1863,7 +1863,7 @@ static err_t bli_dgemm_small
|
||||
printf( "bli_dgemm_small: Requesting mem pool block of size %lu\n", buffer_size);
|
||||
#endif
|
||||
// Get the buffer from the pool.
|
||||
bli_membrk_acquire_m(&rntm,
|
||||
bli_pba_acquire_m(&rntm,
|
||||
buffer_size,
|
||||
BLIS_BITVAL_BUFFER_FOR_A_BLOCK,
|
||||
&local_mem_buf_A_s);
|
||||
@@ -3309,7 +3309,7 @@ static err_t bli_dgemm_small
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_dgemm_small(): releasing mem pool block\n" );
|
||||
#endif
|
||||
bli_membrk_release(&rntm,
|
||||
bli_pba_release(&rntm,
|
||||
&local_mem_buf_A_s);
|
||||
}
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO);
|
||||
|
||||
@@ -10773,10 +10773,11 @@ static err_t bli_dtrsm_small_XAltB_unitDiag(
|
||||
k_iter = j / D_NR; //number of GEMM operations to be performed(in block of 4x4)
|
||||
|
||||
dim_t iter;
|
||||
err_t r_val;
|
||||
|
||||
if((j+n_remainder) == n)
|
||||
{
|
||||
f_temp = bli_malloc_user(4 * sizeof(double));
|
||||
f_temp = bli_malloc_user(4 * sizeof(double), &r_val);
|
||||
for(iter = 0; iter < m_remainder; iter++)
|
||||
f_temp[iter] = (b11 + cs_b * (n_remainder-1))[iter];
|
||||
}
|
||||
|
||||
@@ -593,10 +593,6 @@ void GENBARNAME(cntx_init)
|
||||
bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS, cntx );
|
||||
bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS, cntx );
|
||||
bli_cntx_set_schema_c_panel( BLIS_NOT_PACKED, cntx );
|
||||
|
||||
//bli_cntx_set_anti_pref( FALSE, cntx );
|
||||
|
||||
//bli_cntx_set_membrk( bli_membrk_query(), cntx );
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
@@ -45,7 +45,7 @@ void blx_l3_packm
|
||||
thrinfo_t* thread
|
||||
)
|
||||
{
|
||||
membrk_t* membrk;
|
||||
pba_t* pba;
|
||||
packbuf_t pack_buf_type;
|
||||
mem_t* cntl_mem_p;
|
||||
siz_t size_needed;
|
||||
@@ -71,7 +71,7 @@ void blx_l3_packm
|
||||
if ( size_needed == 0 ) return;
|
||||
|
||||
// Query the memory broker from the context.
|
||||
membrk = bli_cntx_get_membrk( cntx );
|
||||
pba = bli_cntx_get_pba( cntx );
|
||||
|
||||
// Query the pack buffer type from the control tree node.
|
||||
pack_buf_type = bli_cntl_packm_params_pack_buf_type( cntl );
|
||||
@@ -91,9 +91,9 @@ void blx_l3_packm
|
||||
{
|
||||
// The chief thread acquires a block from the memory broker
|
||||
// and saves the associated mem_t entry to local_mem_s.
|
||||
bli_membrk_acquire_m
|
||||
bli_pba_acquire_m
|
||||
(
|
||||
membrk,
|
||||
pba,
|
||||
size_needed,
|
||||
pack_buf_type,
|
||||
&local_mem_s
|
||||
@@ -130,10 +130,10 @@ void blx_l3_packm
|
||||
// The chief thread releases the existing block associated with
|
||||
// the mem_t entry in the control tree, and then re-acquires a
|
||||
// new block, saving the associated mem_t entry to local_mem_s.
|
||||
bli_membrk_release( cntl_mem_p );
|
||||
bli_membrk_acquire_m
|
||||
bli_pba_release( cntl_mem_p );
|
||||
bli_pba_acquire_m
|
||||
(
|
||||
membrk,
|
||||
pba,
|
||||
size_needed,
|
||||
pack_buf_type,
|
||||
&local_mem_s
|
||||
|
||||
@@ -147,7 +147,7 @@ void blx_gemm_thread
|
||||
// Set the packing block allocator field of the rntm. This will be
|
||||
// inherited by all of the child threads when they make local copies of
|
||||
// the rntm below.
|
||||
bli_membrk_rntm_set_membrk( rntm );
|
||||
bli_pba_rntm_set_pba( rntm );
|
||||
|
||||
// Allocate a global communicator for the root thrinfo_t structures.
|
||||
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
3
|
||||
4
|
||||
0.0
|
||||
|
||||
@@ -121,6 +121,8 @@ void* libblis_test_thread_entry( void* tdata_void )
|
||||
|
||||
void libblis_test_thread_decorator( test_params_t* params, test_ops_t* ops )
|
||||
{
|
||||
err_t r_val;
|
||||
|
||||
// Query the total number of threads to simulate.
|
||||
size_t nt = ( size_t )params->n_app_threads;
|
||||
|
||||
@@ -130,12 +132,12 @@ void libblis_test_thread_decorator( test_params_t* params, test_ops_t* ops )
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "libblis_test_thread_decorator(): " );
|
||||
#endif
|
||||
bli_pthread_t* pthread = bli_malloc_user( sizeof( bli_pthread_t ) * nt );
|
||||
bli_pthread_t* pthread = bli_malloc_user( sizeof( bli_pthread_t ) * nt, &r_val );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "libblis_test_thread_decorator(): " );
|
||||
#endif
|
||||
thread_data_t* tdata = bli_malloc_user( sizeof( thread_data_t ) * nt );
|
||||
thread_data_t* tdata = bli_malloc_user( sizeof( thread_data_t ) * nt, &r_val );
|
||||
|
||||
// Allocate a mutex for the threads to share.
|
||||
//bli_pthread_mutex_t* mutex = bli_malloc_user( sizeof( bli_pthread_mutex_t ) );
|
||||
@@ -145,7 +147,7 @@ void libblis_test_thread_decorator( test_params_t* params, test_ops_t* ops )
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "libblis_test_thread_decorator(): " );
|
||||
#endif
|
||||
bli_pthread_barrier_t* barrier = bli_malloc_user( sizeof( bli_pthread_barrier_t ) );
|
||||
bli_pthread_barrier_t* barrier = bli_malloc_user( sizeof( bli_pthread_barrier_t ), &r_val );
|
||||
|
||||
// Initialize the mutex.
|
||||
//bli_pthread_mutex_init( mutex, NULL );
|
||||
|
||||
Reference in New Issue
Block a user