diff --git a/config/excavator/bli_cntx_init_excavator.c b/config/excavator/bli_cntx_init_excavator.c index 065154d27..56d04ef4e 100644 --- a/config/excavator/bli_cntx_init_excavator.c +++ b/config/excavator/bli_cntx_init_excavator.c @@ -39,7 +39,7 @@ void bli_cntx_init_excavator( cntx_t* cntx ) blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. - bli_cntx_init_piledriver_ref( cntx ); + bli_cntx_init_excavator_ref( cntx ); // ------------------------------------------------------------------------- diff --git a/config/steamroller/bli_cntx_init_steamroller.c b/config/steamroller/bli_cntx_init_steamroller.c index b1409e4fc..1b6566c5c 100644 --- a/config/steamroller/bli_cntx_init_steamroller.c +++ b/config/steamroller/bli_cntx_init_steamroller.c @@ -39,7 +39,7 @@ void bli_cntx_init_steamroller( cntx_t* cntx ) blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. - bli_cntx_init_piledriver_ref( cntx ); + bli_cntx_init_steamroller_ref( cntx ); // ------------------------------------------------------------------------- diff --git a/examples/oapi/0obj_basic.c b/examples/oapi/00obj_basic.c similarity index 100% rename from examples/oapi/0obj_basic.c rename to examples/oapi/00obj_basic.c diff --git a/examples/oapi/1obj_attach.c b/examples/oapi/01obj_attach.c similarity index 100% rename from examples/oapi/1obj_attach.c rename to examples/oapi/01obj_attach.c diff --git a/examples/oapi/2obj_ij.c b/examples/oapi/02obj_ij.c similarity index 100% rename from examples/oapi/2obj_ij.c rename to examples/oapi/02obj_ij.c diff --git a/examples/oapi/03obj_view.c b/examples/oapi/03obj_view.c new file mode 100644 index 000000000..68c9c922f --- /dev/null +++ b/examples/oapi/03obj_view.c @@ -0,0 +1,272 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include <stdio.h> +#include <stdlib.h> +#include "blis.h" + +void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); +void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); +void init_dobj_by_cols( obj_t* a ); +void init_zobj_by_cols( obj_t* a ); + +int main( int argc, char** argv ) +{ + obj_t a1, a2; + obj_t v1, v2, v3, v4, v5; + num_t dt; + dim_t m, n; + inc_t rs, cs; + dim_t i, j; + dim_t mv, nv; + + // + // This file demonstrates creating and using submatrix views into existing matrices. + // + + // + // Example 1: Create an object and then create a submatrix view. + // + + printf( "\n#\n# -- Example 1 --\n#\n\n" ); + + // We'll use these parameters for the following examples. + dt = BLIS_DOUBLE; + m = 6; n = 7; rs = 1; cs = m; + + // Create an object a1 using bli_obj_create(). + bli_obj_create( dt, m, n, rs, cs, &a1 ); + + // Initialize a1 to contain known values. + init_dobj_by_cols( &a1 ); + + bli_printm( "matrix 'a1' (initial state)", &a1, "%5.1f", "" ); + + // Acquire a 4x3 submatrix view into a1 at (i,j) offsets (1,2). + i = 1; j = 2; mv = 4; nv = 3; + bli_acquire_mpart( i, j, mv, nv, &a1, &v1 ); + + bli_printm( "4x3 submatrix 'v1' at offsets (1,2)", &v1, "%5.1f", "" ); + + // NOTE: Submatrix views should never be passed to bli_obj_free(). It + // will not cause an immediate error, but it is bad practice. Instead, + // you should only release the objects that were created directly via + // bli_obj_create(). In the above example, that means only object a1 + // would be passed to bli_obj_free(). + + // + // Example 2: Modify the contents of a submatrix view. + // + + printf( "\n#\n# -- Example 2 --\n#\n\n" ); + + // Modify the first three elements of the first column. + bli_setijm( -3.0, 0.0, 0, 0, &v1 ); + bli_setijm( -4.0, 0.0, 1, 0, &v1 ); + bli_setijm( -5.0, 0.0, 2, 0, &v1 ); + + // Modify the first three elements of the second column. 
+ bli_setijm( -6.0, 0.0, 0, 1, &v1 ); + bli_setijm( -7.0, 0.0, 1, 1, &v1 ); + bli_setijm( -8.0, 0.0, 2, 1, &v1 ); + + // Print the matrix again so we can see the updated elements. + bli_printm( "submatrix view 'v1' (modified state)", &v1, "%5.1f", "" ); + bli_printm( "matrix 'a1' (indirectly modified due to changes to 'v1')", &a1, "%5.1f", "" ); + + // + // Example 3: Create a submatrix view that is "too big". + // + + printf( "\n#\n# -- Example 3 --\n#\n\n" ); + + // bli_acquire_mpart() will safely truncate your requested submatrix + // view dimensions (or even the offsets) if they extend beyond the + // bounds of the parent object. + + bli_printm( "matrix 'a1' (current state)", &a1, "%5.1f", "" ); + + // Acquire a 4x3 submatrix view into a1 at offsets (4,2). Notice how + // the requested view contains four rows, but the view is created with + // only two rows because the starting m offset of 4 leaves only two rows + // left in the parent matrix. + bli_acquire_mpart( 4, 2, 4, 3, &a1, &v2 ); + + bli_printm( "4x3 submatrix 'v2' at offsets (4,2) -- two rows truncated for safety", &v2, "%5.1f", "" ); + + // + // Example 4: Create a bufferless object, attach an external buffer, and + // then create a submatrix view. + // + + printf( "\n#\n# -- Example 4 --\n#\n\n" ); + + // Create an object with known elements using the same approach as the + // previous example file. + double* p1 = malloc( m * n * sizeof( double ) ); + init_dmatrix_by_cols( m, n, p1, rs, cs ); + bli_obj_create_with_attached_buffer( dt, m, n, p1, rs, cs, &a2 ); + + bli_printm( "matrix 'a2' (initial state)", &a2, "%5.1f", "" ); + + // Acquire a 3x4 submatrix view at offset (2,3). + bli_acquire_mpart( 2, 3, 3, 4, &a2, &v3 ); + + bli_printm( "3x4 submatrix view 'v3' at offsets (2,3)", &v3, "%5.1f", "" ); + + // + // Example 5: Use a submatrix view to set a region of a larger matrix to + // zero. 
+ // + + printf( "\n#\n# -- Example 5 --\n#\n\n" ); + + bli_printm( "3x4 submatrix view 'v3' at offsets (2,3)", &v3, "%5.1f", "" ); + + bli_setm( &BLIS_ZERO, &v3 ); + + bli_printm( "3x4 submatrix view 'v3' (zeroed out)", &v3, "%5.1f", "" ); + + bli_printm( "matrix 'a2' (modified state)", &a2, "%5.1f", "" ); + + // + // Example 6: Obtain a submatrix view into a submatrix view. + // + + printf( "\n#\n# -- Example 6 --\n#\n\n" ); + + bli_acquire_mpart( 1, 1, 5, 6, &a2, &v4 ); + + bli_printm( "5x6 submatrix view 'v4' at offsets (1,1) of 'a2'", &v4, "%5.1f", "" ); + + bli_acquire_mpart( 1, 0, 4, 5, &v4, &v5 ); + + bli_printm( "4x5 submatrix view 'v5' at offsets (1,0) of 'v4'", &v5, "%5.1f", "" ); + + + // Free the memory arrays we allocated. + free( p1 ); + + // Free the objects we created. + bli_obj_free( &a1 ); + + return 0; +} + +// ----------------------------------------------------------------------------- + +void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) +{ + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by rows, assigning each element a unique + // value, starting at 0. + for ( i = 0; i < m; ++i ) + { + for ( j = 0; j < n; ++j ) + { + double* a_ij = a + i*rs + j*cs; + + *a_ij = alpha; + + alpha += 1.0; + } + } +} + +void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) +{ + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by columns, assigning each element a unique + // value, starting at 0. + for ( j = 0; j < n; ++j ) + { + for ( i = 0; i < m; ++i ) + { + double* a_ij = a + i*rs + j*cs; + + *a_ij = alpha; + + alpha += 1.0; + } + } +} + +void init_dobj_by_cols( obj_t* a ) +{ + dim_t m = bli_obj_length( a ); + dim_t n = bli_obj_width( a ); + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by columns, assigning each element a unique + // value, starting at 0. 
+ for ( j = 0; j < n; ++j ) + { + for ( i = 0; i < m; ++i ) + { + bli_setijm( alpha, 0.0, i, j, a ); + + alpha += 1.0; + } + } +} + +void init_zobj_by_cols( obj_t* a ) +{ + dim_t m = bli_obj_length( a ); + dim_t n = bli_obj_width( a ); + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by columns, assigning each real and imaginary + // element a unique value, starting at 0. + for ( j = 0; j < n; ++j ) + { + for ( i = 0; i < m; ++i ) + { + bli_setijm( alpha, alpha + 1.0, i, j, a ); + + alpha += 2.0; + } + } +} + diff --git a/examples/oapi/3level0.c b/examples/oapi/04level0.c similarity index 100% rename from examples/oapi/3level0.c rename to examples/oapi/04level0.c diff --git a/examples/oapi/4level1v.c b/examples/oapi/05level1v.c similarity index 100% rename from examples/oapi/4level1v.c rename to examples/oapi/05level1v.c diff --git a/examples/oapi/5level1m.c b/examples/oapi/06level1m.c similarity index 100% rename from examples/oapi/5level1m.c rename to examples/oapi/06level1m.c diff --git a/examples/oapi/6level1m_diag.c b/examples/oapi/07level1m_diag.c similarity index 100% rename from examples/oapi/6level1m_diag.c rename to examples/oapi/07level1m_diag.c diff --git a/examples/oapi/7level2.c b/examples/oapi/08level2.c similarity index 100% rename from examples/oapi/7level2.c rename to examples/oapi/08level2.c diff --git a/examples/oapi/8level3.c b/examples/oapi/09level3.c similarity index 100% rename from examples/oapi/8level3.c rename to examples/oapi/09level3.c diff --git a/examples/oapi/9util.c b/examples/oapi/10util.c similarity index 100% rename from examples/oapi/9util.c rename to examples/oapi/10util.c diff --git a/examples/oapi/Makefile b/examples/oapi/Makefile index 905ef6727..08964e479 100644 --- a/examples/oapi/Makefile +++ b/examples/oapi/Makefile @@ -105,16 +105,17 @@ CFLAGS += -I$(TEST_SRC_PATH) LIBBLIS_LINK := $(BUILD_PATH)/$(LIBBLIS_LINK) # Binary executable name. 
-TEST_BINS := 0obj_basic.x \ - 1obj_attach.x \ - 2obj_ij.x \ - 3level0.x \ - 4level1v.x \ - 5level1m.x \ - 6level1m_diag.x \ - 7level2.x \ - 8level3.x \ - 9util.x +TEST_BINS := 00obj_basic.x \ + 01obj_attach.x \ + 02obj_ij.x \ + 03obj_view.x \ + 04level0.x \ + 05level1v.x \ + 06level1m.x \ + 07level1m_diag.x \ + 08level2.x \ + 09level3.x \ + 10util.x diff --git a/examples/oapi/README b/examples/oapi/README index 28cc6d84e..adf7ded9d 100644 --- a/examples/oapi/README +++ b/examples/oapi/README @@ -6,7 +6,7 @@ This directory contains several files, each containing various pieces of example code that demonstrate core functionality of the object API in BLIS. These example files should be thought of collectively like a tutorial, and therefore it is recommended to start from the beginning (the file that -starts in '0'). +starts in '00'). You can build all of the examples by simply running 'make' from this directory. (You can also run 'make clean'.) The makefile assumes that diff --git a/frame/base/bli_part.c b/frame/base/bli_part.c index 47fa4fdfd..d05eabb79 100644 --- a/frame/base/bli_part.c +++ b/frame/base/bli_part.c @@ -38,6 +38,49 @@ // -- Matrix partitioning ------------------------------------------------------ +void bli_acquire_mpart + ( + dim_t i, + dim_t j, + dim_t bm, + dim_t bn, + obj_t* parent, + obj_t* child + ) +{ + // Query the dimensions of the parent object. + const dim_t m_par = bli_obj_length( parent ); + const dim_t n_par = bli_obj_width( parent ); + + // If either i or j is already beyond what exists of the parent matrix, + // slide them back to the outer dimensions. (What will happen in this + // scenario is that bm and/or bn will be reduced to zero so that the + // child matrix does not refer to anything beyond the bounds of the + // parent.) (Note: This is a safety measure and generally should never + // be needed if the caller is passing in sane arguments.) 
+ if ( i > m_par ) i = m_par; + if ( j > n_par ) j = n_par; + + // If either bm or bn spills out over the edge of the parent matrix, + // reduce them so that the child matrix fits within the bounds of the + // parent. (Note: This is a safety measure and generally should never + // be needed if the caller is passing in sane arguments, though this + // code is somewhat more likely to be needed than the code above.) + if ( bm > m_par - i ) bm = m_par - i; + if ( bn > n_par - j ) bn = n_par - j; + + // Alias the parent object's contents into the child object. + bli_obj_alias_to( parent, child ); + + // Set the offsets and dimensions of the child object. Note that we + // increment, rather than overwrite, the offsets of the child object + // in case the parent object already had non-zero offsets (usually + // because the parent was itself a child of a larger grandparent object). + bli_obj_inc_offs( i, j, child ); + bli_obj_set_dims( bm, bn, child ); +} + + void bli_acquire_mpart_mdim ( dir_t direct, diff --git a/frame/base/bli_part.h b/frame/base/bli_part.h index fd24f1d82..284a87ffa 100644 --- a/frame/base/bli_part.h +++ b/frame/base/bli_part.h @@ -36,6 +36,16 @@ // -- Matrix partitioning ------------------------------------------------------ +void bli_acquire_mpart + ( + dim_t i, + dim_t j, + dim_t m, + dim_t n, + obj_t* obj, + obj_t* sub_obj + ); + #undef GENPROT #define GENPROT( opname ) \ \ diff --git a/ref_kernels/1m/bli_packm_cxk_1er_ref.c b/ref_kernels/1m/bli_packm_cxk_1er_ref.c index 3c526506d..693fd3c47 100644 --- a/ref_kernels/1m/bli_packm_cxk_1er_ref.c +++ b/ref_kernels/1m/bli_packm_cxk_1er_ref.c @@ -189,7 +189,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_2xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_2xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -364,7 +364,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_4xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) 
+INSERT_GENTFUNCCO_BASIC2( packm_4xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -555,7 +555,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_6xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_6xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -762,7 +762,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_8xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_8xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -985,7 +985,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_10xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_10xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -1224,7 +1224,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_12xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_12xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -1479,7 +1479,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_14xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_14xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -1750,7 +1750,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_16xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_16xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -2133,5 +2133,5 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_30xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_30xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) diff --git a/ref_kernels/bli_cntx_ref.c b/ref_kernels/bli_cntx_ref.c index cc5828f4d..81df4f9c4 100644 --- a/ref_kernels/bli_cntx_ref.c +++ b/ref_kernels/bli_cntx_ref.c @@ -221,23 +221,23 @@ #define packm_30xk_rih_ker_name GENARNAME(packm_30xk_rih) #undef packm_2xk_1er_ker_name -#define packm_2xk_1er_ker_name GENARNAME(packm_2xk_1e) +#define packm_2xk_1er_ker_name 
GENARNAME(packm_2xk_1er) #undef packm_4xk_1er_ker_name -#define packm_4xk_1er_ker_name GENARNAME(packm_4xk_1e) +#define packm_4xk_1er_ker_name GENARNAME(packm_4xk_1er) #undef packm_6xk_1er_ker_name -#define packm_6xk_1er_ker_name GENARNAME(packm_6xk_1e) +#define packm_6xk_1er_ker_name GENARNAME(packm_6xk_1er) #undef packm_8xk_1er_ker_name -#define packm_8xk_1er_ker_name GENARNAME(packm_8xk_1e) +#define packm_8xk_1er_ker_name GENARNAME(packm_8xk_1er) #undef packm_10xk_1er_ker_name -#define packm_10xk_1er_ker_name GENARNAME(packm_10xk_1e) +#define packm_10xk_1er_ker_name GENARNAME(packm_10xk_1er) #undef packm_12xk_1er_ker_name -#define packm_12xk_1er_ker_name GENARNAME(packm_12xk_1e) +#define packm_12xk_1er_ker_name GENARNAME(packm_12xk_1er) #undef packm_14xk_1er_ker_name -#define packm_14xk_1er_ker_name GENARNAME(packm_14xk_1e) +#define packm_14xk_1er_ker_name GENARNAME(packm_14xk_1er) #undef packm_16xk_1er_ker_name -#define packm_16xk_1er_ker_name GENARNAME(packm_16xk_1e) +#define packm_16xk_1er_ker_name GENARNAME(packm_16xk_1er) #undef packm_30xk_1er_ker_name -#define packm_30xk_1er_ker_name GENARNAME(packm_30xk_1e) +#define packm_30xk_1er_ker_name GENARNAME(packm_30xk_1er) // Include the level-1m kernel API template. #include "bli_l1m_ker.h"