Renamed various datatype-related macros/functions.

Details:
- Renamed the following macros in bli_obj_macro_defs.h and bli_param_macro_defs.h:
  - bli_obj_datatype() -> bli_obj_dt()
  - bli_obj_target_datatype() -> bli_obj_target_dt()
  - bli_obj_execution_datatype() -> bli_obj_exec_dt()
  - bli_obj_set_datatype() -> bli_obj_set_dt()
  - bli_obj_set_target_datatype() -> bli_obj_set_target_dt()
  - bli_obj_set_execution_datatype() -> bli_obj_set_exec_dt()
  - bli_obj_datatype_proj_to_real() -> bli_obj_dt_proj_to_real()
  - bli_obj_datatype_proj_to_complex() -> bli_obj_dt_proj_to_complex()
  - bli_datatype_proj_to_real() -> bli_dt_proj_to_real()
  - bli_datatype_proj_to_complex() -> bli_dt_proj_to_complex()
- Renamed the following functions in bli_obj.c:
  - bli_datatype_size() -> bli_dt_size()
  - bli_datatype_string() -> bli_dt_string()
  - bli_datatype_union() -> bli_dt_union()
- Removed a pair of old level-1f penryn intrinsics kernels that were no longer in use.
2026-04-19 23:28:52 +00:00 · 2018-04-30 14:57:33 -05:00
parent 01c4173238
commit 75d0d1057d
132 changed files with 331 additions and 1094 deletions
--- a/examples/oapi/0obj_basic.c
+++ b/examples/oapi/0obj_basic.c
@@ -121,8 +121,8 @@ int main( int argc, char** argv )

 	// Let's inspect the amount of padding inserted for alignment. Note
 	// the difference between the m dimension and the column stride.
-	printf( "datatype            %s\n", bli_datatype_string( bli_obj_datatype( a8 ) ) );
-	printf( "datatype size       %d bytes\n", bli_datatype_size( bli_obj_datatype( a8 ) ) );
+	printf( "datatype            %s\n", bli_dt_string( bli_obj_dt( a8 ) ) );
+	printf( "datatype size       %d bytes\n", bli_dt_size( bli_obj_dt( a8 ) ) );
 	printf( "m dim (# of rows):  %d\n", ( int )bli_obj_length( a8 ) );
 	printf( "n dim (# of cols):  %d\n", ( int )bli_obj_width( a8 ) );
 	printf( "row stride:         %d\n", ( int )bli_obj_row_stride( a8 ) );
@@ -139,24 +139,24 @@ int main( int argc, char** argv )
 	bli_obj_create( BLIS_SCOMPLEX, 3, 5, 0, 0, &a10);
 	bli_obj_create( BLIS_DCOMPLEX, 3, 5, 0, 0, &a11 );

-	printf( "datatype            %s\n", bli_datatype_string( bli_obj_datatype( a9 ) ) );
-	printf( "datatype size       %d bytes\n", bli_datatype_size( bli_obj_datatype( a9 ) ) );
+	printf( "datatype            %s\n", bli_dt_string( bli_obj_dt( a9 ) ) );
+	printf( "datatype size       %d bytes\n", bli_dt_size( bli_obj_dt( a9 ) ) );
 	printf( "m dim (# of rows):  %d\n", ( int )bli_obj_length( a9 ) );
 	printf( "n dim (# of cols):  %d\n", ( int )bli_obj_width( a9 ) );
 	printf( "row stride:         %d\n", ( int )bli_obj_row_stride( a9 ) );
 	printf( "col stride:         %d\n", ( int )bli_obj_col_stride( a9 ) );

 	printf( "\n" );
-	printf( "datatype            %s\n", bli_datatype_string( bli_obj_datatype( a10 ) ) );
-	printf( "datatype size       %d bytes\n", bli_datatype_size( bli_obj_datatype( a10 ) ) );
+	printf( "datatype            %s\n", bli_dt_string( bli_obj_dt( a10 ) ) );
+	printf( "datatype size       %d bytes\n", bli_dt_size( bli_obj_dt( a10 ) ) );
 	printf( "m dim (# of rows):  %d\n", ( int )bli_obj_length( a10 ) );
 	printf( "n dim (# of cols):  %d\n", ( int )bli_obj_width( a10 ) );
 	printf( "row stride:         %d\n", ( int )bli_obj_row_stride( a10 ) );
 	printf( "col stride:         %d\n", ( int )bli_obj_col_stride( a10 ) );

 	printf( "\n" );
-	printf( "datatype            %s\n", bli_datatype_string( bli_obj_datatype( a11 ) ) );
-	printf( "datatype size       %d bytes\n", bli_datatype_size( bli_obj_datatype( a11 ) ) );
+	printf( "datatype            %s\n", bli_dt_string( bli_obj_dt( a11 ) ) );
+	printf( "datatype size       %d bytes\n", bli_dt_size( bli_obj_dt( a11 ) ) );
 	printf( "m dim (# of rows):  %d\n", ( int )bli_obj_length( a11 ) );
 	printf( "n dim (# of cols):  %d\n", ( int )bli_obj_width( a11 ) );
 	printf( "row stride:         %d\n", ( int )bli_obj_row_stride( a11 ) );
--- a/frame/0/bli_l0_oapi.c
+++ b/frame/0/bli_l0_oapi.c
@@ -50,7 +50,7 @@ void PASTEMAC0(opname) \
 	bli_init_once(); \
 \
 	num_t     dt_chi; \
-	num_t     dt_absq_c  = bli_obj_datatype_proj_to_complex( *absq ); \
+	num_t     dt_absq_c  = bli_obj_dt_proj_to_complex( *absq ); \
 \
    void*     buf_chi; \
    void*     buf_absq   = bli_obj_buffer_at_off( *absq ); \
@@ -88,7 +88,7 @@ void PASTEMAC0(opname) \
 { \
 	bli_init_once(); \
 \
-	num_t     dt        = bli_obj_datatype( *psi ); \
+	num_t     dt        = bli_obj_dt( *psi ); \
 \
 	conj_t    conjchi   = bli_obj_conj_status( *chi ); \
 \
@@ -125,7 +125,7 @@ void PASTEMAC0(opname) \
 { \
 	bli_init_once(); \
 \
-	num_t     dt        = bli_obj_datatype( *chi ); \
+	num_t     dt        = bli_obj_dt( *chi ); \
 \
 	conj_t    conjchi   = bli_obj_conj_status( *chi ); \
 \
@@ -158,7 +158,7 @@ void PASTEMAC0(opname) \
 { \
 	bli_init_once(); \
 \
-	num_t     dt        = bli_obj_datatype( *psi ); \
+	num_t     dt        = bli_obj_dt( *psi ); \
 \
    void*     buf_chi   = bli_obj_buffer_for_1x1( dt, *chi ); \
 	void*     buf_psi   = bli_obj_buffer_at_off( *psi ); \
@@ -191,7 +191,7 @@ void PASTEMAC0(opname) \
 { \
 	bli_init_once(); \
 \
-	num_t     dt_chi    = bli_obj_datatype( *chi ); \
+	num_t     dt_chi    = bli_obj_dt( *chi ); \
 	num_t     dt_def    = BLIS_DCOMPLEX; \
 	num_t     dt_use; \
 \
@@ -234,7 +234,7 @@ void PASTEMAC0(opname) \
 { \
 	bli_init_once(); \
 \
-	num_t     dt_chi    = bli_obj_datatype( *chi ); \
+	num_t     dt_chi    = bli_obj_dt( *chi ); \
 \
 	void*     buf_chi   = bli_obj_buffer_at_off( *chi ); \
 \
@@ -268,7 +268,7 @@ void PASTEMAC0(opname) \
 	bli_init_once(); \
 \
 	num_t     dt_chi; \
-	num_t     dt_zeta_c   = bli_obj_datatype_proj_to_complex( *zeta_r ); \
+	num_t     dt_zeta_c   = bli_obj_dt_proj_to_complex( *zeta_r ); \
 \
    void*     buf_chi; \
 \
@@ -309,7 +309,7 @@ void PASTEMAC0(opname) \
 { \
 	bli_init_once(); \
 \
-	num_t     dt_chi      = bli_obj_datatype( *chi ); \
+	num_t     dt_chi      = bli_obj_dt( *chi ); \
 \
    void*     buf_zeta_r  = bli_obj_buffer_for_1x1( dt_chi, *zeta_r ); \
    void*     buf_zeta_i  = bli_obj_buffer_for_1x1( dt_chi, *zeta_i ); \
--- a/frame/0/copysc/bli_copysc.c
+++ b/frame/0/copysc/bli_copysc.c
@@ -65,7 +65,7 @@ void PASTEMAC0(opname) \
 \
 	conj_t    conjchi   = bli_obj_conj_status( *chi ); \
 \
-	num_t     dt_psi    = bli_obj_datatype( *psi ); \
+	num_t     dt_psi    = bli_obj_dt( *psi ); \
    void*     buf_psi   = bli_obj_buffer_at_off( *psi ); \
 \
 	num_t     dt_chi; \
--- a/frame/1/bli_l1v_oapi.c
+++ b/frame/1/bli_l1v_oapi.c
@@ -54,7 +54,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	conj_t    conjx     = bli_obj_conj_status( *x ); \
 	dim_t     n         = bli_obj_vector_dim( *x ); \
@@ -98,7 +98,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	dim_t     n         = bli_obj_vector_dim( *x ); \
 	void*     buf_x     = bli_obj_buffer_at_off( *x ); \
@@ -140,7 +140,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	conj_t    conjx     = bli_obj_conj_status( *x ); \
 	dim_t     n         = bli_obj_vector_dim( *x ); \
@@ -200,7 +200,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	conj_t    conjx     = bli_obj_conj_status( *x ); \
 	dim_t     n         = bli_obj_vector_dim( *x ); \
@@ -255,7 +255,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	conj_t    conjx     = bli_obj_conj_status( *x ); \
 	conj_t    conjy     = bli_obj_conj_status( *y ); \
@@ -304,7 +304,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	conj_t    conjx     = bli_obj_conj_status( *x ); \
 	conj_t    conjy     = bli_obj_conj_status( *y ); \
@@ -366,7 +366,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	dim_t     n         = bli_obj_vector_dim( *x ); \
 	void*     buf_x     = bli_obj_buffer_at_off( *x ); \
@@ -403,7 +403,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	/* conj_t    conjalpha = bli_obj_conj_status( *alpha ); */ \
 	dim_t     n         = bli_obj_vector_dim( *x ); \
@@ -454,7 +454,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	dim_t     n         = bli_obj_vector_dim( *x ); \
 	void*     buf_x     = bli_obj_buffer_at_off( *x ); \
@@ -495,7 +495,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	conj_t    conjx     = bli_obj_conj_status( *x ); \
 	dim_t     n         = bli_obj_vector_dim( *x ); \
--- a/frame/1/other/packv/bli_packv_init.c
+++ b/frame/1/other/packv/bli_packv_init.c
@@ -81,7 +81,7 @@ void bli_packv_init

 	// Now, if we are not skipping the pack operation, then the only question
 	// left is whether we are to typecast vector a before packing.
-	if ( bli_obj_datatype( *a ) != bli_obj_target_datatype( *a ) )
+	if ( bli_obj_dt( *a ) != bli_obj_target_dt( *a ) )
 		bli_abort();

 	// Extract various fields from the control tree and pass them in
@@ -113,7 +113,7 @@ siz_t bli_packv_init_pack
       cntx_t* cntx
     )
 {
-	num_t     dt     = bli_obj_datatype( *a );
+	num_t     dt     = bli_obj_dt( *a );
 	dim_t     dim_a  = bli_obj_vector_dim( *a );
 	dim_t     bmult  = bli_cntx_get_blksz_def_dt( dt, bmult_id, cntx );

--- a/frame/1/other/packv/bli_packv_unb_var1.c
+++ b/frame/1/other/packv/bli_packv_unb_var1.c
@@ -51,7 +51,7 @@ void bli_packv_unb_var1( obj_t*   c,
                         cntx_t*  cntx,
                         packv_t* cntl )
 {
-	num_t     dt_cp     = bli_obj_datatype( *c );
+	num_t     dt_cp     = bli_obj_dt( *c );

 	dim_t     dim_p     = bli_obj_vector_dim( *p );

--- a/frame/1/other/unpackv/bli_unpackv_int.c
+++ b/frame/1/other/unpackv/bli_unpackv_int.c
@@ -94,10 +94,10 @@ void bli_unpackv_int( obj_t*     p,

 	// Now, if we are not skipping the unpack operation, then the only
 	// question left is whether we are to typecast vector a after unpacking.
-	if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
+	if ( bli_obj_dt( *p ) != bli_obj_dt( *a ) )
 		bli_abort();
 /*
-	if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
+	if ( bli_obj_dt( *p ) != bli_obj_dt( *a ) )
 	{
 		// Initialize an object c for the intermediate typecast vector.
 		bli_unpackv_init_cast( p,
@@ -132,7 +132,7 @@ void bli_unpackv_int( obj_t*     p,
 	// was not necessary, then we are done because the call to the unpackv
 	// implementation would have unpacked directly to vector a.
 /*
-	if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
+	if ( bli_obj_dt( *p ) != bli_obj_dt( *a ) )
 	{
 		// Copy/typecast vector c to vector a.
 		// NOTE: Here, we use copynzv instead of copym because, in the cases
@@ -179,15 +179,15 @@ void bli_unpackv_init_cast( obj_t*  p,
 	//      already available. (After acquring a mem entry from the memory
 	//      manager, it is cached within p for quick access later on.)

-	num_t dt_targ_a    = bli_obj_target_datatype( *a );
+	num_t dt_targ_a    = bli_obj_target_dt( *a );
 	dim_t dim_a        = bli_obj_vector_dim( *a );
-	siz_t elem_size_c  = bli_datatype_size( dt_targ_a );
+	siz_t elem_size_c  = bli_dt_size( dt_targ_a );

 	// We begin by copying the basic fields of a.
 	bli_obj_alias_to( *a, *c );

 	// Update datatype and element size fields.
-	bli_obj_set_datatype( dt_targ_a, *c );
+	bli_obj_set_dt( dt_targ_a, *c );
 	bli_obj_set_elem_size( elem_size_c, *c );

 	// Update the strides and dimensions. We set the increments to reflect a
--- a/frame/1/other/unpackv/bli_unpackv_unb_var1.c
+++ b/frame/1/other/unpackv/bli_unpackv_unb_var1.c
@@ -51,7 +51,7 @@ void bli_unpackv_unb_var1( obj_t*     p,
                           cntx_t*    cntx,
                           unpackv_t* cntl )
 {
-	num_t     dt_pc     = bli_obj_datatype( *p );
+	num_t     dt_pc     = bli_obj_dt( *p );

 	dim_t     dim_c     = bli_obj_vector_dim( *c );

--- a/frame/1d/bli_l1d_oapi.c
+++ b/frame/1d/bli_l1d_oapi.c
@@ -54,7 +54,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
    doff_t    diagoffx  = bli_obj_diag_offset( *x ); \
    diag_t    diagx     = bli_obj_diag( *x ); \
@@ -107,7 +107,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
    doff_t    diagoffx  = bli_obj_diag_offset( *x ); \
    diag_t    diagx     = bli_obj_diag( *x ); \
@@ -168,7 +168,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
    doff_t    diagoffx  = bli_obj_diag_offset( *x ); \
 	dim_t     m         = bli_obj_length( *x ); \
@@ -210,7 +210,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
    /* conj_t    conjalpha = bli_obj_conj_status( *alpha ); */ \
    doff_t    diagoffx  = bli_obj_diag_offset( *x ); \
@@ -266,7 +266,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
    doff_t    diagoffx  = bli_obj_diag_offset( *x ); \
 	dim_t     m         = bli_obj_length( *x ); \
--- a/frame/1f/bli_l1f_oapi.c
+++ b/frame/1f/bli_l1f_oapi.c
@@ -57,7 +57,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	conj_t    conjx     = bli_obj_conj_status( *x ); \
 	conj_t    conjy     = bli_obj_conj_status( *y ); \
@@ -123,7 +123,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	conj_t    conja     = bli_obj_conj_status( *a ); \
 	conj_t    conjx     = bli_obj_conj_status( *x ); \
@@ -191,7 +191,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	conj_t    conjxt    = bli_obj_conj_status( *xt ); \
 	conj_t    conjx     = bli_obj_conj_status( *x ); \
@@ -259,7 +259,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	conj_t    conjat    = bli_obj_conj_status( *at ); \
 	conj_t    conja     = bli_obj_conj_status( *a ); \
@@ -342,7 +342,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	conj_t    conjat    = bli_obj_conj_status( *a ); \
 	conj_t    conjx     = bli_obj_conj_status( *x ); \
--- a/frame/1m/bli_l1m_oapi.c
+++ b/frame/1m/bli_l1m_oapi.c
@@ -54,7 +54,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	doff_t    diagoffx  = bli_obj_diag_offset( *x ); \
 	diag_t    diagx     = bli_obj_diag( *x ); \
@@ -109,7 +109,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	doff_t    diagoffx  = bli_obj_diag_offset( *x ); \
 	diag_t    diagx     = bli_obj_diag( *x ); \
@@ -173,7 +173,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	/* conj_t    conjalpha = bli_obj_conj_status( *alpha ); */ \
 	doff_t    diagoffx  = bli_obj_diag_offset( *x ); \
@@ -245,7 +245,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	/* conj_t    conjalpha = bli_obj_conj_status( *alpha ); */ \
 	doff_t    diagoffx  = bli_obj_diag_offset( *x ); \
--- a/frame/1m/packm/bli_packm_blk_var1.c
+++ b/frame/1m/packm/bli_packm_blk_var1.c
@@ -108,7 +108,7 @@ void bli_packm_blk_var1
       thrinfo_t* t
     )
 {
-	num_t     dt_cp      = bli_obj_datatype( *c );
+	num_t     dt_cp      = bli_obj_dt( *c );

 	struc_t   strucc     = bli_obj_struc( *c );
 	doff_t    diagoffc   = bli_obj_diag_offset( *c );
--- a/frame/1m/packm/bli_packm_blk_var1.c.old
+++ b/frame/1m/packm/bli_packm_blk_var1.c.old
@@ -68,7 +68,7 @@ void bli_packm_blk_var1( obj_t*   c,
                         obj_t*   p,
                         packm_thrinfo_t* t )
 {
-	num_t     dt_cp      = bli_obj_datatype( *c );
+	num_t     dt_cp      = bli_obj_dt( *c );

 	struc_t   strucc     = bli_obj_struc( *c );
 	doff_t    diagoffc   = bli_obj_diag_offset( *c );
--- a/frame/1m/packm/bli_packm_init.c
+++ b/frame/1m/packm/bli_packm_init.c
@@ -189,7 +189,7 @@ siz_t bli_packm_init_pack
 {
 	bli_init_once();

-	num_t     dt           = bli_obj_datatype( *a );
+	num_t     dt           = bli_obj_dt( *a );
 	trans_t   transa       = bli_obj_onlytrans_status( *a );
 	dim_t     m_a          = bli_obj_length( *a );
 	dim_t     n_a          = bli_obj_width( *a );
--- a/frame/1m/packm/bli_packm_unb_var1.c
+++ b/frame/1m/packm/bli_packm_unb_var1.c
@@ -64,7 +64,7 @@ void bli_packm_unb_var1
       thrinfo_t* thread
     )
 {
-	num_t     dt_cp     = bli_obj_datatype( *c );
+	num_t     dt_cp     = bli_obj_dt( *c );

 	struc_t   strucc    = bli_obj_struc( *c );
 	doff_t    diagoffc  = bli_obj_diag_offset( *c );
--- a/frame/1m/unpackm/bli_unpackm_blk_var1.c
+++ b/frame/1m/unpackm/bli_unpackm_blk_var1.c
@@ -64,7 +64,7 @@ void bli_unpackm_blk_var1
       thrinfo_t* thread
     )
 {
-	num_t     dt_cp     = bli_obj_datatype( *c );
+	num_t     dt_cp     = bli_obj_dt( *c );

 	// Normally we take the parameters from the source argument. But here,
 	// the packm/unpackm framework is not yet solidified enough for us to
--- a/frame/1m/unpackm/bli_unpackm_unb_var1.c
+++ b/frame/1m/unpackm/bli_unpackm_unb_var1.c
@@ -59,7 +59,7 @@ void bli_unpackm_unb_var1
       thrinfo_t* thread
     )
 {
-	num_t     dt_pc     = bli_obj_datatype( *p );
+	num_t     dt_pc     = bli_obj_dt( *p );

 	doff_t    diagoffp  = bli_obj_diag_offset( *p );
 	uplo_t    uplop     = bli_obj_uplo( *p );
--- a/frame/2/bli_l2_oapi.c
+++ b/frame/2/bli_l2_oapi.c
@@ -57,7 +57,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *a ); \
+	num_t     dt        = bli_obj_dt( *a ); \
 \
    trans_t   transa    = bli_obj_conjtrans_status( *a ); \
    conj_t    conjx     = bli_obj_conj_status( *x ); \
@@ -126,7 +126,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *a ); \
+	num_t     dt        = bli_obj_dt( *a ); \
 \
    conj_t    conjx     = bli_obj_conj_status( *x ); \
    conj_t    conjy     = bli_obj_conj_status( *y ); \
@@ -190,7 +190,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *a ); \
+	num_t     dt        = bli_obj_dt( *a ); \
 \
    uplo_t    uploa     = bli_obj_uplo( *a ); \
    conj_t    conja     = bli_obj_conj_status( *a ); \
@@ -259,7 +259,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *a ); \
+	num_t     dt        = bli_obj_dt( *a ); \
 \
    uplo_t    uploa     = bli_obj_uplo( *a ); \
    conj_t    conjx     = bli_obj_conj_status( *x ); \
@@ -318,7 +318,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *a ); \
+	num_t     dt        = bli_obj_dt( *a ); \
 \
    uplo_t    uploa     = bli_obj_uplo( *a ); \
    conj_t    conjx     = bli_obj_conj_status( *x ); \
@@ -381,7 +381,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *a ); \
+	num_t     dt        = bli_obj_dt( *a ); \
 \
    uplo_t    uploa     = bli_obj_uplo( *a ); \
    trans_t   transa    = bli_obj_conjtrans_status( *a ); \
--- a/frame/2/gemv/bli_gemv_var_oapi.c
+++ b/frame/2/gemv/bli_gemv_var_oapi.c
@@ -50,7 +50,7 @@ void PASTEMAC0(opname) \
 { \
 	bli_init_once(); \
 \
-	num_t     dt        = bli_obj_datatype( *a ); \
+	num_t     dt        = bli_obj_dt( *a ); \
 \
 	trans_t   transa    = bli_obj_conjtrans_status( *a ); \
 	conj_t    conjx     = bli_obj_conj_status( *x ); \
--- a/frame/2/gemv/bli_gemv_var_oapi.c.prev
+++ b/frame/2/gemv/bli_gemv_var_oapi.c.prev
@@ -51,7 +51,7 @@ void PASTEMAC0(opname) \
       gemv_t* cntl \
     ) \
 { \
-	num_t     dt        = bli_obj_datatype( *a ); \
+	num_t     dt        = bli_obj_dt( *a ); \
 \
 	trans_t   transa    = bli_obj_conjtrans_status( *a ); \
 	conj_t    conjx     = bli_obj_conj_status( *x ); \
--- a/frame/2/gemv/other/bli_gemv_front.c
+++ b/frame/2/gemv/other/bli_gemv_front.c
@@ -67,9 +67,9 @@ void bli_gemv_front


 	// Query the target datatypes of each object.
-	dt_targ_a = bli_obj_target_datatype( *a );
-	dt_targ_x = bli_obj_target_datatype( *x );
-	dt_targ_y = bli_obj_target_datatype( *y );
+	dt_targ_a = bli_obj_target_dt( *a );
+	dt_targ_x = bli_obj_target_dt( *x );
+	dt_targ_y = bli_obj_target_dt( *y );

 	// Determine whether each operand is stored with unit stride.
 	a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
@@ -81,7 +81,7 @@ void bli_gemv_front
 	// Create an object to hold a copy-cast of alpha. Notice that we use
 	// the type union of the target datatypes of a and x to prevent any
 	// unnecessary loss of information during the computation.
-	dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
+	dt_alpha = bli_dt_union( dt_targ_a, dt_targ_x );
 	bli_obj_scalar_init_detached_copy_of( dt_alpha,
 	                                      BLIS_NO_CONJUGATE,
 	                                      alpha,
--- a/frame/2/ger/bli_ger_var_oapi.c
+++ b/frame/2/ger/bli_ger_var_oapi.c
@@ -49,7 +49,7 @@ void PASTEMAC0(opname) \
 { \
 	bli_init_once(); \
 \
-	num_t     dt        = bli_obj_datatype( *a ); \
+	num_t     dt        = bli_obj_dt( *a ); \
 \
 	conj_t    conjx     = bli_obj_conj_status( *x ); \
 	conj_t    conjy     = bli_obj_conj_status( *y ); \
--- a/frame/2/ger/other/bli_ger_front.c
+++ b/frame/2/ger/other/bli_ger_front.c
@@ -64,9 +64,9 @@ void bli_ger_front


 	// Query the target datatypes of each object.
-	dt_targ_x = bli_obj_target_datatype( *x );
-	dt_targ_y = bli_obj_target_datatype( *y );
-	//dt_targ_a = bli_obj_target_datatype( *a );
+	dt_targ_x = bli_obj_target_dt( *x );
+	dt_targ_y = bli_obj_target_dt( *y );
+	//dt_targ_a = bli_obj_target_dt( *a );

 	// Determine whether each operand with unit stride.
 	x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
@@ -78,7 +78,7 @@ void bli_ger_front
 	// Create an object to hold a copy-cast of alpha. Notice that we use
 	// the type union of the target datatypes of x and y to prevent any
 	// unnecessary loss of information during the computation.
-	dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_y );
+	dt_alpha = bli_dt_union( dt_targ_x, dt_targ_y );
 	bli_obj_scalar_init_detached_copy_of( dt_alpha,
 	                                      BLIS_NO_CONJUGATE,
 	                                      alpha,
--- a/frame/2/ger/other/bli_ger_int.c
+++ b/frame/2/ger/other/bli_ger_int.c
@@ -94,7 +94,7 @@ void bli_ger_int( conj_t  conjx,
 		bli_obj_toggle_conj( x_local );
 		bli_obj_toggle_conj( y_local );

-		bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha ),
+		bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *alpha ),
 		                                      BLIS_CONJUGATE,
 		                                      alpha,
 		                                      &alpha_local );
--- a/frame/2/hemv/bli_hemv_var_oapi.c
+++ b/frame/2/hemv/bli_hemv_var_oapi.c
@@ -51,7 +51,7 @@ void PASTEMAC0(opname) \
 { \
 	bli_init_once(); \
 \
-	num_t     dt        = bli_obj_datatype( *a ); \
+	num_t     dt        = bli_obj_dt( *a ); \
 \
 	uplo_t    uplo      = bli_obj_uplo( *a ); \
 	conj_t    conja     = bli_obj_conj_status( *a ); \
--- a/frame/2/hemv/other/bli_hemv_front.c
+++ b/frame/2/hemv/other/bli_hemv_front.c
@@ -67,9 +67,9 @@ void bli_hemv_front


 	// Query the target datatypes of each object.
-	dt_targ_a = bli_obj_target_datatype( *a );
-	dt_targ_x = bli_obj_target_datatype( *x );
-	dt_targ_y = bli_obj_target_datatype( *y );
+	dt_targ_a = bli_obj_target_dt( *a );
+	dt_targ_x = bli_obj_target_dt( *x );
+	dt_targ_y = bli_obj_target_dt( *y );

 	// Determine whether each operand with unit stride.
 	a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
@@ -81,7 +81,7 @@ void bli_hemv_front
 	// Create an object to hold a copy-cast of alpha. Notice that we use
 	// the type union of the target datatypes of a and x to prevent any
 	// unnecessary loss of information during the computation.
-	dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
+	dt_alpha = bli_dt_union( dt_targ_a, dt_targ_x );
 	bli_obj_scalar_init_detached_copy_of( dt_alpha,
 	                                      BLIS_NO_CONJUGATE,
 	                                      alpha,
--- a/frame/2/her/bli_her_var_oapi.c
+++ b/frame/2/her/bli_her_var_oapi.c
@@ -49,7 +49,7 @@ void PASTEMAC0(opname) \
 { \
 	bli_init_once(); \
 \
-	num_t     dt        = bli_obj_datatype( *c ); \
+	num_t     dt        = bli_obj_dt( *c ); \
 \
 	uplo_t    uplo      = bli_obj_uplo( *c ); \
 	conj_t    conjx     = bli_obj_conj_status( *x ); \
--- a/frame/2/her/other/bli_her_front.c
+++ b/frame/2/her/other/bli_her_front.c
@@ -61,8 +61,8 @@ void bli_her_front


 	// Query the target datatypes of each object.
-	dt_targ_x = bli_obj_target_datatype( *x );
-	//dt_targ_c = bli_obj_target_datatype( *c );
+	dt_targ_x = bli_obj_target_dt( *x );
+	//dt_targ_c = bli_obj_target_dt( *c );

 	// Determine whether each operand with unit stride.
 	x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
--- a/frame/2/her2/bli_her2_var_oapi.c
+++ b/frame/2/her2/bli_her2_var_oapi.c
@@ -51,7 +51,7 @@ void PASTEMAC0(opname) \
 { \
 	bli_init_once(); \
 \
-	num_t     dt        = bli_obj_datatype( *c ); \
+	num_t     dt        = bli_obj_dt( *c ); \
 \
 	uplo_t    uplo      = bli_obj_uplo( *c ); \
 	conj_t    conjx     = bli_obj_conj_status( *x ); \
--- a/frame/2/her2/other/bli_her2_front.c
+++ b/frame/2/her2/other/bli_her2_front.c
@@ -65,9 +65,9 @@ void bli_her2_front


 	// Query the target datatypes of each object.
-	dt_targ_x = bli_obj_target_datatype( *x );
-	dt_targ_y = bli_obj_target_datatype( *y );
-	//dt_targ_c = bli_obj_target_datatype( *c );
+	dt_targ_x = bli_obj_target_dt( *x );
+	dt_targ_y = bli_obj_target_dt( *y );
+	//dt_targ_c = bli_obj_target_dt( *c );

 	// Determine whether each operand with unit stride.
 	x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
@@ -78,7 +78,7 @@ void bli_her2_front

 	// Create an object to hold a copy-cast of alpha. Notice that we use
 	// the type union of the datatypes of x and y.
-	dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_y );
+	dt_alpha = bli_dt_union( dt_targ_x, dt_targ_y );
 	bli_obj_scalar_init_detached_copy_of( dt_alpha,
 	                                      BLIS_NO_CONJUGATE,
 	                                      alpha,
--- a/frame/2/her2/other/bli_her2_int.c
+++ b/frame/2/her2/other/bli_her2_int.c
@@ -98,11 +98,11 @@ void bli_her2_int( conj_t  conjh,
 		bli_obj_toggle_conj( x_local );
 		bli_obj_toggle_conj( y_local );

-		bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha ),
+		bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *alpha ),
 		                                      BLIS_CONJUGATE,
 		                                      alpha,
 		                                      &alpha_local );
-		bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha_conj ),
+		bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *alpha_conj ),
 		                                      BLIS_CONJUGATE,
 		                                      alpha_conj,
 		                                      &alpha_conj_local );
--- a/frame/2/symv/other/bli_symv_front.c
+++ b/frame/2/symv/other/bli_symv_front.c
@@ -67,9 +67,9 @@ void bli_symv_front


 	// Query the target datatypes of each object.
-	dt_targ_a = bli_obj_target_datatype( *a );
-	dt_targ_x = bli_obj_target_datatype( *x );
-	dt_targ_y = bli_obj_target_datatype( *y );
+	dt_targ_a = bli_obj_target_dt( *a );
+	dt_targ_x = bli_obj_target_dt( *x );
+	dt_targ_y = bli_obj_target_dt( *y );

 	// Determine whether each operand with unit stride.
 	a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
@@ -81,7 +81,7 @@ void bli_symv_front
 	// Create an object to hold a copy-cast of alpha. Notice that we use
 	// the type union of the target datatypes of a and x to prevent any
 	// unnecessary loss of information during the computation.
-	dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
+	dt_alpha = bli_dt_union( dt_targ_a, dt_targ_x );
 	bli_obj_scalar_init_detached_copy_of( dt_alpha,
 	                                      BLIS_NO_CONJUGATE,
 	                                      alpha,
--- a/frame/2/syr/other/bli_syr_front.c
+++ b/frame/2/syr/other/bli_syr_front.c
@@ -61,8 +61,8 @@ void bli_syr_front


 	// Query the target datatypes of each object.
-	dt_targ_x = bli_obj_target_datatype( *x );
-	dt_targ_c = bli_obj_target_datatype( *c );
+	dt_targ_x = bli_obj_target_dt( *x );
+	dt_targ_c = bli_obj_target_dt( *c );

 	// Determine whether each operand with unit stride.
 	x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
@@ -73,7 +73,7 @@ void bli_syr_front
 	// Create an object to hold a copy-cast of alpha. Notice that we use
 	// the type union of the target datatypes of x and c to prevent any
 	// unnecessary loss of information during the computation.
-	dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_c );
+	dt_alpha = bli_dt_union( dt_targ_x, dt_targ_c );
 	bli_obj_scalar_init_detached_copy_of( dt_alpha,
 	                                      BLIS_NO_CONJUGATE,
 	                                      alpha,
--- a/frame/2/syr2/other/bli_syr2_front.c
+++ b/frame/2/syr2/other/bli_syr2_front.c
@@ -64,9 +64,9 @@ void bli_syr2_front


 	// Query the target datatypes of each object.
-	dt_targ_x = bli_obj_target_datatype( *x );
-	dt_targ_y = bli_obj_target_datatype( *y );
-	//dt_targ_c = bli_obj_target_datatype( *c );
+	dt_targ_x = bli_obj_target_dt( *x );
+	dt_targ_y = bli_obj_target_dt( *y );
+	//dt_targ_c = bli_obj_target_dt( *c );

 	// Determine whether each operand with unit stride.
 	x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
@@ -77,7 +77,7 @@ void bli_syr2_front

 	// Create an object to hold a copy-cast of alpha. Notice that we use
 	// the type union of the datatypes of x and y.
-	dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_y );
+	dt_alpha = bli_dt_union( dt_targ_x, dt_targ_y );
 	bli_obj_scalar_init_detached_copy_of( dt_alpha,
 	                                      BLIS_NO_CONJUGATE,
 	                                      alpha,
--- a/frame/2/trmv/bli_trmv_var_oapi.c
+++ b/frame/2/trmv/bli_trmv_var_oapi.c
@@ -48,7 +48,7 @@ void PASTEMAC0(opname) \
 { \
 	bli_init_once(); \
 \
-	num_t     dt        = bli_obj_datatype( *a ); \
+	num_t     dt        = bli_obj_dt( *a ); \
 \
 	uplo_t    uploa     = bli_obj_uplo( *a ); \
 	trans_t   transa    = bli_obj_conjtrans_status( *a ); \
--- a/frame/2/trmv/other/bli_trmv_front.c
+++ b/frame/2/trmv/other/bli_trmv_front.c
@@ -61,8 +61,8 @@ void bli_trmv_front


 	// Query the target datatypes of each object.
-	dt_targ_a = bli_obj_target_datatype( *a );
-	dt_targ_x = bli_obj_target_datatype( *x );
+	dt_targ_a = bli_obj_target_dt( *a );
+	dt_targ_x = bli_obj_target_dt( *x );

 	// Determine whether each operand has unit stride.
 	a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
@@ -73,7 +73,7 @@ void bli_trmv_front
 	// Create an object to hold a copy-cast of alpha. Notice that we use
 	// the type union of the target datatypes of a and x to prevent any
 	// unnecessary loss of information during the computation.
-	dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
+	dt_alpha = bli_dt_union( dt_targ_a, dt_targ_x );
 	bli_obj_scalar_init_detached_copy_of( dt_alpha,
 	                                      BLIS_NO_CONJUGATE,
 	                                      alpha,
--- a/frame/2/trsv/bli_trsv_var_oapi.c
+++ b/frame/2/trsv/bli_trsv_var_oapi.c
@@ -48,7 +48,7 @@ void PASTEMAC0(opname) \
 { \
 	bli_init_once(); \
 \
-	num_t     dt        = bli_obj_datatype( *a ); \
+	num_t     dt        = bli_obj_dt( *a ); \
 \
 	uplo_t    uploa     = bli_obj_uplo( *a ); \
 	trans_t   transa    = bli_obj_conjtrans_status( *a ); \
--- a/frame/2/trsv/other/bli_trsv_front.c
+++ b/frame/2/trsv/other/bli_trsv_front.c
@@ -61,8 +61,8 @@ void bli_trsv_front


 	// Query the target datatypes of each object.
-	dt_targ_a = bli_obj_datatype( *a );
-	dt_targ_x = bli_obj_datatype( *x );
+	dt_targ_a = bli_obj_dt( *a );
+	dt_targ_x = bli_obj_dt( *x );

 	// Determine whether each operand has unit stride.
 	a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
@@ -73,7 +73,7 @@ void bli_trsv_front
 	// Create an object to hold a copy-cast of alpha. Notice that we use
 	// the type union of the target datatypes of a and x to prevent any
 	// unnecessary loss of information during the computation.
-	dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
+	dt_alpha = bli_dt_union( dt_targ_a, dt_targ_x );
 	bli_obj_scalar_init_detached_copy_of( dt_alpha,
 	                                      BLIS_NO_CONJUGATE,
 	                                      alpha,
--- a/frame/3/bli_l3_blocksize.c
+++ b/frame/3/bli_l3_blocksize.c
@@ -130,7 +130,7 @@ dim_t PASTEMAC0(opname) \
 \
 	/* Extract the execution datatype and use it to query the corresponding
 	   blocksize and blocksize maximum values from the blksz_t object. */ \
-	dt    = bli_obj_execution_datatype( *a ); \
+	dt    = bli_obj_exec_dt( *a ); \
 	bsize = bli_cntx_get_blksz( bszid, cntx ); \
 	b_alg = bli_blksz_get_def( dt, bsize ); \
 	b_max = bli_blksz_get_max( dt, bsize ); \
@@ -196,7 +196,7 @@ dim_t PASTEMAC0(opname) \
 \
 	/* Extract the execution datatype and use it to query the corresponding
 	   blocksize and blocksize maximum values from the blksz_t object. */ \
-	dt    = bli_obj_execution_datatype( *a ); \
+	dt    = bli_obj_exec_dt( *a ); \
 	bsize = bli_cntx_get_blksz( bszid, cntx ); \
 	b_alg = bli_blksz_get_def( dt, bsize ); \
 	b_max = bli_blksz_get_max( dt, bsize ); \
@@ -249,7 +249,7 @@ dim_t PASTEMAC0(opname) \
 \
 	/* Extract the execution datatype and use it to query the corresponding
 	   blocksize and blocksize maximum values from the blksz_t object. */ \
-	dt    = bli_obj_execution_datatype( *a ); \
+	dt    = bli_obj_exec_dt( *a ); \
 	bsize = bli_cntx_get_blksz( bszid, cntx ); \
 	b_alg = bli_blksz_get_def( dt, bsize ); \
 	b_max = bli_blksz_get_max( dt, bsize ); \
@@ -310,7 +310,7 @@ dim_t PASTEMAC0(opname) \
 \
 	/* Extract the execution datatype and use it to query the corresponding
 	   blocksize and blocksize maximum values from the blksz_t object. */ \
-	dt    = bli_obj_execution_datatype( *a ); \
+	dt    = bli_obj_exec_dt( *a ); \
 	bsize = bli_cntx_get_blksz( bszid, cntx ); \
 	b_alg = bli_blksz_get_def( dt, bsize ); \
 	b_max = bli_blksz_get_max( dt, bsize ); \
--- a/frame/3/bli_l3_check.c
+++ b/frame/3/bli_l3_check.c
@@ -479,7 +479,7 @@ void bli_l3_basic_check

 	// Check for sufficiently sized stack buffers

-	e_val = bli_check_sufficient_stack_buf_size( bli_obj_datatype( *a ), cntx );
+	e_val = bli_check_sufficient_stack_buf_size( bli_obj_dt( *a ), cntx );
 	bli_check_error_code( e_val );
 }

--- a/frame/3/bli_l3_ukr_oapi.c
+++ b/frame/3/bli_l3_ukr_oapi.c
@@ -49,7 +49,7 @@ void PASTEMAC0(opname) \
 { \
 	bli_init_once(); \
 \
-	num_t     dt        = bli_obj_datatype( *c ); \
+	num_t     dt        = bli_obj_dt( *c ); \
 \
 	dim_t     k         = bli_obj_width( *a ); \
 	void*     buf_a     = bli_obj_buffer_at_off( *a ); \
@@ -100,7 +100,7 @@ void PASTEMAC0(opname) \
 { \
 	bli_init_once(); \
 \
-	num_t     dt        = bli_obj_datatype( *c ); \
+	num_t     dt        = bli_obj_dt( *c ); \
 \
 	void*     buf_a     = bli_obj_buffer_at_off( *a ); \
 	void*     buf_b     = bli_obj_buffer_at_off( *b ); \
@@ -164,7 +164,7 @@ void PASTEMAC0(opname) \
 { \
 	bli_init_once(); \
 \
-	num_t     dt        = bli_obj_datatype( *c11 ); \
+	num_t     dt        = bli_obj_dt( *c11 ); \
 \
 	dim_t     k         = bli_obj_width( *a1x ); \
 	void*     buf_a1x   = bli_obj_buffer_at_off( *a1x ); \
--- a/frame/3/gemm/bli_gemm_ker_var2.c
+++ b/frame/3/gemm/bli_gemm_ker_var2.c
@@ -66,7 +66,7 @@ void bli_gemm_ker_var2
       thrinfo_t* thread
     )
 {
-	num_t     dt_exec   = bli_obj_execution_datatype( *c );
+	num_t     dt_exec   = bli_obj_exec_dt( *c );

 	pack_t    schema_a  = bli_obj_pack_schema( *a );
 	pack_t    schema_b  = bli_obj_pack_schema( *b );
--- a/frame/3/gemm/ind/bli_gemm4mb_ker_var2.c
+++ b/frame/3/gemm/ind/bli_gemm4mb_ker_var2.c
@@ -66,7 +66,7 @@ void bli_gemm4mb_ker_var2
       thrinfo_t* thread
     )
 {
-	num_t     dt_exec   = bli_obj_execution_datatype( *c );
+	num_t     dt_exec   = bli_obj_exec_dt( *c );

 	pack_t    schema_a  = bli_obj_pack_schema( *a );
 	pack_t    schema_b  = bli_obj_pack_schema( *b );
--- a/frame/3/gemm/ind/old/bli_gemm3m2_ker_var2.c
+++ b/frame/3/gemm/ind/old/bli_gemm3m2_ker_var2.c
@@ -66,7 +66,7 @@ void bli_gemm3m2_ker_var2
       thrinfo_t* thread
     )
 {
-	num_t     dt_exec   = bli_obj_execution_datatype( *c );
+	num_t     dt_exec   = bli_obj_exec_dt( *c );

 	pack_t    schema_a  = bli_obj_pack_schema( *a );
 	pack_t    schema_b  = bli_obj_pack_schema( *b );
--- a/frame/3/gemm/other/bli_gemm_ker_var5.c
+++ b/frame/3/gemm/other/bli_gemm_ker_var5.c
@@ -58,7 +58,7 @@ void bli_gemm_ker_var5( obj_t*  a,
                        gemm_t* cntl,
                        gemm_thrinfo_t* thread )
 {
-	num_t     dt_exec   = bli_obj_execution_datatype( *c );
+	num_t     dt_exec   = bli_obj_exec_dt( *c );

 	dim_t     m         = bli_obj_length( *c );
 	dim_t     n         = bli_obj_width( *c );
--- a/frame/3/her2k/bli_her2k_front.c
+++ b/frame/3/her2k/bli_her2k_front.c
@@ -83,7 +83,7 @@ void bli_her2k_front
 	bli_obj_toggle_conj( ah_local );

 	// Initialize a conjugated copy of alpha.
-	bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ),
+	bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *a ),
 	                                      BLIS_CONJUGATE,
 	                                      alpha,
 	                                      &alpha_conj );
--- a/frame/3/herk/bli_herk_l_ker_var2.c
+++ b/frame/3/herk/bli_herk_l_ker_var2.c
@@ -67,7 +67,7 @@ void bli_herk_l_ker_var2
       thrinfo_t* thread
     )
 {
-	num_t     dt_exec   = bli_obj_execution_datatype( *c );
+	num_t     dt_exec   = bli_obj_exec_dt( *c );

 	doff_t    diagoffc  = bli_obj_diag_offset( *c );

--- a/frame/3/herk/bli_herk_u_ker_var2.c
+++ b/frame/3/herk/bli_herk_u_ker_var2.c
@@ -67,7 +67,7 @@ void bli_herk_u_ker_var2
       thrinfo_t* thread
     )
 {
-	num_t     dt_exec   = bli_obj_execution_datatype( *c );
+	num_t     dt_exec   = bli_obj_exec_dt( *c );

 	doff_t    diagoffc  = bli_obj_diag_offset( *c );

--- a/frame/3/trmm/bli_trmm_ll_ker_var2.c
+++ b/frame/3/trmm/bli_trmm_ll_ker_var2.c
@@ -65,7 +65,7 @@ void bli_trmm_ll_ker_var2
       thrinfo_t* thread
     )
 {
-	num_t     dt_exec   = bli_obj_execution_datatype( *c );
+	num_t     dt_exec   = bli_obj_exec_dt( *c );

 	doff_t    diagoffa  = bli_obj_diag_offset( *a );

--- a/frame/3/trmm/bli_trmm_lu_ker_var2.c
+++ b/frame/3/trmm/bli_trmm_lu_ker_var2.c
@@ -65,7 +65,7 @@ void bli_trmm_lu_ker_var2
       thrinfo_t* thread
     )
 {
-	num_t     dt_exec   = bli_obj_execution_datatype( *c );
+	num_t     dt_exec   = bli_obj_exec_dt( *c );

 	doff_t    diagoffa  = bli_obj_diag_offset( *a );

--- a/frame/3/trmm/bli_trmm_rl_ker_var2.c
+++ b/frame/3/trmm/bli_trmm_rl_ker_var2.c
@@ -65,7 +65,7 @@ void bli_trmm_rl_ker_var2
       thrinfo_t* thread
     )
 {
-	num_t     dt_exec   = bli_obj_execution_datatype( *c );
+	num_t     dt_exec   = bli_obj_exec_dt( *c );

 	doff_t    diagoffb  = bli_obj_diag_offset( *b );

--- a/frame/3/trmm/bli_trmm_ru_ker_var2.c
+++ b/frame/3/trmm/bli_trmm_ru_ker_var2.c
@@ -65,7 +65,7 @@ void bli_trmm_ru_ker_var2
       thrinfo_t* thread
     )
 {
-	num_t     dt_exec   = bli_obj_execution_datatype( *c );
+	num_t     dt_exec   = bli_obj_exec_dt( *c );

 	doff_t    diagoffb  = bli_obj_diag_offset( *b );

--- a/frame/3/trsm/bli_trsm_ll_ker_var2.c
+++ b/frame/3/trsm/bli_trsm_ll_ker_var2.c
@@ -65,7 +65,7 @@ void bli_trsm_ll_ker_var2
       thrinfo_t* thread
     )
 {
-	num_t     dt_exec   = bli_obj_execution_datatype( *c );
+	num_t     dt_exec   = bli_obj_exec_dt( *c );

 	doff_t    diagoffa  = bli_obj_diag_offset( *a );

--- a/frame/3/trsm/bli_trsm_lu_ker_var2.c
+++ b/frame/3/trsm/bli_trsm_lu_ker_var2.c
@@ -65,7 +65,7 @@ void bli_trsm_lu_ker_var2
       thrinfo_t* thread
     )
 {
-	num_t     dt_exec   = bli_obj_execution_datatype( *c );
+	num_t     dt_exec   = bli_obj_exec_dt( *c );

 	doff_t    diagoffa  = bli_obj_diag_offset( *a );

--- a/frame/3/trsm/bli_trsm_rl_ker_var2.c
+++ b/frame/3/trsm/bli_trsm_rl_ker_var2.c
@@ -65,7 +65,7 @@ void bli_trsm_rl_ker_var2
       thrinfo_t* thread
     )
 {
-	num_t     dt_exec   = bli_obj_execution_datatype( *c );
+	num_t     dt_exec   = bli_obj_exec_dt( *c );

 	doff_t    diagoffb  = bli_obj_diag_offset( *b );

--- a/frame/3/trsm/bli_trsm_ru_ker_var2.c
+++ b/frame/3/trsm/bli_trsm_ru_ker_var2.c
@@ -65,7 +65,7 @@ void bli_trsm_ru_ker_var2
       thrinfo_t* thread
     )
 {
-	num_t     dt_exec   = bli_obj_execution_datatype( *c );
+	num_t     dt_exec   = bli_obj_exec_dt( *c );

 	doff_t    diagoffb  = bli_obj_diag_offset( *b );

--- a/frame/base/bli_blksz.c
+++ b/frame/base/bli_blksz.c
@@ -261,7 +261,7 @@ dim_t bli_determine_blocksize_f

 	// Extract the execution datatype and use it to query the corresponding
 	// blocksize and blocksize maximum values from the blksz_t object.
-	dt    = bli_obj_execution_datatype( *obj );
+	dt    = bli_obj_exec_dt( *obj );
 	bsize = bli_cntx_get_blksz( bszid, cntx );
 	b_alg = bli_blksz_get_def( dt, bsize );
 	b_max = bli_blksz_get_max( dt, bsize );
@@ -287,7 +287,7 @@ dim_t bli_determine_blocksize_b

 	// Extract the execution datatype and use it to query the corresponding
 	// blocksize and blocksize maximum values from the blksz_t object.
-	dt    = bli_obj_execution_datatype( *obj );
+	dt    = bli_obj_exec_dt( *obj );
 	bsize = bli_cntx_get_blksz( bszid, cntx );
 	b_alg = bli_blksz_get_def( dt, bsize );
 	b_max = bli_blksz_get_max( dt, bsize );
--- a/frame/base/bli_check.c
+++ b/frame/base/bli_check.c
@@ -159,7 +159,7 @@ err_t bli_check_object_valid_datatype( obj_t* a )
 	err_t e_val;
 	num_t dt;

-	dt = bli_obj_datatype( *a );
+	dt = bli_obj_dt( *a );
 	e_val = bli_check_valid_datatype( dt );

 	return e_val;
@@ -180,7 +180,7 @@ err_t bli_check_noninteger_object( obj_t* a )
 	err_t e_val;
 	num_t dt;

-	dt = bli_obj_datatype( *a );
+	dt = bli_obj_dt( *a );
 	e_val = bli_check_noninteger_datatype( dt );

 	return e_val;
@@ -201,7 +201,7 @@ err_t bli_check_nonconstant_object( obj_t* a )
 	err_t e_val;
 	num_t dt;

-	dt = bli_obj_datatype( *a );
+	dt = bli_obj_dt( *a );
 	e_val = bli_check_nonconstant_datatype( dt );

 	return e_val;
@@ -225,7 +225,7 @@ err_t bli_check_floating_object( obj_t* a )
 	err_t e_val;
 	num_t dt;

-	dt = bli_obj_datatype( *a );
+	dt = bli_obj_dt( *a );
 	e_val = bli_check_floating_datatype( dt );

 	return e_val;
@@ -247,7 +247,7 @@ err_t bli_check_real_object( obj_t* a )
 	err_t e_val;
 	num_t dt;

-	dt = bli_obj_datatype( *a );
+	dt = bli_obj_dt( *a );
 	e_val = bli_check_real_datatype( dt );

 	return e_val;
@@ -268,7 +268,7 @@ err_t bli_check_integer_object( obj_t* a )
 	err_t e_val;
 	num_t dt;

-	dt = bli_obj_datatype( *a );
+	dt = bli_obj_dt( *a );
 	e_val = bli_check_integer_datatype( dt );

 	return e_val;
@@ -292,8 +292,8 @@ err_t bli_check_consistent_object_datatypes( obj_t* a, obj_t* b )
 	num_t dt_a;
 	num_t dt_b;

-	dt_a = bli_obj_datatype( *a );
-	dt_b = bli_obj_datatype( *b );
+	dt_a = bli_obj_dt( *a );
+	dt_b = bli_obj_dt( *b );

 	e_val = bli_check_consistent_datatypes( dt_a, dt_b );

@@ -320,8 +320,8 @@ err_t bli_check_object_real_proj_of( obj_t* c, obj_t* r )
 	num_t dt_c;
 	num_t dt_r;

-	dt_c = bli_obj_datatype( *c );
-	dt_r = bli_obj_datatype( *r );
+	dt_c = bli_obj_dt( *c );
+	dt_r = bli_obj_dt( *r );

 	e_val = bli_check_datatype_real_proj_of( dt_c, dt_r );

@@ -773,7 +773,7 @@ err_t bli_check_sufficient_stack_buf_size( num_t dt, cntx_t* cntx )

 	dim_t mr      = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx );
 	dim_t nr      = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx );
-	siz_t dt_size = bli_datatype_size( dt );
+	siz_t dt_size = bli_dt_size( dt );

 	// NOTE: For induced methods, we use the size of the complex datatypes
 	// (rather than the size of the native micro-kernels' datatype) because
--- a/frame/base/bli_cntx.h
+++ b/frame/base/bli_cntx.h
@@ -469,7 +469,7 @@ static bool_t bli_cntx_l3_nat_ukr_prefers_cols_dt( num_t dt, l3ukr_t ukr_id, cnt

 static bool_t bli_cntx_l3_nat_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
 {
-	const num_t  dt    = bli_obj_datatype( *obj );
+	const num_t  dt    = bli_obj_dt( *obj );
 	const bool_t ukr_prefers_rows
 	                   = bli_cntx_l3_nat_ukr_prefers_rows_dt( dt, ukr_id, cntx );
 	const bool_t ukr_prefers_cols
@@ -514,7 +514,7 @@ static bool_t bli_cntx_l3_ukr_prefers_rows_dt( num_t dt, l3ukr_t ukr_id, cntx_t*
 	// For induced methods, return the ukernel storage preferences of the
 	// corresponding real micro-kernel.
 	if ( bli_cntx_method( cntx ) != BLIS_NAT )
-	    dt = bli_datatype_proj_to_real( dt );
+	    dt = bli_dt_proj_to_real( dt );

 	return bli_cntx_l3_nat_ukr_prefers_rows_dt( dt, ukr_id, cntx );
 }
@@ -524,14 +524,14 @@ static bool_t bli_cntx_l3_ukr_prefers_cols_dt( num_t dt, l3ukr_t ukr_id, cntx_t*
 	// For induced methods, return the ukernel storage preferences of the
 	// corresponding real micro-kernel.
 	if ( bli_cntx_method( cntx ) != BLIS_NAT )
-	    dt = bli_datatype_proj_to_real( dt );
+	    dt = bli_dt_proj_to_real( dt );

 	return bli_cntx_l3_nat_ukr_prefers_cols_dt( dt, ukr_id, cntx );
 }

 static bool_t bli_cntx_l3_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
 {
-	const num_t  dt    = bli_obj_datatype( *obj );
+	const num_t  dt    = bli_obj_dt( *obj );
 	const bool_t ukr_prefers_rows
 	                   = bli_cntx_l3_ukr_prefers_rows_dt( dt, ukr_id, cntx );
 	const bool_t ukr_prefers_cols
--- a/frame/base/bli_machval.c
+++ b/frame/base/bli_machval.c
@@ -51,7 +51,7 @@ static FUNCPTR_T GENARRAY(ftypes,machval);
 void bli_machval( machval_t mval,
                  obj_t*    v )
 {
-	num_t     dt_v  = bli_obj_datatype( *v );
+	num_t     dt_v  = bli_obj_dt( *v );

 	void*     buf_v = bli_obj_buffer_at_off( *v );

--- a/frame/base/bli_membrk.c
+++ b/frame/base/bli_membrk.c
@@ -412,7 +412,7 @@ void bli_membrk_compute_pool_block_sizes_dt
       cntx_t* cntx
     )
 {
-	siz_t    size_dt = bli_datatype_size( dt );
+	siz_t    size_dt = bli_dt_size( dt );

 	blksz_t* mr;
 	blksz_t* nr;
--- a/frame/base/bli_obj.c
+++ b/frame/base/bli_obj.c
@@ -77,7 +77,7 @@ void bli_obj_create_without_buffer( num_t  dt,
 		bli_obj_create_without_buffer_check( dt, m, n, obj );

 	// Query the size of one element of the object's pre-set datatype.
-	elem_size = bli_datatype_size( dt );
+	elem_size = bli_dt_size( dt );

 	// Set any default properties that are appropriate.
 	bli_obj_set_defaults( *obj );
@@ -95,10 +95,10 @@ void bli_obj_create_without_buffer( num_t  dt,

 	// Set individual fields.
 	bli_obj_set_buffer( NULL, *obj );
-	bli_obj_set_datatype( dt, *obj );
+	bli_obj_set_dt( dt, *obj );
 	bli_obj_set_elem_size( elem_size, *obj );
-	bli_obj_set_target_datatype( dt, *obj );
-	bli_obj_set_execution_datatype( dt, *obj );
+	bli_obj_set_target_dt( dt, *obj );
+	bli_obj_set_exec_dt( dt, *obj );
 	bli_obj_set_dims( m, n, *obj );
 	bli_obj_set_offs( 0, 0, *obj );
 	bli_obj_set_diag_offset( 0, *obj );
@@ -220,7 +220,7 @@ void bli_obj_create_1x1_with_attached_buffer( num_t  dt,

 void bli_obj_create_conf_to( obj_t* s, obj_t* d )
 {
-	const num_t dt = bli_obj_datatype( *s );
+	const num_t dt = bli_obj_dt( *s );
 	const dim_t m  = bli_obj_length( *s );
 	const dim_t n  = bli_obj_width( *s );
 	const inc_t rs = bli_obj_row_stride( *s );
@@ -422,10 +422,10 @@ static siz_t dt_sizes[6] =
 	sizeof( constdata_t )
 };

-siz_t bli_datatype_size( num_t dt )
+siz_t bli_dt_size( num_t dt )
 {
 	if ( bli_error_checking_is_enabled() )
-		bli_datatype_size_check( dt );
+		bli_dt_size_check( dt );

 	return dt_sizes[dt];
 }
@@ -439,10 +439,10 @@ static char* dt_names[ BLIS_NUM_FP_TYPES+1 ] =
 	"int"
 };

-char* bli_datatype_string( num_t dt )
+char* bli_dt_string( num_t dt )
 {
 	if ( bli_error_checking_is_enabled() )
-		bli_datatype_string_check( dt );
+		bli_dt_string_check( dt );

 	return dt_names[dt];
 }
@@ -493,10 +493,10 @@ static num_t type_union[BLIS_NUM_FP_TYPES][BLIS_NUM_FP_TYPES] =
 	/* z */ { BLIS_DCOMPLEX, BLIS_DCOMPLEX, BLIS_DCOMPLEX, BLIS_DCOMPLEX }
 };

-num_t bli_datatype_union( num_t dt1, num_t dt2 )
+num_t bli_dt_union( num_t dt1, num_t dt2 )
 {
 	if ( bli_error_checking_is_enabled() )
-		bli_datatype_union_check( dt1, dt2 );
+		bli_dt_union_check( dt1, dt2 );

 	return type_union[dt1][dt2];
 }
@@ -536,9 +536,9 @@ void bli_obj_print( char* label, obj_t* obj )
 	fprintf( file, " info            %lX\n", ( unsigned long int )(*obj).info );
 	fprintf( file, " - is complex    %lu\n", ( unsigned long int )bli_obj_is_complex( *obj ) );
 	fprintf( file, " - is d. prec    %lu\n", ( unsigned long int )bli_obj_is_double_precision( *obj ) );
-	fprintf( file, " - datatype      %lu\n", ( unsigned long int )bli_obj_datatype( *obj ) );
-	fprintf( file, " - target dt     %lu\n", ( unsigned long int )bli_obj_target_datatype( *obj ) );
-	fprintf( file, " - exec dt       %lu\n", ( unsigned long int )bli_obj_execution_datatype( *obj ) );
+	fprintf( file, " - datatype      %lu\n", ( unsigned long int )bli_obj_dt( *obj ) );
+	fprintf( file, " - target dt     %lu\n", ( unsigned long int )bli_obj_target_dt( *obj ) );
+	fprintf( file, " - exec dt       %lu\n", ( unsigned long int )bli_obj_exec_dt( *obj ) );
 	fprintf( file, " - has trans     %lu\n", ( unsigned long int )bli_obj_has_trans( *obj ) );
 	fprintf( file, " - has conj      %lu\n", ( unsigned long int )bli_obj_has_conj( *obj ) );
 	fprintf( file, " - unit diag?    %lu\n", ( unsigned long int )bli_obj_has_unit_diag( *obj ) );
--- a/frame/base/bli_obj.h
+++ b/frame/base/bli_obj.h
@@ -87,14 +87,14 @@ void bli_adjust_strides( dim_t  m,
                         inc_t* cs,
                         inc_t* is );

-siz_t bli_datatype_size( num_t dt );
-char* bli_datatype_string( num_t dt );
+siz_t bli_dt_size( num_t dt );
+char* bli_dt_string( num_t dt );

 dim_t bli_align_dim_to_mult( dim_t dim, dim_t dim_mult );
 dim_t bli_align_dim_to_size( dim_t dim, siz_t elem_size, siz_t align_size );
 dim_t bli_align_ptr_to_size( void* p, size_t align_size );

-num_t bli_datatype_union( num_t dt1, num_t dt2 );
+num_t bli_dt_union( num_t dt1, num_t dt2 );

 void bli_obj_print( char* label, obj_t* obj );

--- a/frame/base/bli_obj_scalar.c
+++ b/frame/base/bli_obj_scalar.c
@@ -74,7 +74,7 @@ void bli_obj_scalar_init_detached_copy_of( num_t  dt,
 void bli_obj_scalar_detach( obj_t* a,
                            obj_t* alpha )
 {
-	num_t dt_a = bli_obj_datatype( *a );
+	num_t dt_a = bli_obj_dt( *a );

 	// Initialize alpha to be a bufferless internal scalar of the same
 	// datatype as A.
@@ -92,7 +92,7 @@ void bli_obj_scalar_attach( conj_t conj,

 	// Make a copy-cast of alpha of the same datatype as A. This step
 	// gives us the opportunity to conjugate and/or typecast alpha.
-	bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ),
+	bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *a ),
 	                                      conj,
 	                                      alpha,
 	                                      &alpha_cast );
@@ -109,7 +109,7 @@ void bli_obj_scalar_apply_scalar( obj_t* alpha,

 	// Make a copy-cast of alpha of the same datatype as A. This step
 	// gives us the opportunity to typecast alpha.
-	bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ),
+	bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *a ),
 	                                      BLIS_NO_CONJUGATE,
 	                                      alpha,
 	                                      &alpha_cast );
@@ -125,7 +125,7 @@ void bli_obj_scalar_apply_scalar( obj_t* alpha,

 void bli_obj_scalar_reset( obj_t* a )
 {
-	num_t dt       = bli_obj_datatype( *a );
+	num_t dt       = bli_obj_dt( *a );
 	void* scalar_a = bli_obj_internal_scalar_buffer( *a );
 	void* one      = bli_obj_buffer_for_const( dt, BLIS_ONE );

@@ -141,7 +141,7 @@ void bli_obj_scalar_reset( obj_t* a )
 bool_t bli_obj_scalar_has_nonzero_imag( obj_t* a )
 {
 	bool_t r_val     = FALSE;
-	num_t  dt        = bli_obj_datatype( *a );
+	num_t  dt        = bli_obj_dt( *a );
 	void*  scalar_a  = bli_obj_internal_scalar_buffer( *a );

 	if      ( bli_is_real( dt ) )
--- a/frame/base/bli_query.c
+++ b/frame/base/bli_query.c
@@ -47,8 +47,8 @@ bool_t bli_obj_equals( obj_t* a,
 	     !bli_obj_is_1x1( *b ) )
 		bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );

-	dt_a = bli_obj_datatype( *a );
-	dt_b = bli_obj_datatype( *b );
+	dt_a = bli_obj_dt( *a );
+	dt_b = bli_obj_dt( *b );

 	// If B is BLIS_CONSTANT, then we need to test equality based on the
 	// datatype of A--this works even if A is also BLIS_CONSTANT. If B
@@ -90,8 +90,8 @@ bool_t bli_obj_imag_equals( obj_t* a,
 	num_t  dt_a;
 	num_t  dt_b;

-	dt_a = bli_obj_datatype( *a );
-	dt_b = bli_obj_datatype( *b );
+	dt_a = bli_obj_dt( *a );
+	dt_b = bli_obj_dt( *b );

 	// The function is not yet implemented for vectors and matrices.
 	if ( !bli_obj_is_1x1( *a ) ||
@@ -108,7 +108,7 @@ bool_t bli_obj_imag_equals( obj_t* a,
 	}
 	else // if ( bli_is_complex( dt_a ) )
 	{
-		num_t dt_a_real = bli_datatype_proj_to_real( dt_a );
+		num_t dt_a_real = bli_dt_proj_to_real( dt_a );

 		// Now we compare the imaginary part of a to b. Notice that since
 		// we are using bli_obj_buffer_for_1x1() to acquire the buffer for
--- a/frame/base/bli_setgetij.c
+++ b/frame/base/bli_setgetij.c
@@ -57,7 +57,7 @@ err_t bli_setijm
 	dim_t n  = bli_obj_width( *b );
 	dim_t rs = bli_obj_row_stride( *b );
 	dim_t cs = bli_obj_col_stride( *b );
-	num_t dt = bli_obj_datatype( *b );
+	num_t dt = bli_obj_dt( *b );

 	// Return error if i or j is beyond bounds of matrix/vector.
 	if ( m <= i ) return BLIS_FAILURE;
@@ -131,7 +131,7 @@ err_t bli_getijm
 	dim_t n  = bli_obj_width( *b );
 	dim_t rs = bli_obj_row_stride( *b );
 	dim_t cs = bli_obj_col_stride( *b );
-	num_t dt = bli_obj_datatype( *b );
+	num_t dt = bli_obj_dt( *b );

 	// Return error if i or j is beyond bounds of matrix/vector.
 	if ( m <= i ) return BLIS_FAILURE;
--- a/frame/base/check/bli_obj_check.c
+++ b/frame/base/check/bli_obj_check.c
@@ -158,7 +158,7 @@ void bli_obj_create_const_copy_of_check( obj_t* a, obj_t* b )
 }
 #endif

-void bli_datatype_size_check( num_t dt )
+void bli_dt_size_check( num_t dt )
 {
 	err_t e_val;

@@ -166,7 +166,7 @@ void bli_datatype_size_check( num_t dt )
 	bli_check_error_code( e_val );
 }

-void bli_datatype_string_check( num_t dt )
+void bli_dt_string_check( num_t dt )
 {
 	err_t e_val;

@@ -174,7 +174,7 @@ void bli_datatype_string_check( num_t dt )
 	bli_check_error_code( e_val );
 }

-void bli_datatype_union_check( num_t dt1, num_t dt2 )
+void bli_dt_union_check( num_t dt1, num_t dt2 )
 {
 	err_t e_val;

--- a/frame/base/check/bli_obj_check.h
+++ b/frame/base/check/bli_obj_check.h
@@ -64,11 +64,11 @@ void bli_obj_create_const_check( double value, obj_t* obj );

 void bli_obj_create_const_copy_of_check( obj_t* a, obj_t* b );

-void bli_datatype_size_check( num_t dt );
+void bli_dt_size_check( num_t dt );

-void bli_datatype_string_check( num_t dt );
+void bli_dt_string_check( num_t dt );

-void bli_datatype_union_check( num_t dt1, num_t dt2 );
+void bli_dt_union_check( num_t dt1, num_t dt2 );

 void bli_obj_print_check( char* label, obj_t* obj );

--- a/frame/include/bli_obj_macro_defs.h
+++ b/frame/include/bli_obj_macro_defs.h
@@ -85,23 +85,23 @@
 \
 	( ( (obj).info & BLIS_PRECISION_BIT ) == BLIS_BITVAL_DOUBLE_PREC )

-#define bli_obj_datatype( obj ) \
+#define bli_obj_dt( obj ) \
 \
 	(   (obj).info & BLIS_DATATYPE_BITS )

-#define bli_obj_datatype_proj_to_real( obj ) \
+#define bli_obj_dt_proj_to_real( obj ) \
 \
 	( ( (obj).info & BLIS_DATATYPE_BITS ) & ~BLIS_BITVAL_COMPLEX )

-#define bli_obj_datatype_proj_to_complex( obj ) \
+#define bli_obj_dt_proj_to_complex( obj ) \
 \
 	( ( (obj).info & BLIS_DATATYPE_BITS ) &  BLIS_BITVAL_COMPLEX )

-#define bli_obj_target_datatype( obj ) \
+#define bli_obj_target_dt( obj ) \
 \
 	( ( (obj).info & BLIS_TARGET_DT_BITS ) >> BLIS_TARGET_DT_SHIFT )

-#define bli_obj_execution_datatype( obj ) \
+#define bli_obj_exec_dt( obj ) \
 \
 	( ( (obj).info & BLIS_EXECUTION_DT_BITS ) >> BLIS_EXECUTION_DT_SHIFT )

@@ -277,17 +277,17 @@
 	(obj).info = ( (obj).info & ~BLIS_INVERT_DIAG_BIT ) | (inv_diag); \
 }

-#define bli_obj_set_datatype( dt, obj ) \
+#define bli_obj_set_dt( dt, obj ) \
 { \
 	(obj).info = ( (obj).info & ~BLIS_DATATYPE_BITS ) | (dt); \
 }

-#define bli_obj_set_target_datatype( dt, obj ) \
+#define bli_obj_set_target_dt( dt, obj ) \
 { \
 	(obj).info = ( (obj).info & ~BLIS_TARGET_DT_BITS ) | ( dt << BLIS_TARGET_DT_SHIFT ); \
 }

-#define bli_obj_set_execution_datatype( dt, obj ) \
+#define bli_obj_set_exec_dt( dt, obj ) \
 { \
 	(obj).info = ( (obj).info & ~BLIS_EXECUTION_DT_BITS ) | ( dt << BLIS_EXECUTION_DT_SHIFT ); \
 }
--- a/frame/include/bli_param_macro_defs.h
+++ b/frame/include/bli_param_macro_defs.h
@@ -99,11 +99,11 @@
    ( bli_is_double( dt ) || \
 	  bli_is_dcomplex( dt ) )

-#define bli_datatype_proj_to_real( dt ) \
+#define bli_dt_proj_to_real( dt ) \
 \
 	( dt & ~BLIS_BITVAL_COMPLEX )

-#define bli_datatype_proj_to_complex( dt ) \
+#define bli_dt_proj_to_complex( dt ) \
 \
 	( dt &  BLIS_BITVAL_COMPLEX )

@@ -753,7 +753,7 @@
 	} \
 	else \
 	{ \
-		dt_scalar  = bli_obj_datatype( *(obj_scalar) ); \
+		dt_scalar  = bli_obj_dt( *(obj_scalar) ); \
 		buf_scalar = bli_obj_buffer_at_off( *(obj_scalar) ); \
 	} \
 }
--- a/frame/ind/misc/bli_l3_ind_opt.h
+++ b/frame/ind/misc/bli_l3_ind_opt.h
@@ -52,7 +52,7 @@
 	if ( bli_obj_imag_equals( &beta, &BLIS_ZERO ) && \
 	     !bli_is_gen_stored( rs_c, cs_c ) ) \
 	{ \
-		dt_exec = bli_datatype_proj_to_real( dt_exec ); \
+		dt_exec = bli_dt_proj_to_real( dt_exec ); \
 \
 		if ( bli_is_1e_packed( schema_a ) ) \
 		{ \
--- a/frame/ind/oapi/bli_l3_3m4m1m_oapi.c
+++ b/frame/ind/oapi/bli_l3_3m4m1m_oapi.c
@@ -52,7 +52,7 @@ void PASTEMAC(opname,imeth) \
 	bli_init_once(); \
 \
 	ind_t   ind      = PASTEMAC0(imeth); \
-	num_t   dt       = bli_obj_datatype( *c ); \
+	num_t   dt       = bli_obj_dt( *c ); \
 	obj_t*  beta_use = beta; \
 \
 	dim_t   i; \
@@ -147,7 +147,7 @@ void PASTEMAC(opname,imeth) \
 	bli_init_once(); \
 \
 	ind_t   ind      = PASTEMAC0(imeth); \
-	num_t   dt       = bli_obj_datatype( *c ); \
+	num_t   dt       = bli_obj_dt( *c ); \
 	obj_t*  beta_use = beta; \
 \
 	dim_t   i; \
@@ -225,7 +225,7 @@ void PASTEMAC(opname,imeth) \
 	bli_init_once(); \
 \
 	ind_t   ind      = PASTEMAC0(imeth); \
-	num_t   dt       = bli_obj_datatype( *c ); \
+	num_t   dt       = bli_obj_dt( *c ); \
 	obj_t*  beta_use = beta; \
 \
 	dim_t   i; \
@@ -295,7 +295,7 @@ void PASTEMAC(opname,imeth) \
 	bli_init_once(); \
 \
 	ind_t   ind      = PASTEMAC0(imeth); \
-	num_t   dt       = bli_obj_datatype( *b ); \
+	num_t   dt       = bli_obj_dt( *b ); \
 \
 	dim_t   i; \
 \
@@ -352,7 +352,7 @@ void PASTEMAC(opname,imeth) \
 	bli_init_once(); \
 \
 	ind_t   ind      = PASTEMAC0(imeth); \
-	num_t   dt       = bli_obj_datatype( *b ); \
+	num_t   dt       = bli_obj_dt( *b ); \
 \
 	/* If the objects are in the real domain, execute the native
 	   implementation. */ \
--- a/frame/ind/oapi/bli_l3_ind_oapi.c
+++ b/frame/ind/oapi/bli_l3_ind_oapi.c
@@ -52,7 +52,7 @@ void PASTEMAC(opname,imeth) \
 { \
 	bli_init_once(); \
 \
-	num_t                dt   = bli_obj_datatype( *c ); \
+	num_t                dt   = bli_obj_dt( *c ); \
 	PASTECH(opname,_oft) func = PASTEMAC(opname,ind_get_avail)( dt ); \
 \
 	func( alpha, a, b, beta, c, cntx ); \
@@ -81,7 +81,7 @@ void PASTEMAC(opname,imeth) \
 { \
 	bli_init_once(); \
 \
-	num_t                dt   = bli_obj_datatype( *c ); \
+	num_t                dt   = bli_obj_dt( *c ); \
 	PASTECH(opname,_oft) func = PASTEMAC(opname,ind_get_avail)( dt ); \
 \
 	func( side, alpha, a, b, beta, c, cntx ); \
@@ -108,7 +108,7 @@ void PASTEMAC(opname,imeth) \
 { \
 	bli_init_once(); \
 \
-	num_t                dt   = bli_obj_datatype( *c ); \
+	num_t                dt   = bli_obj_dt( *c ); \
 	PASTECH(opname,_oft) func = PASTEMAC(opname,ind_get_avail)( dt ); \
 \
 	func( alpha, a, beta, c, cntx ); \
@@ -134,7 +134,7 @@ void PASTEMAC(opname,imeth) \
 { \
 	bli_init_once(); \
 \
-	num_t                dt   = bli_obj_datatype( *b ); \
+	num_t                dt   = bli_obj_dt( *b ); \
 	PASTECH(opname,_oft) func = PASTEMAC(opname,ind_get_avail)( dt ); \
 \
 	func( side, alpha, a, b, cntx ); \
--- a/frame/thread/bli_thread.c
+++ b/frame/thread/bli_thread.c
@@ -203,7 +203,7 @@ siz_t bli_thread_get_range_l2r
       dim_t*     end
     )
 {
-	num_t dt = bli_obj_datatype( *a );
+	num_t dt = bli_obj_dt( *a );
 	dim_t m  = bli_obj_length_after_trans( *a );
 	dim_t n  = bli_obj_width_after_trans( *a );
 	dim_t bf = bli_blksz_get_def( dt, bmult );
@@ -223,7 +223,7 @@ siz_t bli_thread_get_range_r2l
       dim_t*     end
     )
 {
-	num_t dt = bli_obj_datatype( *a );
+	num_t dt = bli_obj_dt( *a );
 	dim_t m  = bli_obj_length_after_trans( *a );
 	dim_t n  = bli_obj_width_after_trans( *a );
 	dim_t bf = bli_blksz_get_def( dt, bmult );
@@ -243,7 +243,7 @@ siz_t bli_thread_get_range_t2b
       dim_t*     end
     )
 {
-	num_t dt = bli_obj_datatype( *a );
+	num_t dt = bli_obj_dt( *a );
 	dim_t m  = bli_obj_length_after_trans( *a );
 	dim_t n  = bli_obj_width_after_trans( *a );
 	dim_t bf = bli_blksz_get_def( dt, bmult );
@@ -263,7 +263,7 @@ siz_t bli_thread_get_range_b2t
       dim_t*     end
     )
 {
-	num_t dt = bli_obj_datatype( *a );
+	num_t dt = bli_obj_dt( *a );
 	dim_t m  = bli_obj_length_after_trans( *a );
 	dim_t n  = bli_obj_width_after_trans( *a );
 	dim_t bf = bli_blksz_get_def( dt, bmult );
@@ -760,7 +760,7 @@ siz_t bli_thread_get_range_weighted_l2r
 	if ( bli_obj_intersects_diag( *a ) &&
 	     bli_obj_is_upper_or_lower( *a ) )
 	{
-		num_t  dt      = bli_obj_datatype( *a );
+		num_t  dt      = bli_obj_dt( *a );
 		doff_t diagoff = bli_obj_diag_offset( *a );
 		uplo_t uplo    = bli_obj_uplo( *a );
 		dim_t  m       = bli_obj_length( *a );
@@ -810,7 +810,7 @@ siz_t bli_thread_get_range_weighted_r2l
 	if ( bli_obj_intersects_diag( *a ) &&
 	     bli_obj_is_upper_or_lower( *a ) )
 	{
-		num_t  dt      = bli_obj_datatype( *a );
+		num_t  dt      = bli_obj_dt( *a );
 		doff_t diagoff = bli_obj_diag_offset( *a );
 		uplo_t uplo    = bli_obj_uplo( *a );
 		dim_t  m       = bli_obj_length( *a );
@@ -862,7 +862,7 @@ siz_t bli_thread_get_range_weighted_t2b
 	if ( bli_obj_intersects_diag( *a ) &&
 	     bli_obj_is_upper_or_lower( *a ) )
 	{
-		num_t  dt      = bli_obj_datatype( *a );
+		num_t  dt      = bli_obj_dt( *a );
 		doff_t diagoff = bli_obj_diag_offset( *a );
 		uplo_t uplo    = bli_obj_uplo( *a );
 		dim_t  m       = bli_obj_length( *a );
@@ -914,7 +914,7 @@ siz_t bli_thread_get_range_weighted_b2t
 	if ( bli_obj_intersects_diag( *a ) &&
 	     bli_obj_is_upper_or_lower( *a ) )
 	{
-		num_t  dt      = bli_obj_datatype( *a );
+		num_t  dt      = bli_obj_dt( *a );
 		doff_t diagoff = bli_obj_diag_offset( *a );
 		uplo_t uplo    = bli_obj_uplo( *a );
 		dim_t  m       = bli_obj_length( *a );
--- a/frame/util/bli_util_oapi.c
+++ b/frame/util/bli_util_oapi.c
@@ -54,7 +54,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	dim_t     n         = bli_obj_vector_dim( *x ); \
 	void*     buf_x     = bli_obj_buffer_at_off( *x ); \
@@ -93,7 +93,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *a ); \
+	num_t     dt        = bli_obj_dt( *a ); \
 \
 	uplo_t    uploa     = bli_obj_uplo( *a ); \
 	dim_t     m         = bli_obj_length( *a ); \
@@ -135,7 +135,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	dim_t     n         = bli_obj_vector_dim( *x ); \
 	void*     buf_x     = bli_obj_buffer_at_off( *x ); \
@@ -176,7 +176,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	doff_t    diagoffx  = bli_obj_diag_offset( *x ); \
 	diag_t    diagx     = bli_obj_diag( *x ); \
@@ -229,7 +229,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	dim_t     n         = bli_obj_vector_dim( *x ); \
 	void*     buf_x     = bli_obj_buffer_at_off( *x ); \
@@ -281,7 +281,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	dim_t     m         = bli_obj_length( *x ); \
 	dim_t     n         = bli_obj_width( *x ); \
@@ -381,7 +381,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	dim_t     n         = bli_obj_vector_dim( *x ); \
 	void*     buf_x     = bli_obj_buffer_at_off( *x ); \
@@ -418,7 +418,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	doff_t    diagoffx  = bli_obj_diag_offset( *x ); \
 	uplo_t    uplox     = bli_obj_uplo( *x ); \
@@ -464,7 +464,7 @@ void PASTEMAC(opname,EX_SUF) \
 \
 	BLIS_OAPI_CNTX_DECL \
 \
-	num_t     dt        = bli_obj_datatype( *x ); \
+	num_t     dt        = bli_obj_dt( *x ); \
 \
 	dim_t     n         = bli_obj_vector_dim( *x ); \
 	void*     buf_x     = bli_obj_buffer_at_off( *x ); \
--- a/kernels/penryn/1f/old/bli_axpyf_penryn_int.c.alt
+++ b/kernels/penryn/1f/old/bli_axpyf_penryn_int.c.alt
@@ -1,342 +0,0 @@
-/*
-
-   BLIS    
-   An object-based framework for developing high-performance BLAS-like
-   libraries.
-
-   Copyright (C) 2014, The University of Texas at Austin
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are
-   met:
-    - Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    - Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    - Neither the name of The University of Texas at Austin nor the names
-      of its contributors may be used to endorse or promote products
-      derived derived from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-*/
-
-#include "blis.h"
-
-/*
-#define FUNCPTR_T axpyf_fp
-
-typedef void (*FUNCPTR_T)(
-                           conj_t conjx,
-                           dim_t  n,
-                           void*  alpha,
-                           void*  x, inc_t incx,
-                           void*  y, inc_t incy
-                         );
-
-// If some mixed datatype functions will not be compiled, we initialize
-// the corresponding elements of the function array to NULL.
-#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
-static FUNCPTR_T GENARRAY3_ALL(ftypes,axpyf_penryn_int);
-#else
-#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
-static FUNCPTR_T GENARRAY3_EXT(ftypes,axpyf_penryn_int);
-#else
-static FUNCPTR_T GENARRAY3_MIN(ftypes,axpyf_penryn_int);
-#endif
-#endif
-
-
-void bli_axpyf_penryn_int( obj_t*  alpha,
-                         obj_t*  x,
-                         obj_t*  y )
-{
-	num_t     dt_x      = bli_obj_datatype( *x );
-	num_t     dt_y      = bli_obj_datatype( *y );
-
-	conj_t    conjx     = bli_obj_conj_status( *x );
-	dim_t     n         = bli_obj_vector_dim( *x );
-
-	inc_t     inc_x     = bli_obj_vector_inc( *x );
-	void*     buf_x     = bli_obj_buffer_at_off( *x );
-
-	inc_t     inc_y     = bli_obj_vector_inc( *y );
-	void*     buf_y     = bli_obj_buffer_at_off( *y );
-
-	num_t     dt_alpha;
-	void*     buf_alpha;
-
-	FUNCPTR_T f;
-
-	// If alpha is a scalar constant, use dt_x to extract the address of the
-	// corresponding constant value; otherwise, use the datatype encoded
-	// within the alpha object and extract the buffer at the alpha offset.
-	bli_set_scalar_dt_buffer( alpha, dt_x, dt_alpha, buf_alpha );
-
-	// Index into the type combination array to extract the correct
-	// function pointer.
-	f = ftypes[dt_alpha][dt_x][dt_y];
-
-	// Invoke the function.
-	f( conjx,
-	   n,
-	   buf_alpha,
-	   buf_x, inc_x,
-	   buf_y, inc_y );
-}
-*/
-
-#undef  GENTFUNC3U12
-#define GENTFUNC3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, opname, varname ) \
-\
-void PASTEMAC3(cha,chx,chy,varname)( \
-                                     conj_t conja, \
-                                     conj_t conjx, \
-                                     dim_t  m, \
-                                     dim_t  b_n, \
-                                     void*  alpha, \
-                                     void*  a, inc_t inca, inc_t lda, \
-                                     void*  x, inc_t incx, \
-                                     void*  y, inc_t incy \
-                                   ) \
-{ \
-	ctype_ax* alpha_cast = alpha; \
-	ctype_a*  a_cast     = a; \
-	ctype_x*  x_cast     = x; \
-	ctype_y*  y_cast     = y; \
-	ctype_a*  a1; \
-	ctype_x*  chi1; \
-	ctype_y*  y1; \
-	ctype_ax  alpha_chi1; \
-	dim_t     i; \
-\
-	for ( i = 0; i < b_n; ++i ) \
-	{ \
-		a1   = a_cast + (0  )*inca + (i  )*lda; \
-		chi1 = x_cast + (i  )*incx; \
-		y1   = y_cast + (0  )*incy; \
-\
-		PASTEMAC2(chx,chax,copycjs)( conjx, *chi1, alpha_chi1 ); \
-		PASTEMAC2(chax,chax,scals)( *alpha_cast, alpha_chi1 ); \
-\
-		PASTEMAC3(chax,cha,chy,axpyv)( conja, \
-		                               m, \
-		                               &alpha_chi1, \
-		                               a1, inca, \
-		                               y1, incy ); \
-	} \
-}
-
-// Define the basic set of functions unconditionally, and then also some
-// mixed datatype functions if requested.
-//INSERT_GENTFUNC3U12_BASIC( axpyf, axpyf_penryn_int )
-GENTFUNC3U12( float,    float,    float,    float,    s, s, s, s, axpyf, axpyf_penryn_int )
-//GENTFUNC3U12( double,   double,   double,   double,   d, d, d, d, axpyf, axpyf_penryn_int )
-GENTFUNC3U12( scomplex, scomplex, scomplex, scomplex, c, c, c, c, axpyf, axpyf_penryn_int )
-GENTFUNC3U12( dcomplex, dcomplex, dcomplex, dcomplex, z, z, z, z, axpyf, axpyf_penryn_int )
-
-#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
-INSERT_GENTFUNC3U12_MIX_D( axpyf, axpyf_penryn_int )
-#endif
-
-#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
-INSERT_GENTFUNC3U12_MIX_P( axpyf, axpyf_penryn_int )
-#endif
-
-
-#include "pmmintrin.h"
-typedef union
-{
-    __m128d v;
-    double  d[2];
-} v2df_t;
-
-
-void bli_dddaxpyf_penryn_int(
-                            conj_t conja,
-                            conj_t conjx,
-                            dim_t  m,
-                            dim_t  b_n,
-                            void*  alpha,
-                            void*  a, inc_t inca, inc_t lda,
-                            void*  x, inc_t incx,
-                            void*  y, inc_t incy
-                          )
-{
-	double*  restrict alpha_cast = alpha;
-	double*  restrict a_cast = a;
-	double*  restrict x_cast = x;
-	double*  restrict y_cast = y;
-	dim_t             i;
-
-	const dim_t       n_elem_per_reg = 2;
-	const dim_t       n_iter_unroll  = 2;
-
-	dim_t             m_pre;
-	dim_t             m_run;
-	dim_t             m_left;
-
-    double*  restrict a0;
-    double*  restrict a1;
-    double*  restrict a2;
-    double*  restrict a3;
-    double*  restrict y0;
-    double            a0c, a1c, a2c, a3c;
-    double            chi0, chi1, chi2, chi3;
-
-	v2df_t            a00v, a01v, a02v, a03v, y0v;
-	v2df_t            a10v, a11v, a12v, a13v, y1v;
-	v2df_t            chi0v, chi1v, chi2v, chi3v;
-
-	if ( bli_zero_dim2( m, b_n ) ) return;
-
-	if ( b_n < PASTEMAC(d,axpyf_fusefac) )
-	{
-		PASTEMAC3(d,d,d,axpyf_unb_var1)( conja,
-		                                 conjx,
-		                                 m,
-		                                 b_n,
-		                                 alpha_cast,
-		                                 a_cast, inca, lda,
-		                                 x_cast, incx,
-		                                 y_cast, incy );
-		return;
-	}
-
-	if ( inca != 1 ||
-	     incx != 1 ||
-	     incy != 1 ) bli_abort();
-
-	m_pre = 0;
-	if ( ( unsigned long ) a % 16 != 0 )
-	{
-		if ( ( unsigned long ) x % 16 == 0 ||
-		     ( unsigned long ) y % 16 == 0 ) bli_abort();
-
-		m_pre = 1;
-	}
-
-	m_run       = ( m - m_pre ) / ( n_elem_per_reg * n_iter_unroll );
-	m_left      = ( m - m_pre ) % ( n_elem_per_reg * n_iter_unroll );
-
-	a0   = a_cast + 0*lda;
-	a1   = a_cast + 1*lda;
-	a2   = a_cast + 2*lda;
-	a3   = a_cast + 3*lda;
-	y0   = y_cast;
-
-	chi0 = *(x_cast + 0*incx);
-	chi1 = *(x_cast + 1*incx);
-	chi2 = *(x_cast + 2*incx);
-	chi3 = *(x_cast + 3*incx);
-
-	PASTEMAC2(d,d,scals)( *alpha_cast, chi0 );
-	PASTEMAC2(d,d,scals)( *alpha_cast, chi1 );
-	PASTEMAC2(d,d,scals)( *alpha_cast, chi2 );
-	PASTEMAC2(d,d,scals)( *alpha_cast, chi3 );
-
-	if ( m_pre == 1 )
-	{
-		a0c = *a0;
-		a1c = *a1;
-		a2c = *a2;
-		a3c = *a3;
-
-		*y0 += chi0 * a0c + 
-		       chi1 * a1c + 
-		       chi2 * a2c + 
-		       chi3 * a3c;
-
-		a0 += inca;
-		a1 += inca;
-		a2 += inca;
-		a3 += inca;
-		y0 += incy;
-	}
-
-	chi0v.v = _mm_loaddup_pd( ( double* )&chi0 );
-	chi1v.v = _mm_loaddup_pd( ( double* )&chi1 );
-	chi2v.v = _mm_loaddup_pd( ( double* )&chi2 );
-	chi3v.v = _mm_loaddup_pd( ( double* )&chi3 );
-
-	for ( i = 0; i < m_run; ++i )
-	{
-		y0v.v = _mm_load_pd( ( double* )(y0 + 0*n_elem_per_reg) );
-
-		a00v.v = _mm_load_pd( ( double* )(a0 + 0*n_elem_per_reg) );
-		//a01v.v = _mm_load_pd( ( double* )(a1 + 0*n_elem_per_reg) );
-		a01v.v = _mm_load_pd( ( double* )(a0 + 1*lda + 0*n_elem_per_reg) );
-
-		y0v.v += chi0v.v * a00v.v;
-		y0v.v += chi1v.v * a01v.v;
-
-		a02v.v = _mm_load_pd( ( double* )(a2 + 0*n_elem_per_reg) );
-		//a03v.v = _mm_load_pd( ( double* )(a3 + 0*n_elem_per_reg) );
-		a03v.v = _mm_load_pd( ( double* )(a2 + 1*lda + 0*n_elem_per_reg) );
-
-		y0v.v += chi2v.v * a02v.v;
-		y0v.v += chi3v.v * a03v.v;
-
-		_mm_store_pd( ( double* )(y0 + 0*n_elem_per_reg), y0v.v );
-
-
-		y1v.v = _mm_load_pd( ( double* )(y0 + 1*n_elem_per_reg) );
-
-		a10v.v = _mm_load_pd( ( double* )(a0 + 1*n_elem_per_reg) );
-		//a11v.v = _mm_load_pd( ( double* )(a1 + 1*n_elem_per_reg) );
-		a11v.v = _mm_load_pd( ( double* )(a0 + 1*lda + 1*n_elem_per_reg) );
-
-		y1v.v += chi0v.v * a10v.v;
-		y1v.v += chi1v.v * a11v.v;
-
-		a12v.v = _mm_load_pd( ( double* )(a2 + 1*n_elem_per_reg) );
-		//a13v.v = _mm_load_pd( ( double* )(a3 + 1*n_elem_per_reg) );
-		a13v.v = _mm_load_pd( ( double* )(a2 + 1*lda + 1*n_elem_per_reg) );
-
-		y1v.v += chi2v.v * a12v.v;
-		y1v.v += chi3v.v * a13v.v;
-
-		_mm_store_pd( ( double* )(y0 + 1*n_elem_per_reg), y1v.v );
-
-
-		a0 += n_elem_per_reg * n_iter_unroll;
-		//a1 += n_elem_per_reg * n_iter_unroll;
-		a2 += n_elem_per_reg * n_iter_unroll;
-		//a3 += n_elem_per_reg * n_iter_unroll;
-		y0 += n_elem_per_reg * n_iter_unroll;
-	}
-
-	if ( m_left > 0 )
-	{
-		for ( i = 0; i < m_left; ++i )
-		{
-			a0c = *a0;
-			a1c = *a1;
-			a2c = *a2;
-			a3c = *a3;
-
-			*y0 += chi0 * a0c + 
-			       chi1 * a1c + 
-			       chi2 * a2c + 
-			       chi3 * a3c;
-
-			a0 += inca;
-			a1 += inca;
-			a2 += inca;
-			a3 += inca;
-			y0 += incy;
-		}
-	}
-}
-
--- a/kernels/penryn/1f/old/bli_dotxf_penryn_int.c.alt
+++ b/kernels/penryn/1f/old/bli_dotxf_penryn_int.c.alt
@@ -1,421 +0,0 @@
-/*
-
-   BLIS    
-   An object-based framework for developing high-performance BLAS-like
-   libraries.
-
-   Copyright (C) 2014, The University of Texas at Austin
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are
-   met:
-    - Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    - Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    - Neither the name of The University of Texas at Austin nor the names
-      of its contributors may be used to endorse or promote products
-      derived derived from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-*/
-
-#include "blis.h"
-
-/*
-#define FUNCPTR_T dotxf_fp
-
-typedef void (*FUNCPTR_T)(
-                           conj_t conjx,
-                           conj_t conjy,
-                           dim_t  n,
-                           void*  alpha,
-                           void*  x, inc_t incx,
-                           void*  y, inc_t incy,
-                           void*  beta,
-                           void*  rho
-                         );
-
-// If some mixed datatype functions will not be compiled, we initialize
-// the corresponding elements of the function array to NULL.
-#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
-static FUNCPTR_T GENARRAY3_ALL(ftypes,dotxf_penryn_int);
-#else
-#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
-static FUNCPTR_T GENARRAY3_EXT(ftypes,dotxf_penryn_int);
-#else
-static FUNCPTR_T GENARRAY3_MIN(ftypes,dotxf_penryn_int);
-#endif
-#endif
-
-
-void bli_dotxf_penryn_int( obj_t*  alpha,
-                         obj_t*  x,
-                         obj_t*  y,
-                         obj_t*  beta,
-                         obj_t*  rho )
-{
-	num_t     dt_x      = bli_obj_datatype( *x );
-	num_t     dt_y      = bli_obj_datatype( *y );
-	num_t     dt_rho    = bli_obj_datatype( *rho );
-
-	conj_t    conjx     = bli_obj_conj_status( *x );
-	conj_t    conjy     = bli_obj_conj_status( *y );
-	dim_t     n         = bli_obj_vector_dim( *x );
-
-	inc_t     inc_x     = bli_obj_vector_inc( *x );
-	void*     buf_x     = bli_obj_buffer_at_off( *x );
-
-	inc_t     inc_y     = bli_obj_vector_inc( *y );
-	void*     buf_y     = bli_obj_buffer_at_off( *y );
-
-	void*     buf_rho   = bli_obj_buffer_at_off( *rho );
-
-	num_t     dt_alpha;
-	void*     buf_alpha;
-
-	num_t     dt_beta;
-	void*     buf_beta;
-
-	FUNCPTR_T f;
-
-	// The datatype of alpha MUST be the type union of x and y. This is to
-	// prevent any unnecessary loss of information during computation.
-	dt_alpha  = bli_datatype_union( dt_x, dt_y );
-	buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
-
-	// The datatype of beta MUST be the same as the datatype of rho.
-	dt_beta   = dt_rho;
-	buf_beta  = bli_obj_buffer_for_1x1( dt_beta, *beta );
-
-	// Index into the type combination array to extract the correct
-	// function pointer.
-	f = ftypes[dt_x][dt_y][dt_rho];
-
-	// Invoke the function.
-	f( conjx,
-	   conjy,
-	   n,
-	   buf_alpha, 
-	   buf_x, inc_x, 
-	   buf_y, inc_y,
-	   buf_beta, 
-	   buf_rho );
-}
-*/
-
-#undef  GENTFUNC3U12
-#define GENTFUNC3U12( ctype_x, ctype_y, ctype_r, ctype_xy, chx, chy, chr, chxy, opname, varname ) \
-\
-void PASTEMAC3(chx,chy,chr,varname)( \
-                                     conj_t conjx, \
-                                     conj_t conjy, \
-                                     dim_t  b_m, \
-                                     dim_t  n, \
-                                     void*  alpha, \
-                                     void*  x, inc_t incx, inc_t ldx, \
-                                     void*  y, inc_t incy, \
-                                     void*  beta, \
-                                     void*  r, inc_t incr \
-                                   ) \
-{ \
-	ctype_xy* alpha_cast = alpha; \
-	ctype_x*  x_cast     = x; \
-	ctype_y*  y_cast     = y; \
-	ctype_r*  beta_cast  = beta; \
-	ctype_r*  r_cast     = r; \
-	ctype_x*  x1; \
-	ctype_y*  y1; \
-	ctype_r*  rho1; \
-	dim_t     i; \
-\
-	for ( i = 0; i < b_m; ++i ) \
-	{ \
-		x1   = x_cast + (0  )*incx + (i  )*ldx; \
-		y1   = y_cast + (0  )*incy; \
-		rho1 = r_cast + (i  )*incr; \
-\
-		PASTEMAC3(chx,chy,chr,dotxv)( conjx, \
-		                              conjy, \
-		                              n, \
-		                              alpha_cast, \
-		                              x1,   incx, \
-		                              y1,   incy, \
-		                              beta_cast, \
-		                              rho1 ); \
-	} \
-}
-
-// Define the basic set of functions unconditionally, and then also some
-// mixed datatype functions if requested.
-//INSERT_GENTFUNC3U12_BASIC( dotxf, dotxf_penryn_int )
-GENTFUNC3U12( float,    float,    float,    float,    s, s, s, s, dotxf, dotxf_penryn_int )
-//GENTFUNC3U12( double,   double,   double,   double,   d, d, d, d, dotxf, dotxf_penryn_int )
-GENTFUNC3U12( scomplex, scomplex, scomplex, scomplex, c, c, c, c, dotxf, dotxf_penryn_int )
-GENTFUNC3U12( dcomplex, dcomplex, dcomplex, dcomplex, z, z, z, z, dotxf, dotxf_penryn_int )
-
-#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
-INSERT_GENTFUNC3U12_MIX_D( dotxf, dotxf_penryn_int )
-#endif
-
-#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
-INSERT_GENTFUNC3U12_MIX_P( dotxf, dotxf_penryn_int )
-#endif
-
-
-#include "pmmintrin.h"
-typedef union
-{
-    __m128d v;
-    double  d[2];
-} v2df_t;
-
-
-void bli_ddddotxf_penryn_int(
-                            conj_t conjx,
-                            conj_t conjy,
-                            dim_t  b_m,
-                            dim_t  n,
-                            void*  alpha,
-                            void*  x, inc_t incx, inc_t ldx,
-                            void*  y, inc_t incy,
-                            void*  beta,
-                            void*  r, inc_t incr
-                          ) 
-{ 
-	double*  restrict alpha_cast = alpha; 
-	double*  restrict beta_cast = beta; 
-	double*  restrict x_cast = x; 
-	double*  restrict y_cast = y; 
-	double*  restrict r_cast = r; 
-	dim_t             i; 
-
-	const dim_t       n_elem_per_reg = 2;
-	const dim_t       n_iter_unroll  = 4;
-
-	dim_t             n_pre;
-	dim_t             n_run;
-	dim_t             n_left;
-
-	double*  restrict x0;
-	double*  restrict x1;
-	double*  restrict x2;
-	double*  restrict x3;
-	double*  restrict y0;
-	double            rho0, rho1, rho2, rho3;
-	double            x0c, x1c, x2c, x3c, y0c;
-
-	v2df_t            rho0v, rho1v, rho2v, rho3v;
-	v2df_t            x0v, x1v, x2v, x3v, y0v, betav, alphav;
-
-	if ( bli_zero_dim1( b_m ) ) return;
-
-	if ( bli_zero_dim1( n ) ) 
-	{ 
-		PASTEMAC(d,scals)( *beta_cast, *(r_cast  ) ); 
-		PASTEMAC(d,scals)( *beta_cast, *(r_cast+1) ); 
-		PASTEMAC(d,scals)( *beta_cast, *(r_cast+2) ); 
-		PASTEMAC(d,scals)( *beta_cast, *(r_cast+3) ); 
-		return; 
-	} 
-
-	if ( b_m < PASTEMAC(d,dotxf_fusefac) )
-	{
-		PASTEMAC3(d,d,d,dotxf_unb_var1)( conjx,
-		                                 conjy,
-		                                 b_m,
-		                                 n,
-		                                 alpha_cast,
-		                                 x_cast, incx, ldx,
-		                                 y_cast, incy,
-		                                 beta_cast,
-		                                 r_cast, incr );
-		return;
-	}
-
-
-	if ( incx != 1 ||
-	     incy != 1 ) bli_abort();
-
-	n_pre = 0;
-	if ( ( unsigned long ) y % 16 != 0 )
-	{
-		if ( ( unsigned long ) x % 16 == 0 )
-			bli_abort();
-
-		n_pre = 1;
-	}
-
-	n_run       = ( n - n_pre ) / ( n_elem_per_reg * n_iter_unroll );
-	n_left      = ( n - n_pre ) % ( n_elem_per_reg * n_iter_unroll );
-
-	x0 = x_cast;
-	x1 = x_cast +   ldx;
-	x2 = x_cast + 2*ldx;
-	x3 = x_cast + 3*ldx;
-	y0 = y_cast;
-
-	PASTEMAC(d,set0)( rho0 ); 
-	PASTEMAC(d,set0)( rho1 ); 
-	PASTEMAC(d,set0)( rho2 ); 
-	PASTEMAC(d,set0)( rho3 ); 
-
-	if ( n_pre == 1 )
-	{
-		x0c = *x0;
-		x1c = *x1;
-		x2c = *x2;
-		x3c = *x3;
-		y0c = *y0;
-
-		rho0 += x0c * y0c;
-		rho1 += x1c * y0c;
-		rho2 += x2c * y0c;
-		rho3 += x3c * y0c;
-
-		x0 += incx;
-		x1 += incx;
-		x2 += incx;
-		x3 += incx;
-		y0 += incy;
-	}
-
-	rho0v.v = _mm_setzero_pd();
-	rho1v.v = _mm_setzero_pd();
-	rho2v.v = _mm_setzero_pd();
-	rho3v.v = _mm_setzero_pd();
-
-	for ( i = 0; i < n_run; ++i )
-	{
-		x0v.v = _mm_load_pd( ( double* )(x0 + 0*n_elem_per_reg) );
-		//x1v.v = _mm_load_pd( ( double* )(x1 + 0*n_elem_per_reg) );
-		x1v.v = _mm_load_pd( ( double* )(x0 + 1*ldx + 0*n_elem_per_reg) );
-		x2v.v = _mm_load_pd( ( double* )(x2 + 0*n_elem_per_reg) );
-		//x3v.v = _mm_load_pd( ( double* )(x3 + 0*n_elem_per_reg) );
-		x3v.v = _mm_load_pd( ( double* )(x2 + 1*ldx + 0*n_elem_per_reg) );
-		y0v.v = _mm_load_pd( ( double* )(y0 + 0*n_elem_per_reg) );
-
-		rho0v.v += x0v.v * y0v.v;
-		rho1v.v += x1v.v * y0v.v;
-		rho2v.v += x2v.v * y0v.v;
-		rho3v.v += x3v.v * y0v.v;
-
-		x0v.v = _mm_load_pd( ( double* )(x0 + 1*n_elem_per_reg) );
-		//x1v.v = _mm_load_pd( ( double* )(x1 + 1*n_elem_per_reg) );
-		x1v.v = _mm_load_pd( ( double* )(x0 + 1*ldx + 1*n_elem_per_reg) );
-		x2v.v = _mm_load_pd( ( double* )(x2 + 1*n_elem_per_reg) );
-		//x3v.v = _mm_load_pd( ( double* )(x3 + 1*n_elem_per_reg) );
-		x3v.v = _mm_load_pd( ( double* )(x2 + 1*ldx + 1*n_elem_per_reg) );
-		y0v.v = _mm_load_pd( ( double* )(y0 + 1*n_elem_per_reg) );
-
-		rho0v.v += x0v.v * y0v.v;
-		rho1v.v += x1v.v * y0v.v;
-		rho2v.v += x2v.v * y0v.v;
-		rho3v.v += x3v.v * y0v.v;
-
-		x0v.v = _mm_load_pd( ( double* )(x0 + 2*n_elem_per_reg) );
-		//x1v.v = _mm_load_pd( ( double* )(x1 + 2*n_elem_per_reg) );
-		x1v.v = _mm_load_pd( ( double* )(x0 + 1*ldx + 2*n_elem_per_reg) );
-		x2v.v = _mm_load_pd( ( double* )(x2 + 2*n_elem_per_reg) );
-		//x3v.v = _mm_load_pd( ( double* )(x3 + 2*n_elem_per_reg) );
-		x3v.v = _mm_load_pd( ( double* )(x2 + 1*ldx + 2*n_elem_per_reg) );
-		y0v.v = _mm_load_pd( ( double* )(y0 + 2*n_elem_per_reg) );
-
-		rho0v.v += x0v.v * y0v.v;
-		rho1v.v += x1v.v * y0v.v;
-		rho2v.v += x2v.v * y0v.v;
-		rho3v.v += x3v.v * y0v.v;
-
-		x0v.v = _mm_load_pd( ( double* )(x0 + 3*n_elem_per_reg) );
-		//x1v.v = _mm_load_pd( ( double* )(x1 + 3*n_elem_per_reg) );
-		x1v.v = _mm_load_pd( ( double* )(x0 + 1*ldx + 3*n_elem_per_reg) );
-		x2v.v = _mm_load_pd( ( double* )(x2 + 3*n_elem_per_reg) );
-		//x3v.v = _mm_load_pd( ( double* )(x3 + 3*n_elem_per_reg) );
-		x3v.v = _mm_load_pd( ( double* )(x2 + 1*ldx + 3*n_elem_per_reg) );
-		y0v.v = _mm_load_pd( ( double* )(y0 + 3*n_elem_per_reg) );
-
-		rho0v.v += x0v.v * y0v.v;
-		rho1v.v += x1v.v * y0v.v;
-		rho2v.v += x2v.v * y0v.v;
-		rho3v.v += x3v.v * y0v.v;
-
-
-		x0 += n_elem_per_reg * n_iter_unroll;
-		//x1 += n_elem_per_reg * n_iter_unroll;
-		x2 += n_elem_per_reg * n_iter_unroll;
-		//x3 += n_elem_per_reg * n_iter_unroll;
-		y0 += n_elem_per_reg * n_iter_unroll;
-	}
-
-	rho0 += rho0v.d[0] + rho0v.d[1];
-	rho1 += rho1v.d[0] + rho1v.d[1];
-	rho2 += rho2v.d[0] + rho2v.d[1];
-	rho3 += rho3v.d[0] + rho3v.d[1];
-
-	if ( n_left > 0 )
-	{
-		for ( i = 0; i < n_left; ++i )
-		{
-			x0c = *x0;
-			x1c = *x1;
-			x2c = *x2;
-			x3c = *x3;
-			y0c = *y0;
-
-			rho0 += x0c * y0c;
-			rho1 += x1c * y0c;
-			rho2 += x2c * y0c;
-			rho3 += x3c * y0c;
-
-			x0 += incx;
-			x1 += incx;
-			x2 += incx;
-			x3 += incx;
-			y0 += incy;
-		}
-	}
-/*
-	PASTEMAC2(d,d,scals)( *beta_cast, *(r_cast  ) ); \
-	PASTEMAC2(d,d,scals)( *beta_cast, *(r_cast+1) ); \
-	PASTEMAC2(d,d,scals)( *beta_cast, *(r_cast+2) ); \
-	PASTEMAC2(d,d,scals)( *beta_cast, *(r_cast+3) ); \
-
-	PASTEMAC3(d,d,d,axpys)( *alpha_cast, rho1, *(r_cast  ) ); \
-	PASTEMAC3(d,d,d,axpys)( *alpha_cast, rho2, *(r_cast+1) ); \
-	PASTEMAC3(d,d,d,axpys)( *alpha_cast, rho3, *(r_cast+2) ); \
-	PASTEMAC3(d,d,d,axpys)( *alpha_cast, rho4, *(r_cast+3) ); \
-*/
-
-	rho1v.d[0] = rho0;
-	rho1v.d[1] = rho1;
-	rho3v.d[0] = rho2;
-	rho3v.d[1] = rho3;
-
-	betav.v  = _mm_loaddup_pd( ( double* ) beta_cast );
-	alphav.v = _mm_loaddup_pd( ( double* ) alpha_cast );
-
-	rho0v.v = _mm_load_pd( ( double* )(r_cast + 0*n_elem_per_reg) );
-	rho2v.v = _mm_load_pd( ( double* )(r_cast + 1*n_elem_per_reg) );
-
-	rho0v.v *= betav.v;
-	rho2v.v *= betav.v;
-
-	rho0v.v += alphav.v * rho1v.v;
-	rho2v.v += alphav.v * rho3v.v;
-
-	_mm_store_pd( ( double* )(r_cast + 0*n_elem_per_reg), rho0v.v );
-	_mm_store_pd( ( double* )(r_cast + 1*n_elem_per_reg), rho2v.v );
-
-}
--- a/mpi_test/test_hemm.c
+++ b/mpi_test/test_hemm.c
@@ -163,8 +163,8 @@ int main( int argc, char** argv )
 			obj_t ar, ai;
 			bli_obj_alias_to( a, ar );
 			bli_obj_alias_to( a, ai );
-			bli_obj_set_datatype( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
-			bli_obj_set_datatype( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
+			bli_obj_set_dt( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
+			bli_obj_set_dt( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
 			bli_printm( "ar", &ar, "%4.1f", "" );
 			bli_printm( "ai", &ai, "%4.1f", "" );
 */
--- a/mpi_test/test_trmm.c
+++ b/mpi_test/test_trmm.c
@@ -159,8 +159,8 @@ int main( int argc, char** argv )
 			obj_t ar, ai;
 			bli_obj_alias_to( a, ar );
 			bli_obj_alias_to( a, ai );
-			bli_obj_set_datatype( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
-			bli_obj_set_datatype( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
+			bli_obj_set_dt( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
+			bli_obj_set_dt( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
 			bli_printm( "ar", &ar, "%4.1f", "" );
 			bli_printm( "ai", &ai, "%4.1f", "" );
 */
--- a/mpi_test/test_trsm.c
+++ b/mpi_test/test_trsm.c
@@ -163,8 +163,8 @@ int main( int argc, char** argv )
 			obj_t ar, ai;
 			bli_obj_alias_to( a, ar );
 			bli_obj_alias_to( a, ai );
-			bli_obj_set_datatype( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
-			bli_obj_set_datatype( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
+			bli_obj_set_dt( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
+			bli_obj_set_dt( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;

 			bli_printm( "ar", &ar, "%4.1f", "" );
 			bli_printm( "ai", &ai, "%4.1f", "" );
--- a/testsuite/src/test_addm.c
+++ b/testsuite/src/test_addm.c
@@ -238,8 +238,8 @@ void libblis_test_addm_check
       double*        resid
     )
 {
-	num_t  dt      = bli_obj_datatype( *y );
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *y );
+	num_t  dt      = bli_obj_dt( *y );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *y );
 	dim_t  m       = bli_obj_length( *y );
 	dim_t  n       = bli_obj_width( *y );

--- a/testsuite/src/test_addv.c
+++ b/testsuite/src/test_addv.c
@@ -234,8 +234,8 @@ void libblis_test_addv_check
       double*        resid
     )
 {
-	num_t  dt      = bli_obj_datatype( *x );
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *x );
+	num_t  dt      = bli_obj_dt( *x );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *x );
 	dim_t  m       = bli_obj_vector_dim( *x );

 	conj_t conjx   = bli_obj_conj_status( *x );
--- a/testsuite/src/test_amaxv.c
+++ b/testsuite/src/test_amaxv.c
@@ -301,7 +301,7 @@ void PASTEMAC0(opname) \
       obj_t*  index  \
     ) \
 { \
-    num_t     dt        = bli_obj_datatype( *x ); \
+    num_t     dt        = bli_obj_dt( *x ); \
 \
    dim_t     n         = bli_obj_vector_dim( *x ); \
    void*     buf_x     = bli_obj_buffer_at_off( *x ); \
--- a/testsuite/src/test_axpbyv.c
+++ b/testsuite/src/test_axpbyv.c
@@ -262,8 +262,8 @@ void libblis_test_axpbyv_check
       double*        resid
     )
 {
-	num_t  dt      = bli_obj_datatype( *y );
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *y );
+	num_t  dt      = bli_obj_dt( *y );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *y );

 	dim_t  m       = bli_obj_vector_dim( *y );

--- a/testsuite/src/test_axpy2v.c
+++ b/testsuite/src/test_axpy2v.c
@@ -278,8 +278,8 @@ void libblis_test_axpy2v_check
       double*        resid
     )
 {
-	num_t  dt      = bli_obj_datatype( *z );
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *z );
+	num_t  dt      = bli_obj_dt( *z );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *z );

 	dim_t  m       = bli_obj_vector_dim( *z );

--- a/testsuite/src/test_axpyf.c
+++ b/testsuite/src/test_axpyf.c
@@ -279,8 +279,8 @@ void libblis_test_axpyf_check
       double*        resid
     )
 {
-	num_t  dt      = bli_obj_datatype( *y );
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *y );
+	num_t  dt      = bli_obj_dt( *y );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *y );

 	dim_t  m       = bli_obj_vector_dim( *y );
 	dim_t  b_n     = bli_obj_width( *a );
--- a/testsuite/src/test_axpym.c
+++ b/testsuite/src/test_axpym.c
@@ -254,8 +254,8 @@ void libblis_test_axpym_check
       double*        resid
     )
 {
-	num_t  dt      = bli_obj_datatype( *y );
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *y );
+	num_t  dt      = bli_obj_dt( *y );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *y );

 	dim_t  m       = bli_obj_length( *y );
 	dim_t  n       = bli_obj_width( *y );
--- a/testsuite/src/test_axpyv.c
+++ b/testsuite/src/test_axpyv.c
@@ -252,8 +252,8 @@ void libblis_test_axpyv_check
       double*        resid
     )
 {
-	num_t  dt      = bli_obj_datatype( *y );
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *y );
+	num_t  dt      = bli_obj_dt( *y );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *y );

 	dim_t  m       = bli_obj_vector_dim( *y );

--- a/testsuite/src/test_copym.c
+++ b/testsuite/src/test_copym.c
@@ -226,7 +226,7 @@ void libblis_test_copym_check
       double*        resid
     )
 {
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *x );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *x );

 	obj_t  norm_y_r;

--- a/testsuite/src/test_copyv.c
+++ b/testsuite/src/test_copyv.c
@@ -223,7 +223,7 @@ void libblis_test_copyv_check
       double*        resid
     )
 {
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *x );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *x );

 	obj_t  norm_y_r;

--- a/testsuite/src/test_dotaxpyv.c
+++ b/testsuite/src/test_dotaxpyv.c
@@ -300,8 +300,8 @@ void libblis_test_dotaxpyv_check
       double*        resid
     )
 {
-	num_t  dt      = bli_obj_datatype( *z );
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *z );
+	num_t  dt      = bli_obj_dt( *z );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *z );

 	dim_t  m       = bli_obj_vector_dim( *z );

--- a/testsuite/src/test_dotv.c
+++ b/testsuite/src/test_dotv.c
@@ -249,7 +249,7 @@ void libblis_test_dotv_check
       double*        resid
     )
 {
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *y );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *y );

 	obj_t  rho_r, rho_i;
 	obj_t  norm_x, norm_xy;
--- a/testsuite/src/test_dotxaxpyf.c
+++ b/testsuite/src/test_dotxaxpyf.c
@@ -321,8 +321,8 @@ void libblis_test_dotxaxpyf_check
       double*        resid
     )
 {
-	num_t  dt      = bli_obj_datatype( *y );
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *y );
+	num_t  dt      = bli_obj_dt( *y );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *y );

 	dim_t  m       = bli_obj_vector_dim( *z );
 	dim_t  b_n     = bli_obj_vector_dim( *y );
--- a/testsuite/src/test_dotxf.c
+++ b/testsuite/src/test_dotxf.c
@@ -286,8 +286,8 @@ void libblis_test_dotxf_check
       double*        resid
     )
 {
-	num_t  dt      = bli_obj_datatype( *y );
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *y );
+	num_t  dt      = bli_obj_dt( *y );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *y );

 	dim_t  b_n     = bli_obj_vector_dim( *y );

--- a/testsuite/src/test_dotxv.c
+++ b/testsuite/src/test_dotxv.c
@@ -269,7 +269,7 @@ void libblis_test_dotxv_check
       double*        resid
     )
 {
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *y );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *y );

 	obj_t  rho_r, rho_i;
 	obj_t  norm_x_r, norm_xy_r;
--- a/testsuite/src/test_gemm.c
+++ b/testsuite/src/test_gemm.c
@@ -280,8 +280,8 @@ void libblis_test_gemm_check
       double*        resid
     )
 {
-	num_t  dt      = bli_obj_datatype( *c );
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *c );
+	num_t  dt      = bli_obj_dt( *c );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *c );

 	dim_t  m       = bli_obj_length( *c );
 	dim_t  n       = bli_obj_width( *c );
--- a/testsuite/src/test_gemm_ukr.c
+++ b/testsuite/src/test_gemm_ukr.c
@@ -352,8 +352,8 @@ void libblis_test_gemm_ukr_check
       double*        resid
     )
 {
-	num_t  dt      = bli_obj_datatype( *c );
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *c );
+	num_t  dt      = bli_obj_dt( *c );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *c );

 	dim_t  m       = bli_obj_length( *c );
 	dim_t  n       = bli_obj_width( *c );
--- a/testsuite/src/test_gemmtrsm_ukr.c
+++ b/testsuite/src/test_gemmtrsm_ukr.c
@@ -428,8 +428,8 @@ void libblis_test_gemmtrsm_ukr_check
       double*        resid
     )
 {
-	num_t  dt      = bli_obj_datatype( *b11 );
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *b11 );
+	num_t  dt      = bli_obj_dt( *b11 );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *b11 );

 	dim_t  m       = bli_obj_length( *b11 );
 	dim_t  n       = bli_obj_width( *b11 );
--- a/testsuite/src/test_gemv.c
+++ b/testsuite/src/test_gemv.c
@@ -283,8 +283,8 @@ void libblis_test_gemv_check
       double*        resid
     )
 {
-	num_t  dt      = bli_obj_datatype( *y );
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *y );
+	num_t  dt      = bli_obj_dt( *y );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *y );

 	conj_t conja   = bli_obj_conj_status( *a );

--- a/testsuite/src/test_ger.c
+++ b/testsuite/src/test_ger.c
@@ -267,8 +267,8 @@ void libblis_test_ger_check
       double*        resid
     )
 {
-	num_t  dt      = bli_obj_datatype( *a );
-	num_t  dt_real = bli_obj_datatype_proj_to_real( *a );
+	num_t  dt      = bli_obj_dt( *a );
+	num_t  dt_real = bli_obj_dt_proj_to_real( *a );

 	dim_t  m_a     = bli_obj_length( *a );
 	dim_t  n_a     = bli_obj_width( *a );
--- a/Show More
+++ b/Show More