diff --git a/frame/1m/packm/bli_packm_init.c b/frame/1m/packm/bli_packm_init.c index 2e17632d9..8441fe6fd 100644 --- a/frame/1m/packm/bli_packm_init.c +++ b/frame/1m/packm/bli_packm_init.c @@ -95,6 +95,14 @@ void bli_packm_init( obj_t* a, return; } + // If the object is marked as being filled with zeros, then we can skip + // the packm operation entirely and alias. + if ( bli_obj_is_zeros( *a ) ) + { + bli_obj_alias_to( *a, *p ); + return; + } + // Now, if we are not skipping the pack operation, then the only question // left is whether we are to typecast matrix a before packing. if ( bli_obj_datatype( *a ) != bli_obj_target_datatype( *a ) ) diff --git a/frame/1m/packm/bli_packm_int.c b/frame/1m/packm/bli_packm_int.c index bd6b8ef73..f048b5fa0 100644 --- a/frame/1m/packm/bli_packm_int.c +++ b/frame/1m/packm/bli_packm_int.c @@ -102,34 +102,12 @@ void bli_packm_int( obj_t* a, return; } -/* - // The value for kappa we use will depend on whether the scalar - // attached to A has a nonzero imaginary component. If it does, - // then we will apply the scalar during packing to facilitate - // implementing complex domain micro-kernels in terms of their - // real domain counterparts. (In the aforementioned situation, - // applying a real scalar is easy, but applying a complex one is - // harder, so we avoid the need altogether with the code below.) - if ( bli_obj_scalar_has_nonzero_imag( a ) ) + // If the object is marked as being filled with zeros, then we can skip + // the packm operation entirely. + if ( bli_obj_is_zeros( *a ) ) { - bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); - - // Detach the scalar. - bli_obj_scalar_detach( a, &kappa ); - - // Reset the attached scalar (to 1.0). - bli_obj_scalar_reset( a ); - - kappa_p = &kappa; + return; } - else - { - // If the internal scalar of A has only a real component, then - // we will apply it later (in the micro-kernel), and so we will - // use BLIS_ONE to indicate no scaling during packing. 
- kappa_p = &BLIS_ONE; - } -*/ // Extract the variant number and implementation type. diff --git a/frame/3/gemm/bli_gemm_int.c b/frame/3/gemm/bli_gemm_int.c index bc0a35472..f6fc6d284 100644 --- a/frame/3/gemm/bli_gemm_int.c +++ b/frame/3/gemm/bli_gemm_int.c @@ -81,6 +81,15 @@ void bli_gemm_int( obj_t* alpha, return; } + // If A or B is marked as being filled with zeros, scale C by beta and + // return early. + if ( bli_obj_is_zeros( *a ) || + bli_obj_is_zeros( *b ) ) + { + bli_scalm( beta, c ); + return; + } + // Alias A and B in case we need to update attached scalars. bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( *b, b_local );