From a86db60ee270cdeb745ae7cf68f9e0becc9f522d Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Mon, 23 Feb 2015 18:42:39 -0600 Subject: [PATCH] Extensive renaming of 3m/4m-related files, symbols. Details: - Renamed all remaining 3m/4m packing files and symbols to 3mi/4mi ('i' for "interleaved"). Similar changes to 3M/4M macros. - Renamed all 3m/4m files and functions to 3m1/4m1. - Whitespace changes. --- frame/1m/packm/bli_packm.h | 8 +- frame/1m/packm/bli_packm_blk_var1.c | 2 - frame/1m/packm/bli_packm_blk_var2.c | 8 +- frame/1m/packm/bli_packm_cntl.c | 28 +-- ...bli_packm_cxk_3m.c => bli_packm_cxk_3mi.c} | 38 +-- ...bli_packm_cxk_3m.h => bli_packm_cxk_3mi.h} | 4 +- ...bli_packm_cxk_4m.c => bli_packm_cxk_4mi.c} | 38 +-- ...bli_packm_cxk_4m.h => bli_packm_cxk_4mi.h} | 4 +- frame/1m/packm/bli_packm_init.c | 16 +- ...ruc_cxk_3m.c => bli_packm_struc_cxk_3mi.c} | 54 ++-- ...ruc_cxk_3m.h => bli_packm_struc_cxk_3mi.h} | 6 +- ...ruc_cxk_4m.c => bli_packm_struc_cxk_4mi.c} | 54 ++-- ...ruc_cxk_4m.h => bli_packm_struc_cxk_4mi.h} | 6 +- ...m_ref_cxk_3m.c => bli_packm_ref_cxk_3mi.c} | 18 +- ...m_ref_cxk_4m.h => bli_packm_ref_cxk_3mi.h} | 18 +- ...m_ref_cxk_4m.c => bli_packm_ref_cxk_4mi.c} | 18 +- ...m_ref_cxk_3m.h => bli_packm_ref_cxk_4mi.h} | 18 +- .../{3m/bli_gemm3m.c => 3m1/bli_gemm3m1.c} | 16 +- .../bli_syr2k3m.h => gemm/3m1/bli_gemm3m1.h} | 11 +- .../bli_gemm3m1_cntl.c} | 124 +++++----- .../bli_gemm3m1_cntl.h} | 4 +- .../3m1/bli_gemm3m1_entry.c} | 8 +- .../3m1/bli_gemm3m1_entry.h} | 2 +- .../ukernels/bli_gemm3m1_ukr_ref.c} | 2 +- .../ukernels/bli_gemm3m1_ukr_ref.h} | 2 +- frame/3/gemm/3mh/bli_gemm3mh.c | 8 +- frame/3/gemm/3mh/bli_gemm3mh.h | 8 +- frame/3/gemm/3mh/bli_gemm3mh_entry.c | 8 +- frame/3/gemm/3mh/bli_gemm3mh_entry.h | 8 +- .../{4m/bli_gemm4m.c => 4m1/bli_gemm4m1.c} | 14 +- .../bli_syr2k4m.h => gemm/4m1/bli_gemm4m1.h} | 11 +- .../bli_gemm4m1_cntl.c} | 124 +++++----- .../bli_gemm4m1_cntl.h} | 4 +- .../4m1/bli_gemm4m1_entry.c} | 8 +- 
.../4m1/bli_gemm4m1_entry.h} | 2 +- .../ukernels/bli_gemm4m1_ukr_ref.c} | 10 +- .../ukernels/bli_gemm4m1_ukr_ref.h} | 2 +- frame/3/gemm/4mb/bli_gemm4mb.c | 8 +- frame/3/gemm/4mb/bli_gemm4mb.h | 8 +- frame/3/gemm/4mb/bli_gemm4mb_entry.c | 8 +- frame/3/gemm/4mb/bli_gemm4mb_entry.h | 8 +- frame/3/gemm/4mh/bli_gemm4mh.c | 8 +- frame/3/gemm/4mh/bli_gemm4mh.h | 8 +- frame/3/gemm/4mh/bli_gemm4mh_entry.c | 8 +- frame/3/gemm/4mh/bli_gemm4mh_entry.h | 8 +- frame/3/gemm/bli_gemm.c | 4 +- frame/3/gemm/bli_gemm.h | 6 +- frame/3/gemm/bli_gemm_query.c | 20 +- frame/3/hemm/3m/bli_hemm3m.h | 69 ------ frame/3/hemm/3m/bli_hemm3m_entry.h | 41 ---- .../{3m/bli_hemm3m.c => 3m1/bli_hemm3m1.c} | 18 +- .../bli_trmm33m.h => hemm/3m1/bli_hemm3m1.h} | 10 +- .../3m1/bli_hemm3m1_entry.c} | 8 +- .../3m1/bli_hemm3m1_entry.h} | 2 +- frame/3/hemm/3mh/bli_hemm3mh.c | 10 +- frame/3/hemm/3mh/bli_hemm3mh.h | 10 +- frame/3/hemm/3mh/bli_hemm3mh_entry.c | 10 +- frame/3/hemm/3mh/bli_hemm3mh_entry.h | 10 +- frame/3/hemm/4m/bli_hemm4m_entry.h | 41 ---- .../{4m/bli_hemm4m.c => 4m1/bli_hemm4m1.c} | 16 +- .../bli_trmm34m.h => hemm/4m1/bli_hemm4m1.h} | 10 +- .../4m1/bli_hemm4m1_entry.c} | 8 +- .../4m1/bli_hemm4m1_entry.h} | 2 +- frame/3/hemm/4mh/bli_hemm4mh.c | 10 +- frame/3/hemm/4mh/bli_hemm4mh.h | 10 +- frame/3/hemm/4mh/bli_hemm4mh_entry.c | 10 +- frame/3/hemm/4mh/bli_hemm4mh_entry.h | 10 +- frame/3/hemm/bli_hemm.c | 4 +- frame/3/hemm/bli_hemm.h | 4 +- .../{3m/bli_her2k3m.c => 3m1/bli_her2k3m1.c} | 16 +- .../{3m/bli_her2k3m.h => 3m1/bli_her2k3m1.h} | 14 +- .../3m1/bli_her2k3m1_entry.c} | 16 +- .../3m1/bli_her2k3m1_entry.h} | 10 +- frame/3/her2k/3mh/bli_her2k3mh.c | 8 +- frame/3/her2k/3mh/bli_her2k3mh.h | 8 +- frame/3/her2k/3mh/bli_her2k3mh_entry.c | 8 +- frame/3/her2k/3mh/bli_her2k3mh_entry.h | 8 +- .../{4m/bli_her2k4m.c => 4m1/bli_her2k4m1.c} | 14 +- .../{4m/bli_her2k4m.h => 4m1/bli_her2k4m1.h} | 14 +- .../4m1/bli_her2k4m1_entry.c} | 16 +- .../4m1/bli_her2k4m1_entry.h} | 10 +- 
frame/3/her2k/4mh/bli_her2k4mh.c | 8 +- frame/3/her2k/4mh/bli_her2k4mh.h | 8 +- frame/3/her2k/4mh/bli_her2k4mh_entry.c | 8 +- frame/3/her2k/4mh/bli_her2k4mh_entry.h | 8 +- frame/3/her2k/bli_her2k.c | 4 +- frame/3/her2k/bli_her2k.h | 4 +- frame/3/herk/3m/bli_herk3m_entry.c | 47 ---- frame/3/herk/3m/bli_herk3m_entry.h | 39 --- .../{3m/bli_herk3m.c => 3m1/bli_herk3m1.c} | 14 +- .../{3m/bli_herk3m.h => 3m1/bli_herk3m1.h} | 12 +- .../3m1/bli_herk3m1_entry.c} | 9 +- .../3m1/bli_herk3m1_entry.h} | 3 +- .../old/bli_herk3m1_cntl.h} | 4 +- frame/3/herk/3mh/bli_herk3mh.c | 6 +- frame/3/herk/3mh/bli_herk3mh.h | 6 +- frame/3/herk/3mh/bli_herk3mh_entry.c | 6 +- frame/3/herk/3mh/bli_herk3mh_entry.h | 6 +- frame/3/herk/4m/bli_herk4m_entry.c | 47 ---- frame/3/herk/4m/bli_herk4m_entry.h | 39 --- .../{4m/bli_herk4m.c => 4m1/bli_herk4m1.c} | 12 +- .../{4m/bli_herk4m.h => 4m1/bli_herk4m1.h} | 12 +- .../4m1/bli_herk4m1_entry.c} | 9 +- .../4m1/bli_herk4m1_entry.h} | 3 +- .../old/bli_herk4m1_cntl.h} | 4 +- frame/3/herk/4mh/bli_herk4mh.c | 6 +- frame/3/herk/4mh/bli_herk4mh.h | 6 +- frame/3/herk/4mh/bli_herk4mh_entry.c | 6 +- frame/3/herk/4mh/bli_herk4mh_entry.h | 6 +- frame/3/herk/bli_herk.c | 4 +- frame/3/herk/bli_herk.h | 4 +- frame/3/symm/3m/bli_symm3m.h | 69 ------ frame/3/symm/3m/bli_symm3m_entry.c | 49 ---- frame/3/symm/3m/bli_symm3m_entry.h | 41 ---- .../{3m/bli_symm3m.c => 3m1/bli_symm3m1.c} | 18 +- .../bli_hemm4m.h => symm/3m1/bli_symm3m1.h} | 16 +- frame/3/symm/3m1/bli_symm3m1_entry.c | 49 ++++ frame/3/symm/3m1/bli_symm3m1_entry.h | 41 ++++ frame/3/symm/3mh/bli_symm3mh.c | 10 +- frame/3/symm/3mh/bli_symm3mh.h | 10 +- frame/3/symm/3mh/bli_symm3mh_entry.c | 10 +- frame/3/symm/3mh/bli_symm3mh_entry.h | 10 +- frame/3/symm/4m/bli_symm4m_entry.c | 49 ---- frame/3/symm/4m/bli_symm4m_entry.h | 41 ---- .../{4m/bli_symm4m.c => 4m1/bli_symm4m1.c} | 16 +- .../{4m/bli_symm4m.h => 4m1/bli_symm4m1.h} | 16 +- frame/3/symm/4m1/bli_symm4m1_entry.c | 49 ++++ frame/3/symm/4m1/bli_symm4m1_entry.h | 
41 ++++ frame/3/symm/4mh/bli_symm4mh.c | 10 +- frame/3/symm/4mh/bli_symm4mh.h | 10 +- frame/3/symm/4mh/bli_symm4mh_entry.c | 10 +- frame/3/symm/4mh/bli_symm4mh_entry.h | 10 +- frame/3/symm/bli_symm.c | 4 +- frame/3/symm/bli_symm.h | 4 +- .../{3m/bli_syr2k3m.c => 3m1/bli_syr2k3m1.c} | 16 +- .../bli_gemm3m.h => syr2k/3m1/bli_syr2k3m1.h} | 19 +- frame/3/syr2k/3m1/bli_syr2k3m1_entry.c | 48 ++++ frame/3/syr2k/3m1/bli_syr2k3m1_entry.h | 40 +++ frame/3/syr2k/3mh/bli_syr2k3mh.c | 8 +- frame/3/syr2k/3mh/bli_syr2k3mh.h | 8 +- frame/3/syr2k/3mh/bli_syr2k3mh_entry.c | 8 +- frame/3/syr2k/3mh/bli_syr2k3mh_entry.h | 8 +- .../{4m/bli_syr2k4m.c => 4m1/bli_syr2k4m1.c} | 14 +- .../bli_gemm4m.h => syr2k/4m1/bli_syr2k4m1.h} | 19 +- frame/3/syr2k/4m1/bli_syr2k4m1_entry.c | 48 ++++ frame/3/syr2k/4m1/bli_syr2k4m1_entry.h | 40 +++ frame/3/syr2k/4mh/bli_syr2k4mh.c | 8 +- frame/3/syr2k/4mh/bli_syr2k4mh.h | 8 +- frame/3/syr2k/4mh/bli_syr2k4mh_entry.c | 8 +- frame/3/syr2k/4mh/bli_syr2k4mh_entry.h | 8 +- frame/3/syr2k/bli_syr2k.c | 4 +- frame/3/syr2k/bli_syr2k.h | 4 +- frame/3/syrk/3m/bli_syrk3m_entry.c | 47 ---- frame/3/syrk/3m/bli_syrk3m_entry.h | 39 --- .../{3m/bli_syrk3m.c => 3m1/bli_syrk3m1.c} | 14 +- .../{3m/bli_syrk3m.h => 3m1/bli_syrk3m1.h} | 12 +- frame/3/syrk/3m1/bli_syrk3m1_entry.c | 47 ++++ frame/3/syrk/3m1/bli_syrk3m1_entry.h | 39 +++ frame/3/syrk/3mh/bli_syrk3mh.c | 6 +- frame/3/syrk/3mh/bli_syrk3mh.h | 6 +- frame/3/syrk/3mh/bli_syrk3mh_entry.c | 6 +- frame/3/syrk/3mh/bli_syrk3mh_entry.h | 6 +- frame/3/syrk/4m/bli_syrk4m_entry.c | 47 ---- frame/3/syrk/4m/bli_syrk4m_entry.h | 39 --- .../{4m/bli_syrk4m.c => 4m1/bli_syrk4m1.c} | 12 +- .../{4m/bli_syrk4m.h => 4m1/bli_syrk4m1.h} | 12 +- frame/3/syrk/4m1/bli_syrk4m1_entry.c | 47 ++++ frame/3/syrk/4m1/bli_syrk4m1_entry.h | 39 +++ frame/3/syrk/4mh/bli_syrk4mh.c | 6 +- frame/3/syrk/4mh/bli_syrk4mh.h | 6 +- frame/3/syrk/4mh/bli_syrk4mh_entry.c | 6 +- frame/3/syrk/4mh/bli_syrk4mh_entry.h | 6 +- frame/3/syrk/bli_syrk.c | 4 +- 
frame/3/syrk/bli_syrk.h | 4 +- frame/3/trmm/3m/bli_trmm3m_entry.h | 39 --- frame/3/trmm/3m/old/bli_trmm3m_cntl.h | 36 --- .../{3m/bli_trmm3m.c => 3m1/bli_trmm3m1.c} | 14 +- .../{3m/bli_trmm3m.h => 3m1/bli_trmm3m1.h} | 12 +- .../bli_trmm3m1_entry.c} | 12 +- frame/3/trmm/3m1/bli_trmm3m1_entry.h | 39 +++ frame/3/trmm/3m1/old/bli_trmm3m1_cntl.h | 36 +++ frame/3/trmm/4m/bli_trmm4m_entry.h | 39 --- frame/3/trmm/4m/old/bli_trmm4m_cntl.h | 36 --- .../{4m/bli_trmm4m.c => 4m1/bli_trmm4m1.c} | 12 +- .../{4m/bli_trmm4m.h => 4m1/bli_trmm4m1.h} | 12 +- .../bli_trmm4m1_entry.c} | 12 +- frame/3/trmm/4m1/bli_trmm4m1_entry.h | 39 +++ frame/3/trmm/4m1/old/bli_trmm4m1_cntl.h | 36 +++ frame/3/trmm/bli_trmm.c | 4 +- frame/3/trmm/bli_trmm.h | 4 +- frame/3/trmm/bli_trmm_ll_ker_var2.c | 6 +- frame/3/trmm/bli_trmm_lu_ker_var2.c | 6 +- frame/3/trmm/bli_trmm_query.c | 12 +- frame/3/trmm/bli_trmm_rl_ker_var2.c | 6 +- frame/3/trmm/bli_trmm_ru_ker_var2.c | 6 +- .../{3m/bli_trmm33m.c => 3m1/bli_trmm33m1.c} | 18 +- frame/3/trmm3/3m1/bli_trmm33m1.h | 71 ++++++ .../3m1/bli_trmm33m1_entry.c} | 18 +- frame/3/trmm3/3m1/bli_trmm33m1_entry.h | 41 ++++ frame/3/trmm3/3mh/bli_trmm33mh.c | 10 +- frame/3/trmm3/3mh/bli_trmm33mh.h | 10 +- frame/3/trmm3/3mh/bli_trmm33mh_entry.c | 10 +- frame/3/trmm3/3mh/bli_trmm33mh_entry.h | 10 +- .../{4m/bli_trmm34m.c => 4m1/bli_trmm34m1.c} | 16 +- frame/3/trmm3/4m1/bli_trmm34m1.h | 71 ++++++ frame/3/trmm3/4m1/bli_trmm34m1_entry.c | 49 ++++ frame/3/trmm3/4m1/bli_trmm34m1_entry.h | 41 ++++ frame/3/trmm3/4mh/bli_trmm34mh.c | 10 +- frame/3/trmm3/4mh/bli_trmm34mh.h | 10 +- frame/3/trmm3/4mh/bli_trmm34mh_entry.c | 10 +- frame/3/trmm3/4mh/bli_trmm34mh_entry.h | 10 +- frame/3/trmm3/bli_trmm3.c | 4 +- frame/3/trmm3/bli_trmm3.h | 4 +- frame/3/trsm/3m/bli_trsm3m_cntl.h | 36 --- frame/3/trsm/3m/bli_trsm3m_entry.c | 49 ---- frame/3/trsm/3m/bli_trsm3m_entry.h | 39 --- .../{3m/bli_trsm3m.c => 3m1/bli_trsm3m1.c} | 14 +- .../{3m/bli_trsm3m.h => 3m1/bli_trsm3m1.h} | 22 +- 
.../bli_trsm3m1_cntl.c} | 190 +++++++------- frame/3/trsm/3m1/bli_trsm3m1_cntl.h | 36 +++ frame/3/trsm/3m1/bli_trsm3m1_entry.c | 49 ++++ frame/3/trsm/3m1/bli_trsm3m1_entry.h | 39 +++ .../ukernels/bli_gemmtrsm3m1_l_ukr_ref.c} | 14 +- .../ukernels/bli_gemmtrsm3m1_l_ukr_ref.h} | 2 +- .../ukernels/bli_gemmtrsm3m1_u_ukr_ref.c} | 2 +- .../ukernels/bli_gemmtrsm3m1_u_ukr_ref.h} | 2 +- .../ukernels/bli_trsm3m1_l_ukr_ref.c} | 2 +- .../ukernels/bli_trsm3m1_l_ukr_ref.h} | 2 +- .../ukernels/bli_trsm3m1_u_ukr_ref.c} | 2 +- .../ukernels/bli_trsm3m1_u_ukr_ref.h} | 2 +- frame/3/trsm/4m/bli_trsm4m_cntl.h | 36 --- frame/3/trsm/4m/bli_trsm4m_entry.c | 49 ---- frame/3/trsm/4m/bli_trsm4m_entry.h | 39 --- .../{4m/bli_trsm4m.c => 4m1/bli_trsm4m1.c} | 12 +- .../{4m/bli_trsm4m.h => 4m1/bli_trsm4m1.h} | 22 +- .../bli_trsm4m1_cntl.c} | 190 +++++++------- frame/3/trsm/4m1/bli_trsm4m1_cntl.h | 36 +++ frame/3/trsm/4m1/bli_trsm4m1_entry.c | 49 ++++ frame/3/trsm/4m1/bli_trsm4m1_entry.h | 39 +++ .../ukernels/bli_gemmtrsm4m1_l_ukr_ref.c} | 2 +- .../ukernels/bli_gemmtrsm4m1_l_ukr_ref.h} | 2 +- .../ukernels/bli_gemmtrsm4m1_u_ukr_ref.c} | 10 +- .../ukernels/bli_gemmtrsm4m1_u_ukr_ref.h} | 2 +- .../ukernels/bli_trsm4m1_l_ukr_ref.c} | 2 +- .../ukernels/bli_trsm4m1_l_ukr_ref.h} | 2 +- .../ukernels/bli_trsm4m1_u_ukr_ref.c} | 2 +- .../ukernels/bli_trsm4m1_u_ukr_ref.h} | 2 +- frame/3/trsm/bli_trsm.c | 4 +- frame/3/trsm/bli_trsm.h | 4 +- frame/3/trsm/bli_trsm_ll_ker_var2.c | 6 +- frame/3/trsm/bli_trsm_lu_ker_var2.c | 6 +- frame/3/trsm/bli_trsm_query.c | 78 +++--- frame/3/trsm/bli_trsm_rl_ker_var2.c | 8 +- frame/3/trsm/bli_trsm_ru_ker_var2.c | 8 +- frame/base/bli_info.c | 56 ++--- frame/base/induced/bli_3m.h | 49 ---- frame/base/induced/{bli_3m.c => bli_3m1.c} | 46 ++-- .../induced/bli_3m1.h} | 24 +- frame/base/induced/bli_4m.c | 50 ++-- frame/base/induced/bli_4m.h | 24 +- frame/base/induced/bli_xm.c | 8 +- frame/cntl/bli_cntl_init.c | 40 +-- frame/include/bli_kernel_3m1_macro_defs.h | 107 ++++++++ 
frame/include/bli_kernel_3m_macro_defs.h | 201 --------------- frame/include/bli_kernel_3mh_macro_defs.h | 67 +++++ frame/include/bli_kernel_3mi_macro_defs.h | 137 +++++++++++ frame/include/bli_kernel_4m1_macro_defs.h | 107 ++++++++ frame/include/bli_kernel_4m_macro_defs.h | 218 ---------------- frame/include/bli_kernel_4mb_macro_defs.h | 67 +++++ frame/include/bli_kernel_4mh_macro_defs.h | 67 +++++ frame/include/bli_kernel_4mi_macro_defs.h | 137 +++++++++++ frame/include/bli_kernel_macro_defs.h | 12 +- frame/include/bli_kernel_post_macro_defs.h | 40 +-- frame/include/bli_kernel_pre_macro_defs.h | 232 +++++++++--------- frame/include/bli_kernel_prototypes.h | 60 ++--- frame/include/bli_kernel_rih_macro_defs.h | 43 +--- frame/include/blis.h | 15 +- test/3m4m/Makefile | 82 +++---- test/3m4m/runme.sh | 9 +- test/3m4m/test_gemm.c | 16 +- testsuite/input.general | 6 +- testsuite/src/test_libblis.c | 20 +- testsuite/src/test_libblis.h | 4 +- 283 files changed, 3546 insertions(+), 3303 deletions(-) rename frame/1m/packm/{bli_packm_cxk_3m.c => bli_packm_cxk_3mi.c} (91%) rename frame/1m/packm/{bli_packm_cxk_3m.h => bli_packm_cxk_3mi.h} (96%) rename frame/1m/packm/{bli_packm_cxk_4m.c => bli_packm_cxk_4mi.c} (91%) rename frame/1m/packm/{bli_packm_cxk_4m.h => bli_packm_cxk_4mi.h} (96%) rename frame/1m/packm/{bli_packm_struc_cxk_3m.c => bli_packm_struc_cxk_3mi.c} (95%) rename frame/1m/packm/{bli_packm_struc_cxk_3m.h => bli_packm_struc_cxk_3mi.h} (97%) rename frame/1m/packm/{bli_packm_struc_cxk_4m.c => bli_packm_struc_cxk_4mi.c} (95%) rename frame/1m/packm/{bli_packm_struc_cxk_4m.h => bli_packm_struc_cxk_4mi.h} (97%) rename frame/1m/packm/ukernels/{bli_packm_ref_cxk_3m.c => bli_packm_ref_cxk_3mi.c} (99%) rename frame/1m/packm/ukernels/{bli_packm_ref_cxk_4m.h => bli_packm_ref_cxk_3mi.h} (83%) rename frame/1m/packm/ukernels/{bli_packm_ref_cxk_4m.c => bli_packm_ref_cxk_4mi.c} (99%) rename frame/1m/packm/ukernels/{bli_packm_ref_cxk_3m.h => bli_packm_ref_cxk_4mi.h} (85%) rename 
frame/3/gemm/{3m/bli_gemm3m.c => 3m1/bli_gemm3m1.c} (91%) rename frame/3/{syr2k/3m/bli_syr2k3m.h => gemm/3m1/bli_gemm3m1.h} (92%) rename frame/3/gemm/{3m/bli_gemm3m_cntl.c => 3m1/bli_gemm3m1_cntl.c} (73%) rename frame/3/gemm/{3m/bli_gemm3m_cntl.h => 3m1/bli_gemm3m1_cntl.h} (95%) rename frame/3/{her2k/4m/bli_her2k4m_entry.c => gemm/3m1/bli_gemm3m1_entry.c} (92%) rename frame/3/{her2k/3m/bli_her2k3m_entry.h => gemm/3m1/bli_gemm3m1_entry.h} (97%) rename frame/3/gemm/{3m/ukernels/bli_gemm3m_ukr_ref.c => 3m1/ukernels/bli_gemm3m1_ukr_ref.c} (99%) rename frame/3/gemm/{3m/ukernels/bli_gemm3m_ukr_ref.h => 3m1/ukernels/bli_gemm3m1_ukr_ref.h} (98%) rename frame/3/gemm/{4m/bli_gemm4m.c => 4m1/bli_gemm4m1.c} (93%) rename frame/3/{syr2k/4m/bli_syr2k4m.h => gemm/4m1/bli_gemm4m1.h} (92%) rename frame/3/gemm/{4m/bli_gemm4m_cntl.c => 4m1/bli_gemm4m1_cntl.c} (73%) rename frame/3/gemm/{4m/bli_gemm4m_cntl.h => 4m1/bli_gemm4m1_cntl.h} (95%) rename frame/3/{her2k/3m/bli_her2k3m_entry.c => gemm/4m1/bli_gemm4m1_entry.c} (92%) rename frame/3/{her2k/4m/bli_her2k4m_entry.h => gemm/4m1/bli_gemm4m1_entry.h} (97%) rename frame/3/gemm/{4m/ukernels/bli_gemm4m_ukr_ref.c => 4m1/ukernels/bli_gemm4m1_ukr_ref.c} (96%) rename frame/3/gemm/{4m/ukernels/bli_gemm4m_ukr_ref.h => 4m1/ukernels/bli_gemm4m1_ukr_ref.h} (98%) delete mode 100644 frame/3/hemm/3m/bli_hemm3m.h delete mode 100644 frame/3/hemm/3m/bli_hemm3m_entry.h rename frame/3/hemm/{3m/bli_hemm3m.c => 3m1/bli_hemm3m1.c} (90%) rename frame/3/{trmm3/3m/bli_trmm33m.h => hemm/3m1/bli_hemm3m1.h} (93%) rename frame/3/{trmm3/3m/bli_trmm33m_entry.c => hemm/3m1/bli_hemm3m1_entry.c} (92%) rename frame/3/{trmm3/3m/bli_trmm33m_entry.h => hemm/3m1/bli_hemm3m1_entry.h} (98%) delete mode 100644 frame/3/hemm/4m/bli_hemm4m_entry.h rename frame/3/hemm/{4m/bli_hemm4m.c => 4m1/bli_hemm4m1.c} (92%) rename frame/3/{trmm3/4m/bli_trmm34m.h => hemm/4m1/bli_hemm4m1.h} (93%) rename frame/3/{trmm3/4m/bli_trmm34m_entry.c => hemm/4m1/bli_hemm4m1_entry.c} (92%) rename 
frame/3/{trmm3/4m/bli_trmm34m_entry.h => hemm/4m1/bli_hemm4m1_entry.h} (98%) rename frame/3/her2k/{3m/bli_her2k3m.c => 3m1/bli_her2k3m1.c} (91%) rename frame/3/her2k/{3m/bli_her2k3m.h => 3m1/bli_her2k3m1.h} (91%) rename frame/3/{gemm/4m/bli_gemm4m_entry.c => her2k/3m1/bli_her2k3m1_entry.c} (85%) rename frame/3/{gemm/4m/bli_gemm4m_entry.h => her2k/3m1/bli_her2k3m1_entry.h} (89%) rename frame/3/her2k/{4m/bli_her2k4m.c => 4m1/bli_her2k4m1.c} (93%) rename frame/3/her2k/{4m/bli_her2k4m.h => 4m1/bli_her2k4m1.h} (91%) rename frame/3/{gemm/3m/bli_gemm3m_entry.c => her2k/4m1/bli_her2k4m1_entry.c} (85%) rename frame/3/{gemm/3m/bli_gemm3m_entry.h => her2k/4m1/bli_her2k4m1_entry.h} (89%) delete mode 100644 frame/3/herk/3m/bli_herk3m_entry.c delete mode 100644 frame/3/herk/3m/bli_herk3m_entry.h rename frame/3/herk/{3m/bli_herk3m.c => 3m1/bli_herk3m1.c} (92%) rename frame/3/herk/{3m/bli_herk3m.h => 3m1/bli_herk3m1.h} (92%) rename frame/3/{syr2k/3m/bli_syr2k3m_entry.c => herk/3m1/bli_herk3m1_entry.c} (91%) rename frame/3/{syr2k/3m/bli_syr2k3m_entry.h => herk/3m1/bli_herk3m1_entry.h} (96%) rename frame/3/herk/{4m/old/bli_herk4m_cntl.h => 3m1/old/bli_herk3m1_cntl.h} (95%) delete mode 100644 frame/3/herk/4m/bli_herk4m_entry.c delete mode 100644 frame/3/herk/4m/bli_herk4m_entry.h rename frame/3/herk/{4m/bli_herk4m.c => 4m1/bli_herk4m1.c} (94%) rename frame/3/herk/{4m/bli_herk4m.h => 4m1/bli_herk4m1.h} (92%) rename frame/3/{syr2k/4m/bli_syr2k4m_entry.c => herk/4m1/bli_herk4m1_entry.c} (91%) rename frame/3/{syr2k/4m/bli_syr2k4m_entry.h => herk/4m1/bli_herk4m1_entry.h} (96%) rename frame/3/herk/{3m/old/bli_herk3m_cntl.h => 4m1/old/bli_herk4m1_cntl.h} (95%) delete mode 100644 frame/3/symm/3m/bli_symm3m.h delete mode 100644 frame/3/symm/3m/bli_symm3m_entry.c delete mode 100644 frame/3/symm/3m/bli_symm3m_entry.h rename frame/3/symm/{3m/bli_symm3m.c => 3m1/bli_symm3m1.c} (91%) rename frame/3/{hemm/4m/bli_hemm4m.h => symm/3m1/bli_symm3m1.h} (90%) create mode 100644 
frame/3/symm/3m1/bli_symm3m1_entry.c create mode 100644 frame/3/symm/3m1/bli_symm3m1_entry.h delete mode 100644 frame/3/symm/4m/bli_symm4m_entry.c delete mode 100644 frame/3/symm/4m/bli_symm4m_entry.h rename frame/3/symm/{4m/bli_symm4m.c => 4m1/bli_symm4m1.c} (92%) rename frame/3/symm/{4m/bli_symm4m.h => 4m1/bli_symm4m1.h} (90%) create mode 100644 frame/3/symm/4m1/bli_symm4m1_entry.c create mode 100644 frame/3/symm/4m1/bli_symm4m1_entry.h rename frame/3/syr2k/{3m/bli_syr2k3m.c => 3m1/bli_syr2k3m1.c} (91%) rename frame/3/{gemm/3m/bli_gemm3m.h => syr2k/3m1/bli_syr2k3m1.h} (88%) create mode 100644 frame/3/syr2k/3m1/bli_syr2k3m1_entry.c create mode 100644 frame/3/syr2k/3m1/bli_syr2k3m1_entry.h rename frame/3/syr2k/{4m/bli_syr2k4m.c => 4m1/bli_syr2k4m1.c} (93%) rename frame/3/{gemm/4m/bli_gemm4m.h => syr2k/4m1/bli_syr2k4m1.h} (88%) create mode 100644 frame/3/syr2k/4m1/bli_syr2k4m1_entry.c create mode 100644 frame/3/syr2k/4m1/bli_syr2k4m1_entry.h delete mode 100644 frame/3/syrk/3m/bli_syrk3m_entry.c delete mode 100644 frame/3/syrk/3m/bli_syrk3m_entry.h rename frame/3/syrk/{3m/bli_syrk3m.c => 3m1/bli_syrk3m1.c} (91%) rename frame/3/syrk/{3m/bli_syrk3m.h => 3m1/bli_syrk3m1.h} (92%) create mode 100644 frame/3/syrk/3m1/bli_syrk3m1_entry.c create mode 100644 frame/3/syrk/3m1/bli_syrk3m1_entry.h delete mode 100644 frame/3/syrk/4m/bli_syrk4m_entry.c delete mode 100644 frame/3/syrk/4m/bli_syrk4m_entry.h rename frame/3/syrk/{4m/bli_syrk4m.c => 4m1/bli_syrk4m1.c} (93%) rename frame/3/syrk/{4m/bli_syrk4m.h => 4m1/bli_syrk4m1.h} (92%) create mode 100644 frame/3/syrk/4m1/bli_syrk4m1_entry.c create mode 100644 frame/3/syrk/4m1/bli_syrk4m1_entry.h delete mode 100644 frame/3/trmm/3m/bli_trmm3m_entry.h delete mode 100644 frame/3/trmm/3m/old/bli_trmm3m_cntl.h rename frame/3/trmm/{3m/bli_trmm3m.c => 3m1/bli_trmm3m1.c} (91%) rename frame/3/trmm/{3m/bli_trmm3m.h => 3m1/bli_trmm3m1.h} (92%) rename frame/3/trmm/{3m/bli_trmm3m_entry.c => 3m1/bli_trmm3m1_entry.c} (89%) create mode 100644 
frame/3/trmm/3m1/bli_trmm3m1_entry.h create mode 100644 frame/3/trmm/3m1/old/bli_trmm3m1_cntl.h delete mode 100644 frame/3/trmm/4m/bli_trmm4m_entry.h delete mode 100644 frame/3/trmm/4m/old/bli_trmm4m_cntl.h rename frame/3/trmm/{4m/bli_trmm4m.c => 4m1/bli_trmm4m1.c} (93%) rename frame/3/trmm/{4m/bli_trmm4m.h => 4m1/bli_trmm4m1.h} (92%) rename frame/3/trmm/{4m/bli_trmm4m_entry.c => 4m1/bli_trmm4m1_entry.c} (89%) create mode 100644 frame/3/trmm/4m1/bli_trmm4m1_entry.h create mode 100644 frame/3/trmm/4m1/old/bli_trmm4m1_cntl.h rename frame/3/trmm3/{3m/bli_trmm33m.c => 3m1/bli_trmm33m1.c} (91%) create mode 100644 frame/3/trmm3/3m1/bli_trmm33m1.h rename frame/3/{hemm/4m/bli_hemm4m_entry.c => trmm3/3m1/bli_trmm33m1_entry.c} (83%) create mode 100644 frame/3/trmm3/3m1/bli_trmm33m1_entry.h rename frame/3/trmm3/{4m/bli_trmm34m.c => 4m1/bli_trmm34m1.c} (92%) create mode 100644 frame/3/trmm3/4m1/bli_trmm34m1.h create mode 100644 frame/3/trmm3/4m1/bli_trmm34m1_entry.c create mode 100644 frame/3/trmm3/4m1/bli_trmm34m1_entry.h delete mode 100644 frame/3/trsm/3m/bli_trsm3m_cntl.h delete mode 100644 frame/3/trsm/3m/bli_trsm3m_entry.c delete mode 100644 frame/3/trsm/3m/bli_trsm3m_entry.h rename frame/3/trsm/{3m/bli_trsm3m.c => 3m1/bli_trsm3m1.c} (91%) rename frame/3/trsm/{3m/bli_trsm3m.h => 3m1/bli_trsm3m1.h} (86%) rename frame/3/trsm/{3m/bli_trsm3m_cntl.c => 3m1/bli_trsm3m1_cntl.c} (65%) create mode 100644 frame/3/trsm/3m1/bli_trsm3m1_cntl.h create mode 100644 frame/3/trsm/3m1/bli_trsm3m1_entry.c create mode 100644 frame/3/trsm/3m1/bli_trsm3m1_entry.h rename frame/3/trsm/{3m/ukernels/bli_gemmtrsm3m_l_ukr_ref.c => 3m1/ukernels/bli_gemmtrsm3m1_l_ukr_ref.c} (93%) rename frame/3/trsm/{4m/ukernels/bli_gemmtrsm4m_l_ukr_ref.h => 3m1/ukernels/bli_gemmtrsm3m1_l_ukr_ref.h} (97%) rename frame/3/trsm/{3m/ukernels/bli_gemmtrsm3m_u_ukr_ref.c => 3m1/ukernels/bli_gemmtrsm3m1_u_ukr_ref.c} (98%) rename frame/3/trsm/{4m/ukernels/bli_gemmtrsm4m_u_ukr_ref.h => 3m1/ukernels/bli_gemmtrsm3m1_u_ukr_ref.h} 
(97%) rename frame/3/trsm/{3m/ukernels/bli_trsm3m_l_ukr_ref.c => 3m1/ukernels/bli_trsm3m1_l_ukr_ref.c} (99%) rename frame/3/trsm/{3m/ukernels/bli_trsm3m_l_ukr_ref.h => 3m1/ukernels/bli_trsm3m1_l_ukr_ref.h} (97%) rename frame/3/trsm/{3m/ukernels/bli_trsm3m_u_ukr_ref.c => 3m1/ukernels/bli_trsm3m1_u_ukr_ref.c} (99%) rename frame/3/trsm/{4m/ukernels/bli_trsm4m_u_ukr_ref.h => 3m1/ukernels/bli_trsm3m1_u_ukr_ref.h} (97%) delete mode 100644 frame/3/trsm/4m/bli_trsm4m_cntl.h delete mode 100644 frame/3/trsm/4m/bli_trsm4m_entry.c delete mode 100644 frame/3/trsm/4m/bli_trsm4m_entry.h rename frame/3/trsm/{4m/bli_trsm4m.c => 4m1/bli_trsm4m1.c} (93%) rename frame/3/trsm/{4m/bli_trsm4m.h => 4m1/bli_trsm4m1.h} (86%) rename frame/3/trsm/{4m/bli_trsm4m_cntl.c => 4m1/bli_trsm4m1_cntl.c} (65%) create mode 100644 frame/3/trsm/4m1/bli_trsm4m1_cntl.h create mode 100644 frame/3/trsm/4m1/bli_trsm4m1_entry.c create mode 100644 frame/3/trsm/4m1/bli_trsm4m1_entry.h rename frame/3/trsm/{4m/ukernels/bli_gemmtrsm4m_l_ukr_ref.c => 4m1/ukernels/bli_gemmtrsm4m1_l_ukr_ref.c} (98%) rename frame/3/trsm/{3m/ukernels/bli_gemmtrsm3m_l_ukr_ref.h => 4m1/ukernels/bli_gemmtrsm4m1_l_ukr_ref.h} (97%) rename frame/3/trsm/{4m/ukernels/bli_gemmtrsm4m_u_ukr_ref.c => 4m1/ukernels/bli_gemmtrsm4m1_u_ukr_ref.c} (94%) rename frame/3/trsm/{3m/ukernels/bli_gemmtrsm3m_u_ukr_ref.h => 4m1/ukernels/bli_gemmtrsm4m1_u_ukr_ref.h} (97%) rename frame/3/trsm/{4m/ukernels/bli_trsm4m_l_ukr_ref.c => 4m1/ukernels/bli_trsm4m1_l_ukr_ref.c} (99%) rename frame/3/trsm/{3m/ukernels/bli_trsm3m_u_ukr_ref.h => 4m1/ukernels/bli_trsm4m1_l_ukr_ref.h} (97%) rename frame/3/trsm/{4m/ukernels/bli_trsm4m_u_ukr_ref.c => 4m1/ukernels/bli_trsm4m1_u_ukr_ref.c} (99%) rename frame/3/trsm/{4m/ukernels/bli_trsm4m_l_ukr_ref.h => 4m1/ukernels/bli_trsm4m1_u_ukr_ref.h} (97%) delete mode 100644 frame/base/induced/bli_3m.h rename frame/base/induced/{bli_3m.c => bli_3m1.c} (58%) rename frame/{3/hemm/3m/bli_hemm3m_entry.c => base/induced/bli_3m1.h} (79%) create mode 
100644 frame/include/bli_kernel_3m1_macro_defs.h delete mode 100644 frame/include/bli_kernel_3m_macro_defs.h create mode 100644 frame/include/bli_kernel_3mh_macro_defs.h create mode 100644 frame/include/bli_kernel_3mi_macro_defs.h create mode 100644 frame/include/bli_kernel_4m1_macro_defs.h delete mode 100644 frame/include/bli_kernel_4m_macro_defs.h create mode 100644 frame/include/bli_kernel_4mb_macro_defs.h create mode 100644 frame/include/bli_kernel_4mh_macro_defs.h create mode 100644 frame/include/bli_kernel_4mi_macro_defs.h diff --git a/frame/1m/packm/bli_packm.h b/frame/1m/packm/bli_packm.h index 6fecc035c..95efe21f1 100644 --- a/frame/1m/packm/bli_packm.h +++ b/frame/1m/packm/bli_packm.h @@ -45,12 +45,12 @@ #include "bli_packm_blk_var2.h" #include "bli_packm_struc_cxk.h" -#include "bli_packm_struc_cxk_4m.h" -#include "bli_packm_struc_cxk_3m.h" +#include "bli_packm_struc_cxk_4mi.h" +#include "bli_packm_struc_cxk_3mi.h" #include "bli_packm_struc_cxk_rih.h" #include "bli_packm_cxk.h" -#include "bli_packm_cxk_4m.h" -#include "bli_packm_cxk_3m.h" +#include "bli_packm_cxk_4mi.h" +#include "bli_packm_cxk_3mi.h" #include "bli_packm_cxk_rih.h" diff --git a/frame/1m/packm/bli_packm_blk_var1.c b/frame/1m/packm/bli_packm_blk_var1.c index 8d2289dff..89f6b350f 100644 --- a/frame/1m/packm/bli_packm_blk_var1.c +++ b/frame/1m/packm/bli_packm_blk_var1.c @@ -61,8 +61,6 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,packm_blk_var1); extern func_t* packm_struc_cxk_kers; -extern func_t* packm_struc_cxk_4m_kers; -extern func_t* packm_struc_cxk_3m_kers; void bli_packm_blk_var1( obj_t* c, diff --git a/frame/1m/packm/bli_packm_blk_var2.c b/frame/1m/packm/bli_packm_blk_var2.c index 199a2ca4c..82ef69216 100644 --- a/frame/1m/packm/bli_packm_blk_var2.c +++ b/frame/1m/packm/bli_packm_blk_var2.c @@ -61,8 +61,8 @@ typedef void (*FUNCPTR_T)( //static FUNCPTR_T GENARRAY(ftypes,packm_blk_var2); extern func_t* packm_struc_cxk_kers; -extern func_t* packm_struc_cxk_4m_kers; 
-extern func_t* packm_struc_cxk_3m_kers; +extern func_t* packm_struc_cxk_4mi_kers; +extern func_t* packm_struc_cxk_3mi_kers; extern func_t* packm_struc_cxk_rih_kers; @@ -152,8 +152,8 @@ void bli_packm_blk_var2( obj_t* c, // Choose the correct func_t object based on the pack_t schema. - if ( bli_is_4mi_packed( schema ) ) packm_kers = packm_struc_cxk_4m_kers; - else if ( bli_is_3mi_packed( schema ) ) packm_kers = packm_struc_cxk_3m_kers; + if ( bli_is_4mi_packed( schema ) ) packm_kers = packm_struc_cxk_4mi_kers; + else if ( bli_is_3mi_packed( schema ) ) packm_kers = packm_struc_cxk_3mi_kers; else if ( bli_is_ro_packed( schema ) || bli_is_io_packed( schema ) || bli_is_rpi_packed( schema ) ) packm_kers = packm_struc_cxk_rih_kers; diff --git a/frame/1m/packm/bli_packm_cntl.c b/frame/1m/packm/bli_packm_cntl.c index 0147cc2f1..c1e39f5a6 100644 --- a/frame/1m/packm/bli_packm_cntl.c +++ b/frame/1m/packm/bli_packm_cntl.c @@ -38,8 +38,8 @@ blksz_t* packm_mult_ldim; blksz_t* packm_mult_nvec; func_t* packm_struc_cxk_kers; -func_t* packm_struc_cxk_4m_kers; -func_t* packm_struc_cxk_3m_kers; +func_t* packm_struc_cxk_4mi_kers; +func_t* packm_struc_cxk_3mi_kers; func_t* packm_struc_cxk_rih_kers; packm_t* packm_cntl_row; @@ -61,19 +61,19 @@ void bli_packm_cntl_init() bli_cpackm_struc_cxk, FALSE, bli_zpackm_struc_cxk, FALSE ); - packm_struc_cxk_4m_kers + packm_struc_cxk_4mi_kers = - bli_func_obj_create( NULL, FALSE, - NULL, FALSE, - bli_cpackm_struc_cxk_4m, FALSE, - bli_zpackm_struc_cxk_4m, FALSE ); + bli_func_obj_create( NULL, FALSE, + NULL, FALSE, + bli_cpackm_struc_cxk_4mi, FALSE, + bli_zpackm_struc_cxk_4mi, FALSE ); - packm_struc_cxk_3m_kers + packm_struc_cxk_3mi_kers = - bli_func_obj_create( NULL, FALSE, - NULL, FALSE, - bli_cpackm_struc_cxk_3m, FALSE, - bli_zpackm_struc_cxk_3m, FALSE ); + bli_func_obj_create( NULL, FALSE, + NULL, FALSE, + bli_cpackm_struc_cxk_3mi, FALSE, + bli_zpackm_struc_cxk_3mi, FALSE ); packm_struc_cxk_rih_kers = @@ -152,8 +152,8 @@ void 
bli_packm_cntl_init() void bli_packm_cntl_finalize() { bli_func_obj_free( packm_struc_cxk_kers ); - bli_func_obj_free( packm_struc_cxk_4m_kers ); - bli_func_obj_free( packm_struc_cxk_3m_kers ); + bli_func_obj_free( packm_struc_cxk_4mi_kers ); + bli_func_obj_free( packm_struc_cxk_3mi_kers ); bli_func_obj_free( packm_struc_cxk_rih_kers ); bli_cntl_obj_free( packm_cntl_row ); diff --git a/frame/1m/packm/bli_packm_cxk_3m.c b/frame/1m/packm/bli_packm_cxk_3mi.c similarity index 91% rename from frame/1m/packm/bli_packm_cxk_3m.c rename to frame/1m/packm/bli_packm_cxk_3mi.c index 94cd923ab..fde09c006 100644 --- a/frame/1m/packm/bli_packm_cxk_3m.c +++ b/frame/1m/packm/bli_packm_cxk_3mi.c @@ -59,8 +59,8 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = }, /* micro-panel width = 2 */ { - NULL, BLIS_CPACKM_2XK_3M_KERNEL, - NULL, BLIS_ZPACKM_2XK_3M_KERNEL, + NULL, BLIS_CPACKM_2XK_3MI_KERNEL, + NULL, BLIS_ZPACKM_2XK_3MI_KERNEL, }, /* micro-panel width = 3 */ { @@ -68,8 +68,8 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = }, /* micro-panel width = 4 */ { - NULL, BLIS_CPACKM_4XK_3M_KERNEL, - NULL, BLIS_ZPACKM_4XK_3M_KERNEL, + NULL, BLIS_CPACKM_4XK_3MI_KERNEL, + NULL, BLIS_ZPACKM_4XK_3MI_KERNEL, }, /* micro-panel width = 5 */ { @@ -77,8 +77,8 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = }, /* micro-panel width = 6 */ { - NULL, BLIS_CPACKM_6XK_3M_KERNEL, - NULL, BLIS_ZPACKM_6XK_3M_KERNEL, + NULL, BLIS_CPACKM_6XK_3MI_KERNEL, + NULL, BLIS_ZPACKM_6XK_3MI_KERNEL, }, /* micro-panel width = 7 */ { @@ -86,8 +86,8 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = }, /* micro-panel width = 8 */ { - NULL, BLIS_CPACKM_8XK_3M_KERNEL, - NULL, BLIS_ZPACKM_8XK_3M_KERNEL, + NULL, BLIS_CPACKM_8XK_3MI_KERNEL, + NULL, BLIS_ZPACKM_8XK_3MI_KERNEL, }, /* micro-panel width = 9 */ { @@ -95,8 +95,8 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = }, /* micro-panel width = 10 */ { - NULL, 
BLIS_CPACKM_10XK_3M_KERNEL, - NULL, BLIS_ZPACKM_10XK_3M_KERNEL, + NULL, BLIS_CPACKM_10XK_3MI_KERNEL, + NULL, BLIS_ZPACKM_10XK_3MI_KERNEL, }, /* micro-panel width = 11 */ { @@ -104,8 +104,8 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = }, /* micro-panel width = 12 */ { - NULL, BLIS_CPACKM_12XK_3M_KERNEL, - NULL, BLIS_ZPACKM_12XK_3M_KERNEL, + NULL, BLIS_CPACKM_12XK_3MI_KERNEL, + NULL, BLIS_ZPACKM_12XK_3MI_KERNEL, }, /* micro-panel width = 13 */ { @@ -113,8 +113,8 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = }, /* micro-panel width = 14 */ { - NULL, BLIS_CPACKM_14XK_3M_KERNEL, - NULL, BLIS_ZPACKM_14XK_3M_KERNEL, + NULL, BLIS_CPACKM_14XK_3MI_KERNEL, + NULL, BLIS_ZPACKM_14XK_3MI_KERNEL, }, /* micro-panel width = 15 */ { @@ -122,8 +122,8 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = }, /* micro-panel width = 16 */ { - NULL, BLIS_CPACKM_16XK_3M_KERNEL, - NULL, BLIS_ZPACKM_16XK_3M_KERNEL, + NULL, BLIS_CPACKM_16XK_3MI_KERNEL, + NULL, BLIS_ZPACKM_16XK_3MI_KERNEL, }, /* micro-panel width = 17 */ { @@ -180,9 +180,9 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = /* micro-panel width = 30 */ { NULL, - BLIS_CPACKM_30XK_3M_KERNEL, + BLIS_CPACKM_30XK_3MI_KERNEL, NULL, - BLIS_ZPACKM_30XK_3M_KERNEL, + BLIS_ZPACKM_30XK_3MI_KERNEL, }, /* micro-panel width = 31 */ { @@ -293,5 +293,5 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_cxk_3m ) +INSERT_GENTFUNCCO_BASIC0( packm_cxk_3mi ) diff --git a/frame/1m/packm/bli_packm_cxk_3m.h b/frame/1m/packm/bli_packm_cxk_3mi.h similarity index 96% rename from frame/1m/packm/bli_packm_cxk_3m.h rename to frame/1m/packm/bli_packm_cxk_3mi.h index 30475aefa..4934000e1 100644 --- a/frame/1m/packm/bli_packm_cxk_3m.h +++ b/frame/1m/packm/bli_packm_cxk_3mi.h @@ -32,7 +32,7 @@ */ -#include "bli_packm_ref_cxk_3m.h" +#include "bli_packm_ref_cxk_3mi.h" #undef GENTPROTCO @@ -47,5 +47,5 @@ void PASTEMAC(ch,varname)( \ void* p, inc_t is_p, inc_t ldp 
\ ); -INSERT_GENTPROTCO_BASIC( packm_cxk_3m ) +INSERT_GENTPROTCO_BASIC( packm_cxk_3mi ) diff --git a/frame/1m/packm/bli_packm_cxk_4m.c b/frame/1m/packm/bli_packm_cxk_4mi.c similarity index 91% rename from frame/1m/packm/bli_packm_cxk_4m.c rename to frame/1m/packm/bli_packm_cxk_4mi.c index 9bc05817b..45b0f6a22 100644 --- a/frame/1m/packm/bli_packm_cxk_4m.c +++ b/frame/1m/packm/bli_packm_cxk_4mi.c @@ -59,8 +59,8 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = }, /* micro-panel width = 2 */ { - NULL, BLIS_CPACKM_2XK_4M_KERNEL, - NULL, BLIS_ZPACKM_2XK_4M_KERNEL, + NULL, BLIS_CPACKM_2XK_4MI_KERNEL, + NULL, BLIS_ZPACKM_2XK_4MI_KERNEL, }, /* micro-panel width = 3 */ { @@ -68,8 +68,8 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = }, /* micro-panel width = 4 */ { - NULL, BLIS_CPACKM_4XK_4M_KERNEL, - NULL, BLIS_ZPACKM_4XK_4M_KERNEL, + NULL, BLIS_CPACKM_4XK_4MI_KERNEL, + NULL, BLIS_ZPACKM_4XK_4MI_KERNEL, }, /* micro-panel width = 5 */ { @@ -77,8 +77,8 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = }, /* micro-panel width = 6 */ { - NULL, BLIS_CPACKM_6XK_4M_KERNEL, - NULL, BLIS_ZPACKM_6XK_4M_KERNEL, + NULL, BLIS_CPACKM_6XK_4MI_KERNEL, + NULL, BLIS_ZPACKM_6XK_4MI_KERNEL, }, /* micro-panel width = 7 */ { @@ -86,8 +86,8 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = }, /* micro-panel width = 8 */ { - NULL, BLIS_CPACKM_8XK_4M_KERNEL, - NULL, BLIS_ZPACKM_8XK_4M_KERNEL, + NULL, BLIS_CPACKM_8XK_4MI_KERNEL, + NULL, BLIS_ZPACKM_8XK_4MI_KERNEL, }, /* micro-panel width = 9 */ { @@ -95,8 +95,8 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = }, /* micro-panel width = 10 */ { - NULL, BLIS_CPACKM_10XK_4M_KERNEL, - NULL, BLIS_ZPACKM_10XK_4M_KERNEL, + NULL, BLIS_CPACKM_10XK_4MI_KERNEL, + NULL, BLIS_ZPACKM_10XK_4MI_KERNEL, }, /* micro-panel width = 11 */ { @@ -104,8 +104,8 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = }, /* micro-panel width = 12 */ { - NULL, 
BLIS_CPACKM_12XK_4M_KERNEL, - NULL, BLIS_ZPACKM_12XK_4M_KERNEL, + NULL, BLIS_CPACKM_12XK_4MI_KERNEL, + NULL, BLIS_ZPACKM_12XK_4MI_KERNEL, }, /* micro-panel width = 13 */ { @@ -113,8 +113,8 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = }, /* micro-panel width = 14 */ { - NULL, BLIS_CPACKM_14XK_4M_KERNEL, - NULL, BLIS_ZPACKM_14XK_4M_KERNEL, + NULL, BLIS_CPACKM_14XK_4MI_KERNEL, + NULL, BLIS_ZPACKM_14XK_4MI_KERNEL, }, /* micro-panel width = 15 */ { @@ -122,8 +122,8 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = }, /* micro-panel width = 16 */ { - NULL, BLIS_CPACKM_16XK_4M_KERNEL, - NULL, BLIS_ZPACKM_16XK_4M_KERNEL, + NULL, BLIS_CPACKM_16XK_4MI_KERNEL, + NULL, BLIS_ZPACKM_16XK_4MI_KERNEL, }, /* micro-panel width = 17 */ { @@ -180,9 +180,9 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = /* micro-panel width = 30 */ { NULL, - BLIS_CPACKM_30XK_4M_KERNEL, + BLIS_CPACKM_30XK_4MI_KERNEL, NULL, - BLIS_ZPACKM_30XK_4M_KERNEL, + BLIS_ZPACKM_30XK_4MI_KERNEL, }, /* micro-panel width = 31 */ { @@ -289,5 +289,5 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_cxk_4m ) +INSERT_GENTFUNCCO_BASIC0( packm_cxk_4mi ) diff --git a/frame/1m/packm/bli_packm_cxk_4m.h b/frame/1m/packm/bli_packm_cxk_4mi.h similarity index 96% rename from frame/1m/packm/bli_packm_cxk_4m.h rename to frame/1m/packm/bli_packm_cxk_4mi.h index 41bfc49e7..388829ae8 100644 --- a/frame/1m/packm/bli_packm_cxk_4m.h +++ b/frame/1m/packm/bli_packm_cxk_4mi.h @@ -32,7 +32,7 @@ */ -#include "bli_packm_ref_cxk_4m.h" +#include "bli_packm_ref_cxk_4mi.h" #undef GENTPROTCO @@ -47,5 +47,5 @@ void PASTEMAC(ch,varname)( \ void* p, inc_t is_p, inc_t ldp \ ); -INSERT_GENTPROTCO_BASIC( packm_cxk_4m ) +INSERT_GENTPROTCO_BASIC( packm_cxk_4mi ) diff --git a/frame/1m/packm/bli_packm_init.c b/frame/1m/packm/bli_packm_init.c index 10f6e1bdf..b9bf2af8b 100644 --- a/frame/1m/packm/bli_packm_init.c +++ b/frame/1m/packm/bli_packm_init.c @@ -358,10 +358,10 @@ 
void bli_packm_init_pack( invdiag_t invert_diag, // Here, we adjust the panel stride, if necessary. Remember: ps_p is // always interpreted as being in units of the datatype of the object // which is not necessarily how the micro-panels will be stored. For - // 3m, we will increase ps_p by 50%, and for ro/io/rpi, we halve ps_p - // Why? Because the macro-kernel indexes in units of the complex - // datatype. So these changes "trick" it into indexing the correct - // amount. + // interleaved 3m, we will increase ps_p by 50%, and for ro/io/rpi, + // we halve ps_p. Why? Because the macro-kernel indexes in units of + // the complex datatype. So these changes "trick" it into indexing + // the correct amount. if ( bli_is_3mi_packed( pack_schema ) ) { ps_p = ( ps_p * 3 ) / 2; @@ -456,10 +456,10 @@ void bli_packm_init_pack( invdiag_t invert_diag, // Here, we adjust the panel stride, if necessary. Remember: ps_p is // always interpreted as being in units of the datatype of the object // which is not necessarily how the micro-panels will be stored. For - // 3m, we will increase ps_p by 50%, and for ro/io/rpi, we halve ps_p - // Why? Because the macro-kernel indexes in units of the complex - // datatype. So these changes "trick" it into indexing the correct - // amount. + // interleaved 3m, we will increase ps_p by 50%, and for ro/io/rpi, + // we halve ps_p. Why? Because the macro-kernel indexes in units of + // the complex datatype. So these changes "trick" it into indexing + // the correct amount. 
if ( bli_is_3mi_packed( pack_schema ) ) { ps_p = ( ps_p * 3 ) / 2; diff --git a/frame/1m/packm/bli_packm_struc_cxk_3m.c b/frame/1m/packm/bli_packm_struc_cxk_3mi.c similarity index 95% rename from frame/1m/packm/bli_packm_struc_cxk_3m.c rename to frame/1m/packm/bli_packm_struc_cxk_3mi.c index a3c32116b..034d7935d 100644 --- a/frame/1m/packm/bli_packm_struc_cxk_3m.c +++ b/frame/1m/packm/bli_packm_struc_cxk_3mi.c @@ -106,11 +106,34 @@ void PASTEMAC(ch,varname)( \ { \ /* Call a helper function for micro-panels of Hermitian/symmetric matrices. */ \ - PASTEMAC(ch,packm_herm_cxk_3m)( strucc, \ + PASTEMAC(ch,packm_herm_cxk_3mi)( strucc, \ + diagoffc, \ + uploc, \ + conjc, \ + schema, \ + m_panel, \ + n_panel, \ + m_panel_max, \ + n_panel_max, \ + panel_dim, \ + panel_len, \ + kappa, \ + c, rs_c, cs_c, \ + incc, ldc, \ + p, rs_p, cs_p, \ + is_p, ldp ); \ + } \ + else /* ( bli_is_triangular( strucc ) ) */ \ + { \ + /* Call a helper function for micro-panels of triangular + matrices. */ \ + PASTEMAC(ch,packm_tri_cxk_3mi)( strucc, \ diagoffc, \ + diagc, \ uploc, \ conjc, \ schema, \ + invdiag, \ m_panel, \ n_panel, \ m_panel_max, \ @@ -123,29 +146,6 @@ void PASTEMAC(ch,varname)( \ p, rs_p, cs_p, \ is_p, ldp ); \ } \ - else /* ( bli_is_triangular( strucc ) ) */ \ - { \ - /* Call a helper function for micro-panels of triangular - matrices. 
*/ \ - PASTEMAC(ch,packm_tri_cxk_3m)( strucc, \ - diagoffc, \ - diagc, \ - uploc, \ - conjc, \ - schema, \ - invdiag, \ - m_panel, \ - n_panel, \ - m_panel_max, \ - n_panel_max, \ - panel_dim, \ - panel_len, \ - kappa, \ - c, rs_c, cs_c, \ - incc, ldc, \ - p, rs_p, cs_p, \ - is_p, ldp ); \ - } \ \ \ /* The packed memory region was acquired/allocated with "aligned" @@ -258,7 +258,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC( packm_struc_cxk_3m, packm_cxk_3m ) +INSERT_GENTFUNCCO_BASIC( packm_struc_cxk_3mi, packm_cxk_3mi ) @@ -527,7 +527,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC( packm_herm_cxk_3m, packm_cxk_3m ) +INSERT_GENTFUNCCO_BASIC( packm_herm_cxk_3mi, packm_cxk_3mi ) @@ -677,5 +677,5 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC( packm_tri_cxk_3m, packm_cxk_3m ) +INSERT_GENTFUNCCO_BASIC( packm_tri_cxk_3mi, packm_cxk_3mi ) diff --git a/frame/1m/packm/bli_packm_struc_cxk_3m.h b/frame/1m/packm/bli_packm_struc_cxk_3mi.h similarity index 97% rename from frame/1m/packm/bli_packm_struc_cxk_3m.h rename to frame/1m/packm/bli_packm_struc_cxk_3mi.h index c5168864c..59c8c2609 100644 --- a/frame/1m/packm/bli_packm_struc_cxk_3m.h +++ b/frame/1m/packm/bli_packm_struc_cxk_3mi.h @@ -52,7 +52,7 @@ void PASTEMAC(ch,varname)( \ ctype* restrict p, inc_t rs_p, inc_t cs_p \ ); -INSERT_GENTPROTCO_BASIC( packm_struc_cxk_3m ) +INSERT_GENTPROTCO_BASIC( packm_struc_cxk_3mi ) @@ -78,7 +78,7 @@ void PASTEMAC(ch,varname)( \ inc_t is_p, inc_t ldp \ ); -INSERT_GENTPROTCO_BASIC( packm_herm_cxk_3m ) +INSERT_GENTPROTCO_BASIC( packm_herm_cxk_3mi ) @@ -106,5 +106,5 @@ void PASTEMAC(ch,varname)( \ inc_t is_p, inc_t ldp \ ); -INSERT_GENTPROTCO_BASIC( packm_tri_cxk_3m ) +INSERT_GENTPROTCO_BASIC( packm_tri_cxk_3mi ) diff --git a/frame/1m/packm/bli_packm_struc_cxk_4m.c b/frame/1m/packm/bli_packm_struc_cxk_4mi.c similarity index 95% rename from frame/1m/packm/bli_packm_struc_cxk_4m.c rename to frame/1m/packm/bli_packm_struc_cxk_4mi.c index 
c3c3a811c..2dbf6be82 100644 --- a/frame/1m/packm/bli_packm_struc_cxk_4m.c +++ b/frame/1m/packm/bli_packm_struc_cxk_4mi.c @@ -106,11 +106,34 @@ void PASTEMAC(ch,varname)( \ { \ /* Call a helper function for micro-panels of Hermitian/symmetric matrices. */ \ - PASTEMAC(ch,packm_herm_cxk_4m)( strucc, \ + PASTEMAC(ch,packm_herm_cxk_4mi)( strucc, \ + diagoffc, \ + uploc, \ + conjc, \ + schema, \ + m_panel, \ + n_panel, \ + m_panel_max, \ + n_panel_max, \ + panel_dim, \ + panel_len, \ + kappa, \ + c, rs_c, cs_c, \ + incc, ldc, \ + p, rs_p, cs_p, \ + is_p, ldp ); \ + } \ + else /* ( bli_is_triangular( strucc ) ) */ \ + { \ + /* Call a helper function for micro-panels of triangular + matrices. */ \ + PASTEMAC(ch,packm_tri_cxk_4mi)( strucc, \ diagoffc, \ + diagc, \ uploc, \ conjc, \ schema, \ + invdiag, \ m_panel, \ n_panel, \ m_panel_max, \ @@ -123,29 +146,6 @@ void PASTEMAC(ch,varname)( \ p, rs_p, cs_p, \ is_p, ldp ); \ } \ - else /* ( bli_is_triangular( strucc ) ) */ \ - { \ - /* Call a helper function for micro-panels of triangular - matrices. 
*/ \ - PASTEMAC(ch,packm_tri_cxk_4m)( strucc, \ - diagoffc, \ - diagc, \ - uploc, \ - conjc, \ - schema, \ - invdiag, \ - m_panel, \ - n_panel, \ - m_panel_max, \ - n_panel_max, \ - panel_dim, \ - panel_len, \ - kappa, \ - c, rs_c, cs_c, \ - incc, ldc, \ - p, rs_p, cs_p, \ - is_p, ldp ); \ - } \ \ \ /* The packed memory region was acquired/allocated with "aligned" @@ -242,7 +242,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC( packm_struc_cxk_4m, packm_cxk_4m ) +INSERT_GENTFUNCCO_BASIC( packm_struc_cxk_4mi, packm_cxk_4mi ) @@ -499,7 +499,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC( packm_herm_cxk_4m, packm_cxk_4m ) +INSERT_GENTFUNCCO_BASIC( packm_herm_cxk_4mi, packm_cxk_4mi ) @@ -615,5 +615,5 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC( packm_tri_cxk_4m, packm_cxk_4m ) +INSERT_GENTFUNCCO_BASIC( packm_tri_cxk_4mi, packm_cxk_4mi ) diff --git a/frame/1m/packm/bli_packm_struc_cxk_4m.h b/frame/1m/packm/bli_packm_struc_cxk_4mi.h similarity index 97% rename from frame/1m/packm/bli_packm_struc_cxk_4m.h rename to frame/1m/packm/bli_packm_struc_cxk_4mi.h index 320d0ddcc..8c85250c9 100644 --- a/frame/1m/packm/bli_packm_struc_cxk_4m.h +++ b/frame/1m/packm/bli_packm_struc_cxk_4mi.h @@ -52,7 +52,7 @@ void PASTEMAC(ch,varname)( \ ctype* restrict p, inc_t rs_p, inc_t cs_p \ ); -INSERT_GENTPROTCO_BASIC( packm_struc_cxk_4m ) +INSERT_GENTPROTCO_BASIC( packm_struc_cxk_4mi ) @@ -78,7 +78,7 @@ void PASTEMAC(ch,varname)( \ inc_t is_p, inc_t ldp \ ); -INSERT_GENTPROTCO_BASIC( packm_herm_cxk_4m ) +INSERT_GENTPROTCO_BASIC( packm_herm_cxk_4mi ) @@ -106,5 +106,5 @@ void PASTEMAC(ch,varname)( \ inc_t is_p, inc_t ldp \ ); -INSERT_GENTPROTCO_BASIC( packm_tri_cxk_4m ) +INSERT_GENTPROTCO_BASIC( packm_tri_cxk_4mi ) diff --git a/frame/1m/packm/ukernels/bli_packm_ref_cxk_3m.c b/frame/1m/packm/ukernels/bli_packm_ref_cxk_3mi.c similarity index 99% rename from frame/1m/packm/ukernels/bli_packm_ref_cxk_3m.c rename to 
frame/1m/packm/ukernels/bli_packm_ref_cxk_3mi.c index 9586ca753..fa6bca5e9 100644 --- a/frame/1m/packm/ukernels/bli_packm_ref_cxk_3m.c +++ b/frame/1m/packm/ukernels/bli_packm_ref_cxk_3mi.c @@ -121,7 +121,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_ref_2xk_3m ) +INSERT_GENTFUNCCO_BASIC0( packm_ref_2xk_3mi ) @@ -220,7 +220,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_ref_4xk_3m ) +INSERT_GENTFUNCCO_BASIC0( packm_ref_4xk_3mi ) @@ -327,7 +327,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_ref_6xk_3m ) +INSERT_GENTFUNCCO_BASIC0( packm_ref_6xk_3mi ) @@ -442,7 +442,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_ref_8xk_3m ) +INSERT_GENTFUNCCO_BASIC0( packm_ref_8xk_3mi ) @@ -565,7 +565,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_ref_10xk_3m ) +INSERT_GENTFUNCCO_BASIC0( packm_ref_10xk_3mi ) @@ -696,7 +696,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_ref_12xk_3m ) +INSERT_GENTFUNCCO_BASIC0( packm_ref_12xk_3mi ) @@ -835,7 +835,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_ref_14xk_3m ) +INSERT_GENTFUNCCO_BASIC0( packm_ref_14xk_3mi ) @@ -982,7 +982,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_ref_16xk_3m ) +INSERT_GENTFUNCCO_BASIC0( packm_ref_16xk_3mi ) @@ -1186,5 +1186,5 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_ref_30xk_3m ) +INSERT_GENTFUNCCO_BASIC0( packm_ref_30xk_3mi ) diff --git a/frame/1m/packm/ukernels/bli_packm_ref_cxk_4m.h b/frame/1m/packm/ukernels/bli_packm_ref_cxk_3mi.h similarity index 83% rename from frame/1m/packm/ukernels/bli_packm_ref_cxk_4m.h rename to frame/1m/packm/ukernels/bli_packm_ref_cxk_3mi.h index 66967ce71..a9b5df496 100644 --- a/frame/1m/packm/ukernels/bli_packm_ref_cxk_4m.h +++ b/frame/1m/packm/ukernels/bli_packm_ref_cxk_3mi.h @@ -43,13 +43,13 @@ void PASTEMAC(ch,varname)( \ void* p, inc_t is_p, inc_t 
ldp \ ); -INSERT_GENTPROT_BASIC( packm_ref_2xk_4m ) -INSERT_GENTPROT_BASIC( packm_ref_4xk_4m ) -INSERT_GENTPROT_BASIC( packm_ref_6xk_4m ) -INSERT_GENTPROT_BASIC( packm_ref_8xk_4m ) -INSERT_GENTPROT_BASIC( packm_ref_10xk_4m ) -INSERT_GENTPROT_BASIC( packm_ref_12xk_4m ) -INSERT_GENTPROT_BASIC( packm_ref_14xk_4m ) -INSERT_GENTPROT_BASIC( packm_ref_16xk_4m ) -INSERT_GENTPROT_BASIC( packm_ref_30xk_4m ) +INSERT_GENTPROT_BASIC( packm_ref_2xk_3mi ) +INSERT_GENTPROT_BASIC( packm_ref_4xk_3mi ) +INSERT_GENTPROT_BASIC( packm_ref_6xk_3mi ) +INSERT_GENTPROT_BASIC( packm_ref_8xk_3mi ) +INSERT_GENTPROT_BASIC( packm_ref_10xk_3mi ) +INSERT_GENTPROT_BASIC( packm_ref_12xk_3mi ) +INSERT_GENTPROT_BASIC( packm_ref_14xk_3mi ) +INSERT_GENTPROT_BASIC( packm_ref_16xk_3mi ) +INSERT_GENTPROT_BASIC( packm_ref_30xk_3mi ) diff --git a/frame/1m/packm/ukernels/bli_packm_ref_cxk_4m.c b/frame/1m/packm/ukernels/bli_packm_ref_cxk_4mi.c similarity index 99% rename from frame/1m/packm/ukernels/bli_packm_ref_cxk_4m.c rename to frame/1m/packm/ukernels/bli_packm_ref_cxk_4mi.c index c77f730ca..b2dce50a3 100644 --- a/frame/1m/packm/ukernels/bli_packm_ref_cxk_4m.c +++ b/frame/1m/packm/ukernels/bli_packm_ref_cxk_4mi.c @@ -116,7 +116,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_ref_2xk_4m ) +INSERT_GENTFUNCCO_BASIC0( packm_ref_2xk_4mi ) @@ -210,7 +210,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_ref_4xk_4m ) +INSERT_GENTFUNCCO_BASIC0( packm_ref_4xk_4mi ) @@ -312,7 +312,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_ref_6xk_4m ) +INSERT_GENTFUNCCO_BASIC0( packm_ref_6xk_4mi ) @@ -422,7 +422,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_ref_8xk_4m ) +INSERT_GENTFUNCCO_BASIC0( packm_ref_8xk_4mi ) @@ -540,7 +540,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_ref_10xk_4m ) +INSERT_GENTFUNCCO_BASIC0( packm_ref_10xk_4mi ) @@ -666,7 +666,7 @@ void PASTEMAC(ch,varname)( \ } \ } 
-INSERT_GENTFUNCCO_BASIC0( packm_ref_12xk_4m ) +INSERT_GENTFUNCCO_BASIC0( packm_ref_12xk_4mi ) @@ -800,7 +800,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_ref_14xk_4m ) +INSERT_GENTFUNCCO_BASIC0( packm_ref_14xk_4mi ) @@ -942,7 +942,7 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_ref_16xk_4m ) +INSERT_GENTFUNCCO_BASIC0( packm_ref_16xk_4mi ) @@ -1140,5 +1140,5 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( packm_ref_30xk_4m ) +INSERT_GENTFUNCCO_BASIC0( packm_ref_30xk_4mi ) diff --git a/frame/1m/packm/ukernels/bli_packm_ref_cxk_3m.h b/frame/1m/packm/ukernels/bli_packm_ref_cxk_4mi.h similarity index 85% rename from frame/1m/packm/ukernels/bli_packm_ref_cxk_3m.h rename to frame/1m/packm/ukernels/bli_packm_ref_cxk_4mi.h index 0a8ec8e12..ec81874a3 100644 --- a/frame/1m/packm/ukernels/bli_packm_ref_cxk_3m.h +++ b/frame/1m/packm/ukernels/bli_packm_ref_cxk_4mi.h @@ -43,13 +43,13 @@ void PASTEMAC(ch,varname)( \ void* p, inc_t is_p, inc_t ldp \ ); -INSERT_GENTPROT_BASIC( packm_ref_2xk_3m ) -INSERT_GENTPROT_BASIC( packm_ref_4xk_3m ) -INSERT_GENTPROT_BASIC( packm_ref_6xk_3m ) -INSERT_GENTPROT_BASIC( packm_ref_8xk_3m ) -INSERT_GENTPROT_BASIC( packm_ref_10xk_3m ) -INSERT_GENTPROT_BASIC( packm_ref_12xk_3m ) -INSERT_GENTPROT_BASIC( packm_ref_14xk_3m ) -INSERT_GENTPROT_BASIC( packm_ref_16xk_3m ) -INSERT_GENTPROT_BASIC( packm_ref_30xk_3m ) +INSERT_GENTPROT_BASIC( packm_ref_2xk_4mi ) +INSERT_GENTPROT_BASIC( packm_ref_4xk_4mi ) +INSERT_GENTPROT_BASIC( packm_ref_6xk_4mi ) +INSERT_GENTPROT_BASIC( packm_ref_8xk_4mi ) +INSERT_GENTPROT_BASIC( packm_ref_10xk_4mi ) +INSERT_GENTPROT_BASIC( packm_ref_12xk_4mi ) +INSERT_GENTPROT_BASIC( packm_ref_14xk_4mi ) +INSERT_GENTPROT_BASIC( packm_ref_16xk_4mi ) +INSERT_GENTPROT_BASIC( packm_ref_30xk_4mi ) diff --git a/frame/3/gemm/3m/bli_gemm3m.c b/frame/3/gemm/3m1/bli_gemm3m1.c similarity index 91% rename from frame/3/gemm/3m/bli_gemm3m.c rename to frame/3/gemm/3m1/bli_gemm3m1.c index 
b8806305a..1c6f85ff3 100644 --- a/frame/3/gemm/3m/bli_gemm3m.c +++ b/frame/3/gemm/3m1/bli_gemm3m1.c @@ -37,16 +37,16 @@ // // Define object-based interface. // -void bli_gemm3m( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) +void bli_gemm3m1( obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { - // Since 3m only applies to the complex domain, we use the regular + // Since 3m1 only applies to the complex domain, we use the regular // implementation for real domain cases. if ( bli_obj_is_complex( *c ) ) - bli_gemm3m_entry( alpha, a, b, beta, c ); + bli_gemm3m1_entry( alpha, a, b, beta, c ); else bli_gemm_entry( alpha, a, b, beta, c ); } @@ -97,5 +97,5 @@ void PASTEMAC(ch,opname)( \ &co ); \ } -INSERT_GENTFUNC_BASIC( gemm3m, gemm3m ) +INSERT_GENTFUNC_BASIC( gemm3m1, gemm3m1 ) diff --git a/frame/3/syr2k/3m/bli_syr2k3m.h b/frame/3/gemm/3m1/bli_gemm3m1.h similarity index 92% rename from frame/3/syr2k/3m/bli_syr2k3m.h rename to frame/3/gemm/3m1/bli_gemm3m1.h index 1d50c32e5..1c0664b28 100644 --- a/frame/3/syr2k/3m/bli_syr2k3m.h +++ b/frame/3/gemm/3m1/bli_gemm3m1.h @@ -32,13 +32,16 @@ */ -#include "bli_syr2k3m_entry.h" +#include "bli_gemm3m1_cntl.h" +#include "bli_gemm3m1_entry.h" + +#include "bli_gemm3m1_ukr_ref.h" // // Prototype object-based interface. 
// -void bli_syr2k3m( obj_t* alpha, +void bli_gemm3m1( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, @@ -52,10 +55,10 @@ void bli_syr2k3m( obj_t* alpha, #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname)( \ - uplo_t uploc, \ trans_t transa, \ trans_t transb, \ dim_t m, \ + dim_t n, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ @@ -64,5 +67,5 @@ void PASTEMAC(ch,opname)( \ ctype* c, inc_t rs_c, inc_t cs_c \ ); -INSERT_GENTPROT_BASIC( syr2k3m ) +INSERT_GENTPROT_BASIC( gemm3m1 ) diff --git a/frame/3/gemm/3m/bli_gemm3m_cntl.c b/frame/3/gemm/3m1/bli_gemm3m1_cntl.c similarity index 73% rename from frame/3/gemm/3m/bli_gemm3m_cntl.c rename to frame/3/gemm/3m1/bli_gemm3m1_cntl.c index 75326dc7f..917e43aff 100644 --- a/frame/3/gemm/3m/bli_gemm3m_cntl.c +++ b/frame/3/gemm/3m1/bli_gemm3m1_cntl.c @@ -36,30 +36,30 @@ extern scalm_t* scalm_cntl; -blksz_t* gemm3m_mc; -blksz_t* gemm3m_nc; -blksz_t* gemm3m_kc; -blksz_t* gemm3m_mr; -blksz_t* gemm3m_nr; -blksz_t* gemm3m_kr; +blksz_t* gemm3m1_mc; +blksz_t* gemm3m1_nc; +blksz_t* gemm3m1_kc; +blksz_t* gemm3m1_mr; +blksz_t* gemm3m1_nr; +blksz_t* gemm3m1_kr; -func_t* gemm3m_ukrs; +func_t* gemm3m1_ukrs; -packm_t* gemm3m_packa_cntl; -packm_t* gemm3m_packb_cntl; +packm_t* gemm3m1_packa_cntl; +packm_t* gemm3m1_packb_cntl; -gemm_t* gemm3m_cntl_bp_ke; -gemm_t* gemm3m_cntl_op_bp; -gemm_t* gemm3m_cntl_mm_op; -gemm_t* gemm3m_cntl_vl_mm; +gemm_t* gemm3m1_cntl_bp_ke; +gemm_t* gemm3m1_cntl_op_bp; +gemm_t* gemm3m1_cntl_mm_op; +gemm_t* gemm3m1_cntl_vl_mm; -gemm_t* gemm3m_cntl; +gemm_t* gemm3m1_cntl; -void bli_gemm3m_cntl_init() +void bli_gemm3m1_cntl_init() { // Create blocksize objects for each dimension. - // NOTE: the complex blocksizes for 3m are generally equal to their + // NOTE: the complex blocksizes for 3m1 are generally equal to their // corresponding real domain counterparts. 
However, we want to promote // similar cache footprints for the micro-panels of A and B (when // compared to executing in the real domain), and since the complex @@ -68,37 +68,37 @@ void bli_gemm3m_cntl_init() // compensate. Ideally, we would reduce by a factor of 3, but that // could get messy vis-a-vis keeping KC a multiple of the register // blocksizes. - gemm3m_mc + gemm3m1_mc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S, BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D ); - gemm3m_nc + gemm3m1_nc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_NC_S, BLIS_MAXIMUM_NC_S, BLIS_DEFAULT_NC_D, BLIS_MAXIMUM_NC_D ); - gemm3m_kc + gemm3m1_kc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_KC_S/3, BLIS_MAXIMUM_KC_S/3, BLIS_DEFAULT_KC_D/3, BLIS_MAXIMUM_KC_D/3 ); - gemm3m_mr + gemm3m1_mr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S, BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D ); - gemm3m_nr + gemm3m1_nr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S, BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D ); - gemm3m_kr + gemm3m1_kr = bli_blksz_obj_create( 0, 0, 0, 0, @@ -108,41 +108,41 @@ void bli_gemm3m_cntl_init() // Attach the register blksz_t objects as sub-blocksizes to the cache // blksz_t objects. - bli_blksz_obj_attach_to( gemm3m_mr, gemm3m_mc ); - bli_blksz_obj_attach_to( gemm3m_nr, gemm3m_nc ); - bli_blksz_obj_attach_to( gemm3m_kr, gemm3m_kc ); + bli_blksz_obj_attach_to( gemm3m1_mr, gemm3m1_mc ); + bli_blksz_obj_attach_to( gemm3m1_nr, gemm3m1_nc ); + bli_blksz_obj_attach_to( gemm3m1_kr, gemm3m1_kc ); // Create function pointer object for each datatype-specific gemm // micro-kernel. 
- gemm3m_ukrs + gemm3m1_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, - BLIS_CGEMM3M_UKERNEL, BLIS_CGEMM3M_UKERNEL_PREFERS_CONTIG_ROWS, - BLIS_ZGEMM3M_UKERNEL, BLIS_ZGEMM3M_UKERNEL_PREFERS_CONTIG_ROWS ); + BLIS_CGEMM3M1_UKERNEL, BLIS_CGEMM3M1_UKERNEL_PREFERS_CONTIG_ROWS, + BLIS_ZGEMM3M1_UKERNEL, BLIS_ZGEMM3M1_UKERNEL_PREFERS_CONTIG_ROWS ); // Create control tree objects for packm operations. - gemm3m_packa_cntl + gemm3m1_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - gemm3m_mr, - gemm3m_kr, + gemm3m1_mr, + gemm3m1_kr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS_3MI, BLIS_BUFFER_FOR_A_BLOCK ); - gemm3m_packb_cntl + gemm3m1_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - gemm3m_kr, - gemm3m_nr, + gemm3m1_kr, + gemm3m1_nr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? @@ -155,83 +155,83 @@ void bli_gemm3m_cntl_init() // // Create control tree object for lowest-level block-panel kernel. - gemm3m_cntl_bp_ke + gemm3m1_cntl_bp_ke = bli_gemm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, - gemm3m_ukrs, + gemm3m1_ukrs, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem. - gemm3m_cntl_op_bp + gemm3m1_cntl_op_bp = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, - gemm3m_mc, + gemm3m1_mc, NULL, NULL, - gemm3m_packa_cntl, - gemm3m_packb_cntl, + gemm3m1_packa_cntl, + gemm3m1_packb_cntl, NULL, - gemm3m_cntl_bp_ke, + gemm3m1_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. 
- gemm3m_cntl_mm_op + gemm3m1_cntl_mm_op = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, - gemm3m_kc, + gemm3m1_kc, NULL, NULL, NULL, NULL, NULL, - gemm3m_cntl_op_bp, + gemm3m1_cntl_op_bp, NULL ); // Create control tree object for very large problem via multiple // general problems. - gemm3m_cntl_vl_mm + gemm3m1_cntl_vl_mm = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - gemm3m_nc, + gemm3m1_nc, NULL, NULL, NULL, NULL, NULL, - gemm3m_cntl_mm_op, + gemm3m1_cntl_mm_op, NULL ); // Alias the "master" gemm control tree to a shorter name. - gemm3m_cntl = gemm3m_cntl_vl_mm; + gemm3m1_cntl = gemm3m1_cntl_vl_mm; } -void bli_gemm3m_cntl_finalize() +void bli_gemm3m1_cntl_finalize() { - bli_blksz_obj_free( gemm3m_mc ); - bli_blksz_obj_free( gemm3m_nc ); - bli_blksz_obj_free( gemm3m_kc ); - bli_blksz_obj_free( gemm3m_mr ); - bli_blksz_obj_free( gemm3m_nr ); - bli_blksz_obj_free( gemm3m_kr ); + bli_blksz_obj_free( gemm3m1_mc ); + bli_blksz_obj_free( gemm3m1_nc ); + bli_blksz_obj_free( gemm3m1_kc ); + bli_blksz_obj_free( gemm3m1_mr ); + bli_blksz_obj_free( gemm3m1_nr ); + bli_blksz_obj_free( gemm3m1_kr ); - bli_func_obj_free( gemm3m_ukrs ); + bli_func_obj_free( gemm3m1_ukrs ); - bli_cntl_obj_free( gemm3m_packa_cntl ); - bli_cntl_obj_free( gemm3m_packb_cntl ); + bli_cntl_obj_free( gemm3m1_packa_cntl ); + bli_cntl_obj_free( gemm3m1_packb_cntl ); - bli_cntl_obj_free( gemm3m_cntl_bp_ke ); - bli_cntl_obj_free( gemm3m_cntl_op_bp ); - bli_cntl_obj_free( gemm3m_cntl_mm_op ); - bli_cntl_obj_free( gemm3m_cntl_vl_mm ); + bli_cntl_obj_free( gemm3m1_cntl_bp_ke ); + bli_cntl_obj_free( gemm3m1_cntl_op_bp ); + bli_cntl_obj_free( gemm3m1_cntl_mm_op ); + bli_cntl_obj_free( gemm3m1_cntl_vl_mm ); } diff --git a/frame/3/gemm/3m/bli_gemm3m_cntl.h b/frame/3/gemm/3m1/bli_gemm3m1_cntl.h similarity index 95% rename from frame/3/gemm/3m/bli_gemm3m_cntl.h rename to frame/3/gemm/3m1/bli_gemm3m1_cntl.h index eb21d6e67..a938ad694 100644 --- a/frame/3/gemm/3m/bli_gemm3m_cntl.h +++ 
b/frame/3/gemm/3m1/bli_gemm3m1_cntl.h @@ -32,6 +32,6 @@ */ -void bli_gemm3m_cntl_init( void ); -void bli_gemm3m_cntl_finalize( void ); +void bli_gemm3m1_cntl_init( void ); +void bli_gemm3m1_cntl_finalize( void ); diff --git a/frame/3/her2k/4m/bli_her2k4m_entry.c b/frame/3/gemm/3m1/bli_gemm3m1_entry.c similarity index 92% rename from frame/3/her2k/4m/bli_her2k4m_entry.c rename to frame/3/gemm/3m1/bli_gemm3m1_entry.c index 439c884f1..9f36513c8 100644 --- a/frame/3/her2k/4m/bli_her2k4m_entry.c +++ b/frame/3/gemm/3m1/bli_gemm3m1_entry.c @@ -34,15 +34,15 @@ #include "blis.h" -extern gemm_t* gemm4m_cntl; +extern gemm_t* gemm3m1_cntl; -void bli_her2k4m_entry( obj_t* alpha, +void bli_gemm3m1_entry( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ) { - bli_her2k_front( alpha, a, b, beta, c, - gemm4m_cntl ); + bli_gemm_front( alpha, a, b, beta, c, + gemm3m1_cntl ); } diff --git a/frame/3/her2k/3m/bli_her2k3m_entry.h b/frame/3/gemm/3m1/bli_gemm3m1_entry.h similarity index 97% rename from frame/3/her2k/3m/bli_her2k3m_entry.h rename to frame/3/gemm/3m1/bli_gemm3m1_entry.h index c27e274da..7df1749f0 100644 --- a/frame/3/her2k/3m/bli_her2k3m_entry.h +++ b/frame/3/gemm/3m1/bli_gemm3m1_entry.h @@ -32,7 +32,7 @@ */ -void bli_her2k3m_entry( obj_t* alpha, +void bli_gemm3m1_entry( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, diff --git a/frame/3/gemm/3m/ukernels/bli_gemm3m_ukr_ref.c b/frame/3/gemm/3m1/ukernels/bli_gemm3m1_ukr_ref.c similarity index 99% rename from frame/3/gemm/3m/ukernels/bli_gemm3m_ukr_ref.c rename to frame/3/gemm/3m1/ukernels/bli_gemm3m1_ukr_ref.c index f554d7f5e..373031bc8 100644 --- a/frame/3/gemm/3m/ukernels/bli_gemm3m_ukr_ref.c +++ b/frame/3/gemm/3m1/ukernels/bli_gemm3m1_ukr_ref.c @@ -309,5 +309,5 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC( gemm3m_ukr_ref, GEMM_UKERNEL ) +INSERT_GENTFUNCCO_BASIC( gemm3m1_ukr_ref, GEMM_UKERNEL ) diff --git a/frame/3/gemm/3m/ukernels/bli_gemm3m_ukr_ref.h 
b/frame/3/gemm/3m1/ukernels/bli_gemm3m1_ukr_ref.h similarity index 98% rename from frame/3/gemm/3m/ukernels/bli_gemm3m_ukr_ref.h rename to frame/3/gemm/3m1/ukernels/bli_gemm3m1_ukr_ref.h index 2beaab14d..7b7ddbc49 100644 --- a/frame/3/gemm/3m/ukernels/bli_gemm3m_ukr_ref.h +++ b/frame/3/gemm/3m1/ukernels/bli_gemm3m1_ukr_ref.h @@ -46,5 +46,5 @@ void PASTEMAC(ch,varname)( \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( gemm3m_ukr_ref ) +INSERT_GENTPROTCO_BASIC( gemm3m1_ukr_ref ) diff --git a/frame/3/gemm/3mh/bli_gemm3mh.c b/frame/3/gemm/3mh/bli_gemm3mh.c index b7450bd1a..41e68d519 100644 --- a/frame/3/gemm/3mh/bli_gemm3mh.c +++ b/frame/3/gemm/3mh/bli_gemm3mh.c @@ -38,10 +38,10 @@ // Define object-based interface. // void bli_gemm3mh( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 3mh only applies to the complex domain, we use the regular // implementation for real domain cases. diff --git a/frame/3/gemm/3mh/bli_gemm3mh.h b/frame/3/gemm/3mh/bli_gemm3mh.h index 0d236a39e..b1b40b885 100644 --- a/frame/3/gemm/3mh/bli_gemm3mh.h +++ b/frame/3/gemm/3mh/bli_gemm3mh.h @@ -42,10 +42,10 @@ // Prototype object-based interface. 
// void bli_gemm3mh( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // diff --git a/frame/3/gemm/3mh/bli_gemm3mh_entry.c b/frame/3/gemm/3mh/bli_gemm3mh_entry.c index 3ae00de3d..2f65aeb8c 100644 --- a/frame/3/gemm/3mh/bli_gemm3mh_entry.c +++ b/frame/3/gemm/3mh/bli_gemm3mh_entry.c @@ -39,10 +39,10 @@ extern gemm_t* gemm3mh_cntl_io; extern gemm_t* gemm3mh_cntl_rpi; void bli_gemm3mh_entry( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { bli_gemm_front( alpha, a, b, beta, c, gemm3mh_cntl_ro ); bli_gemm_front( alpha, a, b, &BLIS_ONE, c, gemm3mh_cntl_io ); diff --git a/frame/3/gemm/3mh/bli_gemm3mh_entry.h b/frame/3/gemm/3mh/bli_gemm3mh_entry.h index 9c200db67..1bfac10df 100644 --- a/frame/3/gemm/3mh/bli_gemm3mh_entry.h +++ b/frame/3/gemm/3mh/bli_gemm3mh_entry.h @@ -33,8 +33,8 @@ */ void bli_gemm3mh_entry( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/gemm/4m/bli_gemm4m.c b/frame/3/gemm/4m1/bli_gemm4m1.c similarity index 93% rename from frame/3/gemm/4m/bli_gemm4m.c rename to frame/3/gemm/4m1/bli_gemm4m1.c index ff10d44da..4c9655719 100644 --- a/frame/3/gemm/4m/bli_gemm4m.c +++ b/frame/3/gemm/4m1/bli_gemm4m1.c @@ -37,16 +37,16 @@ // // Define object-based interface. // -void bli_gemm4m( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) +void bli_gemm4m1( obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 4m only applies to the complex domain, we use the regular // implementation for real domain cases. 
if ( bli_obj_is_complex( *c ) ) - bli_gemm4m_entry( alpha, a, b, beta, c ); + bli_gemm4m1_entry( alpha, a, b, beta, c ); else bli_gemm_entry( alpha, a, b, beta, c ); } @@ -97,5 +97,5 @@ void PASTEMAC(ch,opname)( \ &co ); \ } -INSERT_GENTFUNC_BASIC( gemm4m, gemm4m ) +INSERT_GENTFUNC_BASIC( gemm4m1, gemm4m1 ) diff --git a/frame/3/syr2k/4m/bli_syr2k4m.h b/frame/3/gemm/4m1/bli_gemm4m1.h similarity index 92% rename from frame/3/syr2k/4m/bli_syr2k4m.h rename to frame/3/gemm/4m1/bli_gemm4m1.h index 66a6319d1..c8c42a7c2 100644 --- a/frame/3/syr2k/4m/bli_syr2k4m.h +++ b/frame/3/gemm/4m1/bli_gemm4m1.h @@ -32,13 +32,16 @@ */ -#include "bli_syr2k4m_entry.h" +#include "bli_gemm4m1_cntl.h" +#include "bli_gemm4m1_entry.h" + +#include "bli_gemm4m1_ukr_ref.h" // // Prototype object-based interface. // -void bli_syr2k4m( obj_t* alpha, +void bli_gemm4m1( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, @@ -52,10 +55,10 @@ void bli_syr2k4m( obj_t* alpha, #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname)( \ - uplo_t uploc, \ trans_t transa, \ trans_t transb, \ dim_t m, \ + dim_t n, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ @@ -64,5 +67,5 @@ void PASTEMAC(ch,opname)( \ ctype* c, inc_t rs_c, inc_t cs_c \ ); -INSERT_GENTPROT_BASIC( syr2k4m ) +INSERT_GENTPROT_BASIC( gemm4m1 ) diff --git a/frame/3/gemm/4m/bli_gemm4m_cntl.c b/frame/3/gemm/4m1/bli_gemm4m1_cntl.c similarity index 73% rename from frame/3/gemm/4m/bli_gemm4m_cntl.c rename to frame/3/gemm/4m1/bli_gemm4m1_cntl.c index 2b86ff693..7965f7512 100644 --- a/frame/3/gemm/4m/bli_gemm4m_cntl.c +++ b/frame/3/gemm/4m1/bli_gemm4m1_cntl.c @@ -36,66 +36,66 @@ extern scalm_t* scalm_cntl; -blksz_t* gemm4m_mc; -blksz_t* gemm4m_nc; -blksz_t* gemm4m_kc; -blksz_t* gemm4m_mr; -blksz_t* gemm4m_nr; -blksz_t* gemm4m_kr; +blksz_t* gemm4m1_mc; +blksz_t* gemm4m1_nc; +blksz_t* gemm4m1_kc; +blksz_t* gemm4m1_mr; +blksz_t* gemm4m1_nr; +blksz_t* gemm4m1_kr; -func_t* gemm4m_ukrs; +func_t* gemm4m1_ukrs; -packm_t* 
gemm4m_packa_cntl; -packm_t* gemm4m_packb_cntl; +packm_t* gemm4m1_packa_cntl; +packm_t* gemm4m1_packb_cntl; -gemm_t* gemm4m_cntl_bp_ke; -gemm_t* gemm4m_cntl_op_bp; -gemm_t* gemm4m_cntl_mm_op; -gemm_t* gemm4m_cntl_vl_mm; +gemm_t* gemm4m1_cntl_bp_ke; +gemm_t* gemm4m1_cntl_op_bp; +gemm_t* gemm4m1_cntl_mm_op; +gemm_t* gemm4m1_cntl_vl_mm; -gemm_t* gemm4m_cntl; +gemm_t* gemm4m1_cntl; -void bli_gemm4m_cntl_init() +void bli_gemm4m1_cntl_init() { // Create blocksize objects for each dimension. - // NOTE: the complex blocksizes for 4m are generally equal to their + // NOTE: the complex blocksizes for 4m1 are generally equal to their // corresponding real domain counterparts. However, we want to promote // similar cache footprints for the micro-panels of A and B (when // compared to executing in the real domain), and since the complex // micro-panels are twice as "fat" (due to storing real and imaginary // parts), we reduce KC by a factor of 2 to compensate. - gemm4m_mc + gemm4m1_mc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_MC_S, BLIS_MAXIMUM_MC_S, BLIS_DEFAULT_MC_D, BLIS_MAXIMUM_MC_D ); - gemm4m_nc + gemm4m1_nc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_NC_S, BLIS_MAXIMUM_NC_S, BLIS_DEFAULT_NC_D, BLIS_MAXIMUM_NC_D ); - gemm4m_kc + gemm4m1_kc = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_KC_S/2, BLIS_MAXIMUM_KC_S/2, BLIS_DEFAULT_KC_D/2, BLIS_MAXIMUM_KC_D/2 ); - gemm4m_mr + gemm4m1_mr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_MR_S, BLIS_PACKDIM_MR_S, BLIS_DEFAULT_MR_D, BLIS_PACKDIM_MR_D ); - gemm4m_nr + gemm4m1_nr = bli_blksz_obj_create( 0, 0, 0, 0, BLIS_DEFAULT_NR_S, BLIS_PACKDIM_NR_S, BLIS_DEFAULT_NR_D, BLIS_PACKDIM_NR_D ); - gemm4m_kr + gemm4m1_kr = bli_blksz_obj_create( 0, 0, 0, 0, @@ -105,41 +105,41 @@ void bli_gemm4m_cntl_init() // Attach the register blksz_t objects as sub-blocksizes to the cache // blksz_t objects. 
- bli_blksz_obj_attach_to( gemm4m_mr, gemm4m_mc ); - bli_blksz_obj_attach_to( gemm4m_nr, gemm4m_nc ); - bli_blksz_obj_attach_to( gemm4m_kr, gemm4m_kc ); + bli_blksz_obj_attach_to( gemm4m1_mr, gemm4m1_mc ); + bli_blksz_obj_attach_to( gemm4m1_nr, gemm4m1_nc ); + bli_blksz_obj_attach_to( gemm4m1_kr, gemm4m1_kc ); // Create function pointer object for each datatype-specific gemm // micro-kernel. - gemm4m_ukrs + gemm4m1_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, - BLIS_CGEMM4M_UKERNEL, BLIS_CGEMM4M_UKERNEL_PREFERS_CONTIG_ROWS, - BLIS_ZGEMM4M_UKERNEL, BLIS_ZGEMM4M_UKERNEL_PREFERS_CONTIG_ROWS ); + BLIS_CGEMM4M1_UKERNEL, BLIS_CGEMM4M1_UKERNEL_PREFERS_CONTIG_ROWS, + BLIS_ZGEMM4M1_UKERNEL, BLIS_ZGEMM4M1_UKERNEL_PREFERS_CONTIG_ROWS ); // Create control tree objects for packm operations. - gemm4m_packa_cntl + gemm4m1_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - gemm4m_mr, - gemm4m_kr, + gemm4m1_mr, + gemm4m1_kr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS_4MI, BLIS_BUFFER_FOR_A_BLOCK ); - gemm4m_packb_cntl + gemm4m1_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - gemm4m_kr, - gemm4m_nr, + gemm4m1_kr, + gemm4m1_nr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? @@ -152,82 +152,82 @@ void bli_gemm4m_cntl_init() // // Create control tree object for lowest-level block-panel kernel. - gemm4m_cntl_bp_ke + gemm4m1_cntl_bp_ke = bli_gemm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, - gemm4m_ukrs, + gemm4m1_ukrs, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem. 
- gemm4m_cntl_op_bp + gemm4m1_cntl_op_bp = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, - gemm4m_mc, + gemm4m1_mc, NULL, NULL, - gemm4m_packa_cntl, - gemm4m_packb_cntl, + gemm4m1_packa_cntl, + gemm4m1_packb_cntl, NULL, - gemm4m_cntl_bp_ke, + gemm4m1_cntl_bp_ke, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates. - gemm4m_cntl_mm_op + gemm4m1_cntl_mm_op = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, - gemm4m_kc, + gemm4m1_kc, NULL, NULL, NULL, NULL, NULL, - gemm4m_cntl_op_bp, + gemm4m1_cntl_op_bp, NULL ); // Create control tree object for very large problem via multiple // general problems. - gemm4m_cntl_vl_mm + gemm4m1_cntl_vl_mm = bli_gemm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - gemm4m_nc, + gemm4m1_nc, NULL, NULL, NULL, NULL, NULL, - gemm4m_cntl_mm_op, + gemm4m1_cntl_mm_op, NULL ); // Alias the "master" gemm control tree to a shorter name. - gemm4m_cntl = gemm4m_cntl_vl_mm; + gemm4m1_cntl = gemm4m1_cntl_vl_mm; } -void bli_gemm4m_cntl_finalize() +void bli_gemm4m1_cntl_finalize() { - bli_blksz_obj_free( gemm4m_mc ); - bli_blksz_obj_free( gemm4m_nc ); - bli_blksz_obj_free( gemm4m_kc ); - bli_blksz_obj_free( gemm4m_mr ); - bli_blksz_obj_free( gemm4m_nr ); - bli_blksz_obj_free( gemm4m_kr ); + bli_blksz_obj_free( gemm4m1_mc ); + bli_blksz_obj_free( gemm4m1_nc ); + bli_blksz_obj_free( gemm4m1_kc ); + bli_blksz_obj_free( gemm4m1_mr ); + bli_blksz_obj_free( gemm4m1_nr ); + bli_blksz_obj_free( gemm4m1_kr ); - bli_func_obj_free( gemm4m_ukrs ); + bli_func_obj_free( gemm4m1_ukrs ); - bli_cntl_obj_free( gemm4m_packa_cntl ); - bli_cntl_obj_free( gemm4m_packb_cntl ); + bli_cntl_obj_free( gemm4m1_packa_cntl ); + bli_cntl_obj_free( gemm4m1_packb_cntl ); - bli_cntl_obj_free( gemm4m_cntl_bp_ke ); - bli_cntl_obj_free( gemm4m_cntl_op_bp ); - bli_cntl_obj_free( gemm4m_cntl_mm_op ); - bli_cntl_obj_free( gemm4m_cntl_vl_mm ); + bli_cntl_obj_free( gemm4m1_cntl_bp_ke ); + bli_cntl_obj_free( gemm4m1_cntl_op_bp ); 
+ bli_cntl_obj_free( gemm4m1_cntl_mm_op ); + bli_cntl_obj_free( gemm4m1_cntl_vl_mm ); } diff --git a/frame/3/gemm/4m/bli_gemm4m_cntl.h b/frame/3/gemm/4m1/bli_gemm4m1_cntl.h similarity index 95% rename from frame/3/gemm/4m/bli_gemm4m_cntl.h rename to frame/3/gemm/4m1/bli_gemm4m1_cntl.h index 2740bb80a..5385a8d81 100644 --- a/frame/3/gemm/4m/bli_gemm4m_cntl.h +++ b/frame/3/gemm/4m1/bli_gemm4m1_cntl.h @@ -32,6 +32,6 @@ */ -void bli_gemm4m_cntl_init( void ); -void bli_gemm4m_cntl_finalize( void ); +void bli_gemm4m1_cntl_init( void ); +void bli_gemm4m1_cntl_finalize( void ); diff --git a/frame/3/her2k/3m/bli_her2k3m_entry.c b/frame/3/gemm/4m1/bli_gemm4m1_entry.c similarity index 92% rename from frame/3/her2k/3m/bli_her2k3m_entry.c rename to frame/3/gemm/4m1/bli_gemm4m1_entry.c index 764df11f0..ebf0a44e2 100644 --- a/frame/3/her2k/3m/bli_her2k3m_entry.c +++ b/frame/3/gemm/4m1/bli_gemm4m1_entry.c @@ -34,15 +34,15 @@ #include "blis.h" -extern gemm_t* gemm3m_cntl; +extern gemm_t* gemm4m1_cntl; -void bli_her2k3m_entry( obj_t* alpha, +void bli_gemm4m1_entry( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ) { - bli_her2k_front( alpha, a, b, beta, c, - gemm3m_cntl ); + bli_gemm_front( alpha, a, b, beta, c, + gemm4m1_cntl ); } diff --git a/frame/3/her2k/4m/bli_her2k4m_entry.h b/frame/3/gemm/4m1/bli_gemm4m1_entry.h similarity index 97% rename from frame/3/her2k/4m/bli_her2k4m_entry.h rename to frame/3/gemm/4m1/bli_gemm4m1_entry.h index 0c711e4f9..b74045ab1 100644 --- a/frame/3/her2k/4m/bli_her2k4m_entry.h +++ b/frame/3/gemm/4m1/bli_gemm4m1_entry.h @@ -32,7 +32,7 @@ */ -void bli_her2k4m_entry( obj_t* alpha, +void bli_gemm4m1_entry( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, diff --git a/frame/3/gemm/4m/ukernels/bli_gemm4m_ukr_ref.c b/frame/3/gemm/4m1/ukernels/bli_gemm4m1_ukr_ref.c similarity index 96% rename from frame/3/gemm/4m/ukernels/bli_gemm4m_ukr_ref.c rename to frame/3/gemm/4m1/ukernels/bli_gemm4m1_ukr_ref.c index 2359bbc09..b90ff4f79 100644 --- 
a/frame/3/gemm/4m/ukernels/bli_gemm4m_ukr_ref.c +++ b/frame/3/gemm/4m1/ukernels/bli_gemm4m1_ukr_ref.c @@ -92,13 +92,13 @@ void PASTEMAC(ch,varname)( \ \ \ /* -PASTEMAC(chr,fprintm)( stdout, "gemm4m_ukr: ap_r", m, k, \ +PASTEMAC(chr,fprintm)( stdout, "gemm4m1_ukr: ap_r", m, k, \ a_r, 1, PASTEMAC(chr,packmr), "%4.1f", "" ); \ -PASTEMAC(chr,fprintm)( stdout, "gemm4m_ukr: ap_i", m, k, \ +PASTEMAC(chr,fprintm)( stdout, "gemm4m1_ukr: ap_i", m, k, \ a_i, 1, PASTEMAC(chr,packmr), "%4.1f", "" ); \ -PASTEMAC(chr,fprintm)( stdout, "gemm4m_ukr: bp_r", k, n, \ +PASTEMAC(chr,fprintm)( stdout, "gemm4m1_ukr: bp_r", k, n, \ b_r, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ -PASTEMAC(chr,fprintm)( stdout, "gemm4m_ukr: bp_i", k, n, \ +PASTEMAC(chr,fprintm)( stdout, "gemm4m1_ukr: bp_i", k, n, \ b_i, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ */ \ \ @@ -260,5 +260,5 @@ PASTEMAC(chr,fprintm)( stdout, "gemm4m_ukr: bp_i", k, n, \ } \ } -INSERT_GENTFUNCCO_BASIC( gemm4m_ukr_ref, GEMM_UKERNEL ) +INSERT_GENTFUNCCO_BASIC( gemm4m1_ukr_ref, GEMM_UKERNEL ) diff --git a/frame/3/gemm/4m/ukernels/bli_gemm4m_ukr_ref.h b/frame/3/gemm/4m1/ukernels/bli_gemm4m1_ukr_ref.h similarity index 98% rename from frame/3/gemm/4m/ukernels/bli_gemm4m_ukr_ref.h rename to frame/3/gemm/4m1/ukernels/bli_gemm4m1_ukr_ref.h index cabc89736..28be6f7f9 100644 --- a/frame/3/gemm/4m/ukernels/bli_gemm4m_ukr_ref.h +++ b/frame/3/gemm/4m1/ukernels/bli_gemm4m1_ukr_ref.h @@ -46,5 +46,5 @@ void PASTEMAC(ch,varname)( \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( gemm4m_ukr_ref ) +INSERT_GENTPROTCO_BASIC( gemm4m1_ukr_ref ) diff --git a/frame/3/gemm/4mb/bli_gemm4mb.c b/frame/3/gemm/4mb/bli_gemm4mb.c index 8121e7483..7a8f11560 100644 --- a/frame/3/gemm/4mb/bli_gemm4mb.c +++ b/frame/3/gemm/4mb/bli_gemm4mb.c @@ -38,10 +38,10 @@ // Define object-based interface. 
// void bli_gemm4mb( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 4m only applies to the complex domain, we use the regular // implementation for real domain cases. diff --git a/frame/3/gemm/4mb/bli_gemm4mb.h b/frame/3/gemm/4mb/bli_gemm4mb.h index b44d6b4f3..154bb99d4 100644 --- a/frame/3/gemm/4mb/bli_gemm4mb.h +++ b/frame/3/gemm/4mb/bli_gemm4mb.h @@ -42,10 +42,10 @@ // Prototype object-based interface. // void bli_gemm4mb( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // diff --git a/frame/3/gemm/4mb/bli_gemm4mb_entry.c b/frame/3/gemm/4mb/bli_gemm4mb_entry.c index e860ca2d3..e4df24022 100644 --- a/frame/3/gemm/4mb/bli_gemm4mb_entry.c +++ b/frame/3/gemm/4mb/bli_gemm4mb_entry.c @@ -37,10 +37,10 @@ extern gemm_t* gemm4mb_cntl; void bli_gemm4mb_entry( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { bli_gemm_front( alpha, a, b, beta, c, gemm4mb_cntl ); diff --git a/frame/3/gemm/4mb/bli_gemm4mb_entry.h b/frame/3/gemm/4mb/bli_gemm4mb_entry.h index 84bbeb42b..c4c8fa38e 100644 --- a/frame/3/gemm/4mb/bli_gemm4mb_entry.h +++ b/frame/3/gemm/4mb/bli_gemm4mb_entry.h @@ -33,8 +33,8 @@ */ void bli_gemm4mb_entry( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/gemm/4mh/bli_gemm4mh.c b/frame/3/gemm/4mh/bli_gemm4mh.c index c5d2b3fcb..c71dc3d79 100644 --- a/frame/3/gemm/4mh/bli_gemm4mh.c +++ b/frame/3/gemm/4mh/bli_gemm4mh.c @@ -38,10 +38,10 @@ // Define object-based interface. // void bli_gemm4mh( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 4mh only applies to the complex domain, we use the regular // implementation for real domain cases. 
diff --git a/frame/3/gemm/4mh/bli_gemm4mh.h b/frame/3/gemm/4mh/bli_gemm4mh.h index 54fb0ece0..cabf31fc0 100644 --- a/frame/3/gemm/4mh/bli_gemm4mh.h +++ b/frame/3/gemm/4mh/bli_gemm4mh.h @@ -42,10 +42,10 @@ // Prototype object-based interface. // void bli_gemm4mh( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // diff --git a/frame/3/gemm/4mh/bli_gemm4mh_entry.c b/frame/3/gemm/4mh/bli_gemm4mh_entry.c index e3bf76e13..c57395400 100644 --- a/frame/3/gemm/4mh/bli_gemm4mh_entry.c +++ b/frame/3/gemm/4mh/bli_gemm4mh_entry.c @@ -40,10 +40,10 @@ extern gemm_t* gemm4mh_cntl_ri; extern gemm_t* gemm4mh_cntl_ir; void bli_gemm4mh_entry( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { bli_gemm_front( alpha, a, b, beta, c, gemm4mh_cntl_rr ); bli_gemm_front( alpha, a, b, &BLIS_ONE, c, gemm4mh_cntl_ii ); diff --git a/frame/3/gemm/4mh/bli_gemm4mh_entry.h b/frame/3/gemm/4mh/bli_gemm4mh_entry.h index 904b7c7b7..7fd94fa37 100644 --- a/frame/3/gemm/4mh/bli_gemm4mh_entry.h +++ b/frame/3/gemm/4mh/bli_gemm4mh_entry.h @@ -33,8 +33,8 @@ */ void bli_gemm4mh_entry( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/gemm/bli_gemm.c b/frame/3/gemm/bli_gemm.c index c0665b8ca..71e0e716f 100644 --- a/frame/3/gemm/bli_gemm.c +++ b/frame/3/gemm/bli_gemm.c @@ -46,10 +46,10 @@ void bli_gemm( obj_t* alpha, num_t dt = bli_obj_datatype( *c ); if ( bli_3mh_is_enabled_dt( dt ) ) bli_gemm3mh_entry( alpha, a, b, beta, c ); - else if ( bli_3m_is_enabled_dt( dt ) ) bli_gemm3m_entry( alpha, a, b, beta, c ); + else if ( bli_3m1_is_enabled_dt( dt ) ) bli_gemm3m1_entry( alpha, a, b, beta, c ); else if ( bli_4mh_is_enabled_dt( dt ) ) bli_gemm4mh_entry( alpha, a, b, beta, c ); else if ( bli_4mb_is_enabled_dt( dt ) ) bli_gemm4mb_entry( alpha, a, b, beta, c ); - else if ( 
bli_4m_is_enabled_dt( dt ) ) bli_gemm4m_entry( alpha, a, b, beta, c ); + else if ( bli_4m1_is_enabled_dt( dt ) ) bli_gemm4m1_entry( alpha, a, b, beta, c ); else bli_gemm_entry( alpha, a, b, beta, c ); } diff --git a/frame/3/gemm/bli_gemm.h b/frame/3/gemm/bli_gemm.h index 8c1c99579..88c6e1619 100644 --- a/frame/3/gemm/bli_gemm.h +++ b/frame/3/gemm/bli_gemm.h @@ -51,11 +51,11 @@ #include "bli_gemm_ukr_ref.h" -#include "bli_gemm4m.h" -#include "bli_gemm3m.h" #include "bli_gemm4mh.h" -#include "bli_gemm3mh.h" #include "bli_gemm4mb.h" +#include "bli_gemm4m1.h" +#include "bli_gemm3mh.h" +#include "bli_gemm3m1.h" // // Prototype object-based interface. diff --git a/frame/3/gemm/bli_gemm_query.c b/frame/3/gemm/bli_gemm_query.c index eaf8f916c..cb4311fe3 100644 --- a/frame/3/gemm/bli_gemm_query.c +++ b/frame/3/gemm/bli_gemm_query.c @@ -35,29 +35,29 @@ #include "blis.h" extern func_t* gemm3mh_ukrs; -extern func_t* gemm3m_ukrs; +extern func_t* gemm3m1_ukrs; extern func_t* gemm4mh_ukrs; extern func_t* gemm4mb_ukrs; -extern func_t* gemm4m_ukrs; +extern func_t* gemm4m1_ukrs; extern func_t* gemm_ukrs; func_t* bli_gemm_query_ukrs( num_t dt ) { if ( bli_3mh_is_enabled_dt( dt ) ) return gemm3mh_ukrs; - else if ( bli_3m_is_enabled_dt( dt ) ) return gemm3m_ukrs; + else if ( bli_3m1_is_enabled_dt( dt ) ) return gemm3m1_ukrs; else if ( bli_4mh_is_enabled_dt( dt ) ) return gemm4mh_ukrs; else if ( bli_4mb_is_enabled_dt( dt ) ) return gemm4mb_ukrs; - else if ( bli_4m_is_enabled_dt( dt ) ) return gemm4m_ukrs; + else if ( bli_4m1_is_enabled_dt( dt ) ) return gemm4m1_ukrs; else return gemm_ukrs; } char* bli_gemm_query_impl_string( num_t dt ) { if ( bli_3mh_is_enabled_dt( dt ) ) return bli_3mh_get_string(); - else if ( bli_3m_is_enabled_dt( dt ) ) return bli_3m_get_string(); + else if ( bli_3m1_is_enabled_dt( dt ) ) return bli_3m1_get_string(); else if ( bli_4mh_is_enabled_dt( dt ) ) return bli_4mh_get_string(); else if ( bli_4mb_is_enabled_dt( dt ) ) return bli_4mb_get_string(); - else if ( 
bli_4m_is_enabled_dt( dt ) ) return bli_4m_get_string(); + else if ( bli_4m1_is_enabled_dt( dt ) ) return bli_4m1_get_string(); else return bli_native_get_string(); } @@ -74,16 +74,16 @@ kimpl_t bli_gemm_ukernel_impl_type( num_t dt ) else if ( p == BLIS_CGEMM3MH_UKERNEL_REF || p == BLIS_ZGEMM3MH_UKERNEL_REF || - p == BLIS_CGEMM3M_UKERNEL_REF || - p == BLIS_ZGEMM3M_UKERNEL_REF + p == BLIS_CGEMM3M1_UKERNEL_REF || + p == BLIS_ZGEMM3M1_UKERNEL_REF ) return BLIS_VIRTUAL3M_UKERNEL; else if ( p == BLIS_CGEMM4MH_UKERNEL_REF || p == BLIS_ZGEMM4MH_UKERNEL_REF || p == BLIS_CGEMM4MB_UKERNEL_REF || p == BLIS_ZGEMM4MB_UKERNEL_REF || - p == BLIS_CGEMM4M_UKERNEL_REF || - p == BLIS_ZGEMM4M_UKERNEL_REF + p == BLIS_CGEMM4M1_UKERNEL_REF || + p == BLIS_ZGEMM4M1_UKERNEL_REF ) return BLIS_VIRTUAL4M_UKERNEL; else return BLIS_OPTIMIZED_UKERNEL; diff --git a/frame/3/hemm/3m/bli_hemm3m.h b/frame/3/hemm/3m/bli_hemm3m.h deleted file mode 100644 index c9e08cb26..000000000 --- a/frame/3/hemm/3m/bli_hemm3m.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "bli_hemm3m_entry.h" - - -// -// Prototype object-based interface. -// -void bli_hemm3m( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); - -// -// Prototype BLAS-like interfaces with homogeneous-typed operands. -// -#undef GENTPROT -#define GENTPROT( ctype, ch, opname ) \ -\ -void PASTEMAC(ch,opname)( \ - side_t side, \ - uplo_t uploa, \ - conj_t conja, \ - trans_t transb, \ - dim_t m, \ - dim_t n, \ - ctype* alpha, \ - ctype* a, inc_t rs_a, inc_t cs_a, \ - ctype* b, inc_t rs_b, inc_t cs_b, \ - ctype* beta, \ - ctype* c, inc_t rs_c, inc_t cs_c \ - ); - -INSERT_GENTPROT_BASIC( hemm3m ) - diff --git a/frame/3/hemm/3m/bli_hemm3m_entry.h b/frame/3/hemm/3m/bli_hemm3m_entry.h deleted file mode 100644 index e00b84d29..000000000 --- a/frame/3/hemm/3m/bli_hemm3m_entry.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_hemm3m_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); - diff --git a/frame/3/hemm/3m/bli_hemm3m.c b/frame/3/hemm/3m1/bli_hemm3m1.c similarity index 90% rename from frame/3/hemm/3m/bli_hemm3m.c rename to frame/3/hemm/3m1/bli_hemm3m1.c index 7572799e0..aad8039a5 100644 --- a/frame/3/hemm/3m/bli_hemm3m.c +++ b/frame/3/hemm/3m1/bli_hemm3m1.c @@ -37,17 +37,17 @@ // // Define object-based interface. 
// -void bli_hemm3m( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) +void bli_hemm3m1( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { - // Since 3m only applies to the complex domain, we use the regular + // Since 3m1 only applies to the complex domain, we use the regular // implementation for real domain cases. if ( bli_obj_is_complex( *c ) ) - bli_hemm3m_entry( side, alpha, a, b, beta, c ); + bli_hemm3m1_entry( side, alpha, a, b, beta, c ); else bli_hemm_entry( side, alpha, a, b, beta, c ); } @@ -103,5 +103,5 @@ void PASTEMAC(ch,opname)( \ &co ); \ } -INSERT_GENTFUNC_BASIC( hemm3m, hemm3m ) +INSERT_GENTFUNC_BASIC( hemm3m1, hemm3m1 ) diff --git a/frame/3/trmm3/3m/bli_trmm33m.h b/frame/3/hemm/3m1/bli_hemm3m1.h similarity index 93% rename from frame/3/trmm3/3m/bli_trmm33m.h rename to frame/3/hemm/3m1/bli_hemm3m1.h index 1d88e4d72..414f9924b 100644 --- a/frame/3/trmm3/3m/bli_trmm33m.h +++ b/frame/3/hemm/3m1/bli_hemm3m1.h @@ -32,20 +32,19 @@ */ -#include "bli_trmm33m_entry.h" +#include "bli_hemm3m1_entry.h" // // Prototype object-based interface. // -void bli_trmm33m( side_t side, +void bli_hemm3m1( side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ); - // // Prototype BLAS-like interfaces with homogeneous-typed operands. 
// @@ -55,8 +54,7 @@ void bli_trmm33m( side_t side, void PASTEMAC(ch,opname)( \ side_t side, \ uplo_t uploa, \ - trans_t transa, \ - diag_t diaga, \ + conj_t conja, \ trans_t transb, \ dim_t m, \ dim_t n, \ @@ -67,5 +65,5 @@ void PASTEMAC(ch,opname)( \ ctype* c, inc_t rs_c, inc_t cs_c \ ); -INSERT_GENTPROT_BASIC( trmm33m ) +INSERT_GENTPROT_BASIC( hemm3m1 ) diff --git a/frame/3/trmm3/3m/bli_trmm33m_entry.c b/frame/3/hemm/3m1/bli_hemm3m1_entry.c similarity index 92% rename from frame/3/trmm3/3m/bli_trmm33m_entry.c rename to frame/3/hemm/3m1/bli_hemm3m1_entry.c index 2cf4ed12e..b4f3c572b 100644 --- a/frame/3/trmm3/3m/bli_trmm33m_entry.c +++ b/frame/3/hemm/3m1/bli_hemm3m1_entry.c @@ -34,16 +34,16 @@ #include "blis.h" -extern gemm_t* gemm3m_cntl; +extern gemm_t* gemm3m1_cntl; -void bli_trmm33m_entry( side_t side, +void bli_hemm3m1_entry( side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ) { - bli_trmm3_front( side, alpha, a, b, beta, c, - gemm3m_cntl ); + bli_hemm_front( side, alpha, a, b, beta, c, + gemm3m1_cntl ); } diff --git a/frame/3/trmm3/3m/bli_trmm33m_entry.h b/frame/3/hemm/3m1/bli_hemm3m1_entry.h similarity index 98% rename from frame/3/trmm3/3m/bli_trmm33m_entry.h rename to frame/3/hemm/3m1/bli_hemm3m1_entry.h index 72224d521..262363b89 100644 --- a/frame/3/trmm3/3m/bli_trmm33m_entry.h +++ b/frame/3/hemm/3m1/bli_hemm3m1_entry.h @@ -32,7 +32,7 @@ */ -void bli_trmm33m_entry( side_t side, +void bli_hemm3m1_entry( side_t side, obj_t* alpha, obj_t* a, obj_t* b, diff --git a/frame/3/hemm/3mh/bli_hemm3mh.c b/frame/3/hemm/3mh/bli_hemm3mh.c index 337ab28d9..fd9613e5d 100644 --- a/frame/3/hemm/3mh/bli_hemm3mh.c +++ b/frame/3/hemm/3mh/bli_hemm3mh.c @@ -38,11 +38,11 @@ // Define object-based interface. 
// void bli_hemm3mh( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 3mh only applies to the complex domain, we use the regular // implementation for real domain cases. diff --git a/frame/3/hemm/3mh/bli_hemm3mh.h b/frame/3/hemm/3mh/bli_hemm3mh.h index 60168cfb0..92ab3e9e5 100644 --- a/frame/3/hemm/3mh/bli_hemm3mh.h +++ b/frame/3/hemm/3mh/bli_hemm3mh.h @@ -39,11 +39,11 @@ // Prototype object-based interface. // void bli_hemm3mh( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // // Prototype BLAS-like interfaces with homogeneous-typed operands. diff --git a/frame/3/hemm/3mh/bli_hemm3mh_entry.c b/frame/3/hemm/3mh/bli_hemm3mh_entry.c index 2444e6b3f..480c8a7be 100644 --- a/frame/3/hemm/3mh/bli_hemm3mh_entry.c +++ b/frame/3/hemm/3mh/bli_hemm3mh_entry.c @@ -39,11 +39,11 @@ extern gemm_t* gemm3mh_cntl_io; extern gemm_t* gemm3mh_cntl_rpi; void bli_hemm3mh_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { bli_hemm_front( side, alpha, a, b, beta, c, gemm3mh_cntl_ro ); bli_hemm_front( side, alpha, a, b, &BLIS_ONE, c, gemm3mh_cntl_io ); diff --git a/frame/3/hemm/3mh/bli_hemm3mh_entry.h b/frame/3/hemm/3mh/bli_hemm3mh_entry.h index 08cb026a1..c01f01a22 100644 --- a/frame/3/hemm/3mh/bli_hemm3mh_entry.h +++ b/frame/3/hemm/3mh/bli_hemm3mh_entry.h @@ -33,9 +33,9 @@ */ void bli_hemm3mh_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/hemm/4m/bli_hemm4m_entry.h b/frame/3/hemm/4m/bli_hemm4m_entry.h deleted file mode 100644 index 0aa26c96c..000000000 --- a/frame/3/hemm/4m/bli_hemm4m_entry.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - - BLIS - An 
object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -void bli_hemm4m_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); - diff --git a/frame/3/hemm/4m/bli_hemm4m.c b/frame/3/hemm/4m1/bli_hemm4m1.c similarity index 92% rename from frame/3/hemm/4m/bli_hemm4m.c rename to frame/3/hemm/4m1/bli_hemm4m1.c index f2aea859e..8f2f1c6dc 100644 --- a/frame/3/hemm/4m/bli_hemm4m.c +++ b/frame/3/hemm/4m1/bli_hemm4m1.c @@ -37,17 +37,17 @@ // // Define object-based interface. // -void bli_hemm4m( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) +void bli_hemm4m1( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 4m only applies to the complex domain, we use the regular // implementation for real domain cases. if ( bli_obj_is_complex( *c ) ) - bli_hemm4m_entry( side, alpha, a, b, beta, c ); + bli_hemm4m1_entry( side, alpha, a, b, beta, c ); else bli_hemm_entry( side, alpha, a, b, beta, c ); } @@ -103,5 +103,5 @@ void PASTEMAC(ch,opname)( \ &co ); \ } -INSERT_GENTFUNC_BASIC( hemm4m, hemm4m ) +INSERT_GENTFUNC_BASIC( hemm4m1, hemm4m1 ) diff --git a/frame/3/trmm3/4m/bli_trmm34m.h b/frame/3/hemm/4m1/bli_hemm4m1.h similarity index 93% rename from frame/3/trmm3/4m/bli_trmm34m.h rename to frame/3/hemm/4m1/bli_hemm4m1.h index 71c0e099b..0e38d1a4f 100644 --- a/frame/3/trmm3/4m/bli_trmm34m.h +++ b/frame/3/hemm/4m1/bli_hemm4m1.h @@ -32,20 +32,19 @@ */ -#include "bli_trmm34m_entry.h" +#include "bli_hemm4m1_entry.h" // // Prototype object-based interface. // -void bli_trmm34m( side_t side, +void bli_hemm4m1( side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ); - // // Prototype BLAS-like interfaces with homogeneous-typed operands. 
// @@ -55,8 +54,7 @@ void bli_trmm34m( side_t side, void PASTEMAC(ch,opname)( \ side_t side, \ uplo_t uploa, \ - trans_t transa, \ - diag_t diaga, \ + conj_t conja, \ trans_t transb, \ dim_t m, \ dim_t n, \ @@ -67,5 +65,5 @@ void PASTEMAC(ch,opname)( \ ctype* c, inc_t rs_c, inc_t cs_c \ ); -INSERT_GENTPROT_BASIC( trmm34m ) +INSERT_GENTPROT_BASIC( hemm4m1 ) diff --git a/frame/3/trmm3/4m/bli_trmm34m_entry.c b/frame/3/hemm/4m1/bli_hemm4m1_entry.c similarity index 92% rename from frame/3/trmm3/4m/bli_trmm34m_entry.c rename to frame/3/hemm/4m1/bli_hemm4m1_entry.c index c6a2b8b51..d955321c0 100644 --- a/frame/3/trmm3/4m/bli_trmm34m_entry.c +++ b/frame/3/hemm/4m1/bli_hemm4m1_entry.c @@ -34,16 +34,16 @@ #include "blis.h" -extern gemm_t* gemm4m_cntl; +extern gemm_t* gemm4m1_cntl; -void bli_trmm34m_entry( side_t side, +void bli_hemm4m1_entry( side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ) { - bli_trmm3_front( side, alpha, a, b, beta, c, - gemm4m_cntl ); + bli_hemm_front( side, alpha, a, b, beta, c, + gemm4m1_cntl ); } diff --git a/frame/3/trmm3/4m/bli_trmm34m_entry.h b/frame/3/hemm/4m1/bli_hemm4m1_entry.h similarity index 98% rename from frame/3/trmm3/4m/bli_trmm34m_entry.h rename to frame/3/hemm/4m1/bli_hemm4m1_entry.h index 8b273330f..fc5e2d006 100644 --- a/frame/3/trmm3/4m/bli_trmm34m_entry.h +++ b/frame/3/hemm/4m1/bli_hemm4m1_entry.h @@ -32,7 +32,7 @@ */ -void bli_trmm34m_entry( side_t side, +void bli_hemm4m1_entry( side_t side, obj_t* alpha, obj_t* a, obj_t* b, diff --git a/frame/3/hemm/4mh/bli_hemm4mh.c b/frame/3/hemm/4mh/bli_hemm4mh.c index 4aec7b157..f5eb1a5e2 100644 --- a/frame/3/hemm/4mh/bli_hemm4mh.c +++ b/frame/3/hemm/4mh/bli_hemm4mh.c @@ -38,11 +38,11 @@ // Define object-based interface. 
// void bli_hemm4mh( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 4mh only applies to the complex domain, we use the regular // implementation for real domain cases. diff --git a/frame/3/hemm/4mh/bli_hemm4mh.h b/frame/3/hemm/4mh/bli_hemm4mh.h index e055ee347..dc11aa698 100644 --- a/frame/3/hemm/4mh/bli_hemm4mh.h +++ b/frame/3/hemm/4mh/bli_hemm4mh.h @@ -39,11 +39,11 @@ // Prototype object-based interface. // void bli_hemm4mh( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // // Prototype BLAS-like interfaces with homogeneous-typed operands. diff --git a/frame/3/hemm/4mh/bli_hemm4mh_entry.c b/frame/3/hemm/4mh/bli_hemm4mh_entry.c index f9545ca85..e734d2186 100644 --- a/frame/3/hemm/4mh/bli_hemm4mh_entry.c +++ b/frame/3/hemm/4mh/bli_hemm4mh_entry.c @@ -40,11 +40,11 @@ extern gemm_t* gemm4mh_cntl_ir; extern gemm_t* gemm4mh_cntl_ii; void bli_hemm4mh_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { bli_hemm_front( side, alpha, a, b, beta, c, gemm4mh_cntl_rr ); bli_hemm_front( side, alpha, a, b, &BLIS_ONE, c, gemm4mh_cntl_ii ); diff --git a/frame/3/hemm/4mh/bli_hemm4mh_entry.h b/frame/3/hemm/4mh/bli_hemm4mh_entry.h index 0f5ee375f..a5f031b02 100644 --- a/frame/3/hemm/4mh/bli_hemm4mh_entry.h +++ b/frame/3/hemm/4mh/bli_hemm4mh_entry.h @@ -33,9 +33,9 @@ */ void bli_hemm4mh_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/hemm/bli_hemm.c b/frame/3/hemm/bli_hemm.c index c1c62a0b8..7dbc585d8 100644 --- a/frame/3/hemm/bli_hemm.c +++ b/frame/3/hemm/bli_hemm.c @@ -47,9 +47,9 @@ void bli_hemm( side_t side, num_t dt = 
bli_obj_datatype( *c ); if ( bli_3mh_is_enabled_dt( dt ) ) bli_hemm3mh_entry( side, alpha, a, b, beta, c ); - else if ( bli_3m_is_enabled_dt( dt ) ) bli_hemm3m_entry( side, alpha, a, b, beta, c ); + else if ( bli_3m1_is_enabled_dt( dt ) ) bli_hemm3m1_entry( side, alpha, a, b, beta, c ); else if ( bli_4mh_is_enabled_dt( dt ) ) bli_hemm4mh_entry( side, alpha, a, b, beta, c ); - else if ( bli_4m_is_enabled_dt( dt ) ) bli_hemm4m_entry( side, alpha, a, b, beta, c ); + else if ( bli_4m1_is_enabled_dt( dt ) ) bli_hemm4m1_entry( side, alpha, a, b, beta, c ); else bli_hemm_entry( side, alpha, a, b, beta, c ); } diff --git a/frame/3/hemm/bli_hemm.h b/frame/3/hemm/bli_hemm.h index 3dba760a9..fea541fea 100644 --- a/frame/3/hemm/bli_hemm.h +++ b/frame/3/hemm/bli_hemm.h @@ -36,10 +36,10 @@ #include "bli_hemm_entry.h" #include "bli_hemm_front.h" -#include "bli_hemm4m.h" -#include "bli_hemm3m.h" #include "bli_hemm4mh.h" +#include "bli_hemm4m1.h" #include "bli_hemm3mh.h" +#include "bli_hemm3m1.h" // diff --git a/frame/3/her2k/3m/bli_her2k3m.c b/frame/3/her2k/3m1/bli_her2k3m1.c similarity index 91% rename from frame/3/her2k/3m/bli_her2k3m.c rename to frame/3/her2k/3m1/bli_her2k3m1.c index 75d8167b9..a2dc93cbf 100644 --- a/frame/3/her2k/3m/bli_her2k3m.c +++ b/frame/3/her2k/3m1/bli_her2k3m1.c @@ -37,16 +37,16 @@ // // Define object-based interface. // -void bli_her2k3m( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) +void bli_her2k3m1( obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { - // Since 3m only applies to the complex domain, we use the regular + // Since 3m1 only applies to the complex domain, we use the regular // implementation for real domain cases. 
if ( bli_obj_is_complex( *c ) ) - bli_her2k3m_entry( alpha, a, b, beta, c ); + bli_her2k3m1_entry( alpha, a, b, beta, c ); else bli_her2k_entry( alpha, a, b, beta, c ); } @@ -101,5 +101,5 @@ void PASTEMAC(ch,opname)( \ &co ); \ } -INSERT_GENTFUNCR_BASIC( her2k3m, her2k3m ) +INSERT_GENTFUNCR_BASIC( her2k3m1, her2k3m1 ) diff --git a/frame/3/her2k/3m/bli_her2k3m.h b/frame/3/her2k/3m1/bli_her2k3m1.h similarity index 91% rename from frame/3/her2k/3m/bli_her2k3m.h rename to frame/3/her2k/3m1/bli_her2k3m1.h index 8cb4696db..88fbdf2d7 100644 --- a/frame/3/her2k/3m/bli_her2k3m.h +++ b/frame/3/her2k/3m1/bli_her2k3m1.h @@ -32,17 +32,17 @@ */ -#include "bli_her2k3m_entry.h" +#include "bli_her2k3m1_entry.h" // // Prototype object-based interface. // -void bli_her2k3m( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); +void bli_her2k3m1( obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // @@ -64,5 +64,5 @@ void PASTEMAC(ch,opname)( \ ctype* c, inc_t rs_c, inc_t cs_c \ ); -INSERT_GENTPROTR_BASIC( her2k3m ) +INSERT_GENTPROTR_BASIC( her2k3m1 ) diff --git a/frame/3/gemm/4m/bli_gemm4m_entry.c b/frame/3/her2k/3m1/bli_her2k3m1_entry.c similarity index 85% rename from frame/3/gemm/4m/bli_gemm4m_entry.c rename to frame/3/her2k/3m1/bli_her2k3m1_entry.c index cfc430c4e..88eb932ea 100644 --- a/frame/3/gemm/4m/bli_gemm4m_entry.c +++ b/frame/3/her2k/3m1/bli_her2k3m1_entry.c @@ -34,15 +34,15 @@ #include "blis.h" -extern gemm_t* gemm4m_cntl; +extern gemm_t* gemm3m1_cntl; -void bli_gemm4m_entry( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) +void bli_her2k3m1_entry( obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { - bli_gemm_front( alpha, a, b, beta, c, - gemm4m_cntl ); + bli_her2k_front( alpha, a, b, beta, c, + gemm3m1_cntl ); } diff --git a/frame/3/gemm/4m/bli_gemm4m_entry.h b/frame/3/her2k/3m1/bli_her2k3m1_entry.h similarity index 89% rename from frame/3/gemm/4m/bli_gemm4m_entry.h rename to 
frame/3/her2k/3m1/bli_her2k3m1_entry.h index 9ce5c4a4c..7ab5b727b 100644 --- a/frame/3/gemm/4m/bli_gemm4m_entry.h +++ b/frame/3/her2k/3m1/bli_her2k3m1_entry.h @@ -32,9 +32,9 @@ */ -void bli_gemm4m_entry( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); +void bli_her2k3m1_entry( obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/her2k/3mh/bli_her2k3mh.c b/frame/3/her2k/3mh/bli_her2k3mh.c index c4a501c8c..7a52ee88d 100644 --- a/frame/3/her2k/3mh/bli_her2k3mh.c +++ b/frame/3/her2k/3mh/bli_her2k3mh.c @@ -38,10 +38,10 @@ // Define object-based interface. // void bli_her2k3mh( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 3mh only applies to the complex domain, we use the regular // implementation for real domain cases. diff --git a/frame/3/her2k/3mh/bli_her2k3mh.h b/frame/3/her2k/3mh/bli_her2k3mh.h index 123dc5847..0838fcb1a 100644 --- a/frame/3/her2k/3mh/bli_her2k3mh.h +++ b/frame/3/her2k/3mh/bli_her2k3mh.h @@ -39,10 +39,10 @@ // Prototype object-based interface. 
// void bli_her2k3mh( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // diff --git a/frame/3/her2k/3mh/bli_her2k3mh_entry.c b/frame/3/her2k/3mh/bli_her2k3mh_entry.c index e95ad844d..a8ccf0cd6 100644 --- a/frame/3/her2k/3mh/bli_her2k3mh_entry.c +++ b/frame/3/her2k/3mh/bli_her2k3mh_entry.c @@ -39,10 +39,10 @@ extern gemm_t* gemm3mh_cntl_io; extern gemm_t* gemm3mh_cntl_rpi; void bli_her2k3mh_entry( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { bli_her2k_front( alpha, a, b, beta, c, gemm3mh_cntl_ro ); bli_her2k_front( alpha, a, b, &BLIS_ONE, c, gemm3mh_cntl_io ); diff --git a/frame/3/her2k/3mh/bli_her2k3mh_entry.h b/frame/3/her2k/3mh/bli_her2k3mh_entry.h index 699705637..b6e33a40c 100644 --- a/frame/3/her2k/3mh/bli_her2k3mh_entry.h +++ b/frame/3/her2k/3mh/bli_her2k3mh_entry.h @@ -33,8 +33,8 @@ */ void bli_her2k3mh_entry( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/her2k/4m/bli_her2k4m.c b/frame/3/her2k/4m1/bli_her2k4m1.c similarity index 93% rename from frame/3/her2k/4m/bli_her2k4m.c rename to frame/3/her2k/4m1/bli_her2k4m1.c index 25eb2ee2d..383597851 100644 --- a/frame/3/her2k/4m/bli_her2k4m.c +++ b/frame/3/her2k/4m1/bli_her2k4m1.c @@ -37,16 +37,16 @@ // // Define object-based interface. // -void bli_her2k4m( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) +void bli_her2k4m1( obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 4m only applies to the complex domain, we use the regular // implementation for real domain cases. 
if ( bli_obj_is_complex( *c ) ) - bli_her2k4m_entry( alpha, a, b, beta, c ); + bli_her2k4m1_entry( alpha, a, b, beta, c ); else bli_her2k_entry( alpha, a, b, beta, c ); } @@ -101,5 +101,5 @@ void PASTEMAC(ch,opname)( \ &co ); \ } -INSERT_GENTFUNCR_BASIC( her2k4m, her2k4m ) +INSERT_GENTFUNCR_BASIC( her2k4m1, her2k4m1 ) diff --git a/frame/3/her2k/4m/bli_her2k4m.h b/frame/3/her2k/4m1/bli_her2k4m1.h similarity index 91% rename from frame/3/her2k/4m/bli_her2k4m.h rename to frame/3/her2k/4m1/bli_her2k4m1.h index 424743126..0b453b902 100644 --- a/frame/3/her2k/4m/bli_her2k4m.h +++ b/frame/3/her2k/4m1/bli_her2k4m1.h @@ -32,17 +32,17 @@ */ -#include "bli_her2k4m_entry.h" +#include "bli_her2k4m1_entry.h" // // Prototype object-based interface. // -void bli_her2k4m( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); +void bli_her2k4m1( obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // @@ -64,5 +64,5 @@ void PASTEMAC(ch,opname)( \ ctype* c, inc_t rs_c, inc_t cs_c \ ); -INSERT_GENTPROTR_BASIC( her2k4m ) +INSERT_GENTPROTR_BASIC( her2k4m1 ) diff --git a/frame/3/gemm/3m/bli_gemm3m_entry.c b/frame/3/her2k/4m1/bli_her2k4m1_entry.c similarity index 85% rename from frame/3/gemm/3m/bli_gemm3m_entry.c rename to frame/3/her2k/4m1/bli_her2k4m1_entry.c index ef32125f4..7d43fdf51 100644 --- a/frame/3/gemm/3m/bli_gemm3m_entry.c +++ b/frame/3/her2k/4m1/bli_her2k4m1_entry.c @@ -34,15 +34,15 @@ #include "blis.h" -extern gemm_t* gemm3m_cntl; +extern gemm_t* gemm4m1_cntl; -void bli_gemm3m_entry( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) +void bli_her2k4m1_entry( obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { - bli_gemm_front( alpha, a, b, beta, c, - gemm3m_cntl ); + bli_her2k_front( alpha, a, b, beta, c, + gemm4m1_cntl ); } diff --git a/frame/3/gemm/3m/bli_gemm3m_entry.h b/frame/3/her2k/4m1/bli_her2k4m1_entry.h similarity index 89% rename from frame/3/gemm/3m/bli_gemm3m_entry.h rename to 
frame/3/her2k/4m1/bli_her2k4m1_entry.h index 0af6a73cb..f6d8c47e1 100644 --- a/frame/3/gemm/3m/bli_gemm3m_entry.h +++ b/frame/3/her2k/4m1/bli_her2k4m1_entry.h @@ -32,9 +32,9 @@ */ -void bli_gemm3m_entry( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); +void bli_her2k4m1_entry( obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/her2k/4mh/bli_her2k4mh.c b/frame/3/her2k/4mh/bli_her2k4mh.c index 18e4f33e3..8a8917cc3 100644 --- a/frame/3/her2k/4mh/bli_her2k4mh.c +++ b/frame/3/her2k/4mh/bli_her2k4mh.c @@ -38,10 +38,10 @@ // Define object-based interface. // void bli_her2k4mh( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 4mh only applies to the complex domain, we use the regular // implementation for real domain cases. diff --git a/frame/3/her2k/4mh/bli_her2k4mh.h b/frame/3/her2k/4mh/bli_her2k4mh.h index fa86a85bd..02cba6c33 100644 --- a/frame/3/her2k/4mh/bli_her2k4mh.h +++ b/frame/3/her2k/4mh/bli_her2k4mh.h @@ -39,10 +39,10 @@ // Prototype object-based interface. 
// void bli_her2k4mh( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // diff --git a/frame/3/her2k/4mh/bli_her2k4mh_entry.c b/frame/3/her2k/4mh/bli_her2k4mh_entry.c index a122c4c6f..6547b819f 100644 --- a/frame/3/her2k/4mh/bli_her2k4mh_entry.c +++ b/frame/3/her2k/4mh/bli_her2k4mh_entry.c @@ -40,10 +40,10 @@ extern gemm_t* gemm4mh_cntl_ir; extern gemm_t* gemm4mh_cntl_ii; void bli_her2k4mh_entry( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { bli_her2k_front( alpha, a, b, beta, c, gemm4mh_cntl_rr ); bli_her2k_front( alpha, a, b, &BLIS_ONE, c, gemm4mh_cntl_ii ); diff --git a/frame/3/her2k/4mh/bli_her2k4mh_entry.h b/frame/3/her2k/4mh/bli_her2k4mh_entry.h index 706150811..943a428a6 100644 --- a/frame/3/her2k/4mh/bli_her2k4mh_entry.h +++ b/frame/3/her2k/4mh/bli_her2k4mh_entry.h @@ -33,8 +33,8 @@ */ void bli_her2k4mh_entry( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/her2k/bli_her2k.c b/frame/3/her2k/bli_her2k.c index 74e1613df..135334dfb 100644 --- a/frame/3/her2k/bli_her2k.c +++ b/frame/3/her2k/bli_her2k.c @@ -46,9 +46,9 @@ void bli_her2k( obj_t* alpha, num_t dt = bli_obj_datatype( *c ); if ( bli_3mh_is_enabled_dt( dt ) ) bli_her2k3mh_entry( alpha, a, b, beta, c ); - else if ( bli_3m_is_enabled_dt( dt ) ) bli_her2k3m_entry( alpha, a, b, beta, c ); + else if ( bli_3m1_is_enabled_dt( dt ) ) bli_her2k3m1_entry( alpha, a, b, beta, c ); else if ( bli_4mh_is_enabled_dt( dt ) ) bli_her2k4mh_entry( alpha, a, b, beta, c ); - else if ( bli_4m_is_enabled_dt( dt ) ) bli_her2k4m_entry( alpha, a, b, beta, c ); + else if ( bli_4m1_is_enabled_dt( dt ) ) bli_her2k4m1_entry( alpha, a, b, beta, c ); else bli_her2k_entry( alpha, a, b, beta, c ); } diff --git a/frame/3/her2k/bli_her2k.h b/frame/3/her2k/bli_her2k.h index a1ffb8e51..976c1644d 100644 --- 
a/frame/3/her2k/bli_her2k.h +++ b/frame/3/her2k/bli_her2k.h @@ -50,10 +50,10 @@ #include "bli_her2k_u_ker_var2.h" */ -#include "bli_her2k4m.h" -#include "bli_her2k3m.h" #include "bli_her2k4mh.h" +#include "bli_her2k4m1.h" #include "bli_her2k3mh.h" +#include "bli_her2k3m1.h" // diff --git a/frame/3/herk/3m/bli_herk3m_entry.c b/frame/3/herk/3m/bli_herk3m_entry.c deleted file mode 100644 index 8f6c17f66..000000000 --- a/frame/3/herk/3m/bli_herk3m_entry.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -extern gemm_t* gemm3m_cntl; - -void bli_herk3m_entry( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ) -{ - bli_herk_front( alpha, a, beta, c, - gemm3m_cntl ); -} - diff --git a/frame/3/herk/3m/bli_herk3m_entry.h b/frame/3/herk/3m/bli_herk3m_entry.h deleted file mode 100644 index 29bc193e9..000000000 --- a/frame/3/herk/3m/bli_herk3m_entry.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_herk3m_entry( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ); - diff --git a/frame/3/herk/3m/bli_herk3m.c b/frame/3/herk/3m1/bli_herk3m1.c similarity index 92% rename from frame/3/herk/3m/bli_herk3m.c rename to frame/3/herk/3m1/bli_herk3m1.c index 32b699539..6e2a4911f 100644 --- a/frame/3/herk/3m/bli_herk3m.c +++ b/frame/3/herk/3m1/bli_herk3m1.c @@ -37,15 +37,15 @@ // // Define object-based interface. // -void bli_herk3m( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ) +void bli_herk3m1( obj_t* alpha, + obj_t* a, + obj_t* beta, + obj_t* c ) { - // Since 3m only applies to the complex domain, we use the regular + // Since 3m1 only applies to the complex domain, we use the regular // implementation for real domain cases. 
if ( bli_obj_is_complex( *c ) ) - bli_herk3m_entry( alpha, a, beta, c ); + bli_herk3m1_entry( alpha, a, beta, c ); else bli_herk_entry( alpha, a, beta, c ); } @@ -93,5 +93,5 @@ void PASTEMAC(ch,opname)( \ &co ); \ } -INSERT_GENTFUNCR_BASIC( herk3m, herk3m ) +INSERT_GENTFUNCR_BASIC( herk3m1, herk3m1 ) diff --git a/frame/3/herk/3m/bli_herk3m.h b/frame/3/herk/3m1/bli_herk3m1.h similarity index 92% rename from frame/3/herk/3m/bli_herk3m.h rename to frame/3/herk/3m1/bli_herk3m1.h index 706398826..f30fb4028 100644 --- a/frame/3/herk/3m/bli_herk3m.h +++ b/frame/3/herk/3m1/bli_herk3m1.h @@ -32,16 +32,16 @@ */ -#include "bli_herk3m_entry.h" +#include "bli_herk3m1_entry.h" // // Prototype object-based interface. // -void bli_herk3m( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ); +void bli_herk3m1( obj_t* alpha, + obj_t* a, + obj_t* beta, + obj_t* c ); // @@ -61,5 +61,5 @@ void PASTEMAC(ch,opname)( \ ctype* c, inc_t rs_c, inc_t cs_c \ ); -INSERT_GENTPROTR_BASIC( herk3m ) +INSERT_GENTPROTR_BASIC( herk3m1 ) diff --git a/frame/3/syr2k/3m/bli_syr2k3m_entry.c b/frame/3/herk/3m1/bli_herk3m1_entry.c similarity index 91% rename from frame/3/syr2k/3m/bli_syr2k3m_entry.c rename to frame/3/herk/3m1/bli_herk3m1_entry.c index 77876f684..fd5c93575 100644 --- a/frame/3/syr2k/3m/bli_syr2k3m_entry.c +++ b/frame/3/herk/3m1/bli_herk3m1_entry.c @@ -34,15 +34,14 @@ #include "blis.h" -extern gemm_t* gemm3m_cntl; +extern gemm_t* gemm3m1_cntl; -void bli_syr2k3m_entry( obj_t* alpha, +void bli_herk3m1_entry( obj_t* alpha, obj_t* a, - obj_t* b, obj_t* beta, obj_t* c ) { - bli_syr2k_front( alpha, a, b, beta, c, - gemm3m_cntl ); + bli_herk_front( alpha, a, beta, c, + gemm3m1_cntl ); } diff --git a/frame/3/syr2k/3m/bli_syr2k3m_entry.h b/frame/3/herk/3m1/bli_herk3m1_entry.h similarity index 96% rename from frame/3/syr2k/3m/bli_syr2k3m_entry.h rename to frame/3/herk/3m1/bli_herk3m1_entry.h index 1d6bb0298..1d6c81ee8 100644 --- a/frame/3/syr2k/3m/bli_syr2k3m_entry.h +++ 
b/frame/3/herk/3m1/bli_herk3m1_entry.h @@ -32,9 +32,8 @@ */ -void bli_syr2k3m_entry( obj_t* alpha, +void bli_herk3m1_entry( obj_t* alpha, obj_t* a, - obj_t* b, obj_t* beta, obj_t* c ); diff --git a/frame/3/herk/4m/old/bli_herk4m_cntl.h b/frame/3/herk/3m1/old/bli_herk3m1_cntl.h similarity index 95% rename from frame/3/herk/4m/old/bli_herk4m_cntl.h rename to frame/3/herk/3m1/old/bli_herk3m1_cntl.h index 988c2b3bc..1f98c591d 100644 --- a/frame/3/herk/4m/old/bli_herk4m_cntl.h +++ b/frame/3/herk/3m1/old/bli_herk3m1_cntl.h @@ -32,6 +32,6 @@ */ -void bli_herk4m_cntl_init( void ); -void bli_herk4m_cntl_finalize( void ); +void bli_herk3m1_cntl_init( void ); +void bli_herk3m1_cntl_finalize( void ); diff --git a/frame/3/herk/3mh/bli_herk3mh.c b/frame/3/herk/3mh/bli_herk3mh.c index b3b7ae2db..fc3928a11 100644 --- a/frame/3/herk/3mh/bli_herk3mh.c +++ b/frame/3/herk/3mh/bli_herk3mh.c @@ -38,9 +38,9 @@ // Define object-based interface. // void bli_herk3mh( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* beta, + obj_t* c ) { // Since 3mh only applies to the complex domain, we use the regular // implementation for real domain cases. diff --git a/frame/3/herk/3mh/bli_herk3mh.h b/frame/3/herk/3mh/bli_herk3mh.h index b748db2c0..76134c572 100644 --- a/frame/3/herk/3mh/bli_herk3mh.h +++ b/frame/3/herk/3mh/bli_herk3mh.h @@ -39,9 +39,9 @@ // Prototype object-based interface. 
// void bli_herk3mh( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* beta, + obj_t* c ); // diff --git a/frame/3/herk/3mh/bli_herk3mh_entry.c b/frame/3/herk/3mh/bli_herk3mh_entry.c index 5e8be3635..6174d1e0d 100644 --- a/frame/3/herk/3mh/bli_herk3mh_entry.c +++ b/frame/3/herk/3mh/bli_herk3mh_entry.c @@ -39,9 +39,9 @@ extern gemm_t* gemm3mh_cntl_io; extern gemm_t* gemm3mh_cntl_rpi; void bli_herk3mh_entry( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* beta, + obj_t* c ) { bli_herk_front( alpha, a, beta, c, gemm3mh_cntl_ro ); bli_herk_front( alpha, a, &BLIS_ONE, c, gemm3mh_cntl_io ); diff --git a/frame/3/herk/3mh/bli_herk3mh_entry.h b/frame/3/herk/3mh/bli_herk3mh_entry.h index 7ad1e67b4..3d59740f5 100644 --- a/frame/3/herk/3mh/bli_herk3mh_entry.h +++ b/frame/3/herk/3mh/bli_herk3mh_entry.h @@ -33,7 +33,7 @@ */ void bli_herk3mh_entry( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/herk/4m/bli_herk4m_entry.c b/frame/3/herk/4m/bli_herk4m_entry.c deleted file mode 100644 index 7168ca7a4..000000000 --- a/frame/3/herk/4m/bli_herk4m_entry.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -extern gemm_t* gemm4m_cntl; - -void bli_herk4m_entry( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ) -{ - bli_herk_front( alpha, a, beta, c, - gemm4m_cntl ); -} - diff --git a/frame/3/herk/4m/bli_herk4m_entry.h b/frame/3/herk/4m/bli_herk4m_entry.h deleted file mode 100644 index e67df69d9..000000000 --- a/frame/3/herk/4m/bli_herk4m_entry.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_herk4m_entry( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ); - diff --git a/frame/3/herk/4m/bli_herk4m.c b/frame/3/herk/4m1/bli_herk4m1.c similarity index 94% rename from frame/3/herk/4m/bli_herk4m.c rename to frame/3/herk/4m1/bli_herk4m1.c index 3baaba4ff..d8ca25c5d 100644 --- a/frame/3/herk/4m/bli_herk4m.c +++ b/frame/3/herk/4m1/bli_herk4m1.c @@ -37,15 +37,15 @@ // // Define object-based interface. // -void bli_herk4m( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ) +void bli_herk4m1( obj_t* alpha, + obj_t* a, + obj_t* beta, + obj_t* c ) { // Since 4m only applies to the complex domain, we use the regular // implementation for real domain cases. 
if ( bli_obj_is_complex( *c ) ) - bli_herk4m_entry( alpha, a, beta, c ); + bli_herk4m1_entry( alpha, a, beta, c ); else bli_herk_entry( alpha, a, beta, c ); } @@ -93,5 +93,5 @@ void PASTEMAC(ch,opname)( \ &co ); \ } -INSERT_GENTFUNCR_BASIC( herk4m, herk4m ) +INSERT_GENTFUNCR_BASIC( herk4m1, herk4m1 ) diff --git a/frame/3/herk/4m/bli_herk4m.h b/frame/3/herk/4m1/bli_herk4m1.h similarity index 92% rename from frame/3/herk/4m/bli_herk4m.h rename to frame/3/herk/4m1/bli_herk4m1.h index e1938273f..94e6e42d3 100644 --- a/frame/3/herk/4m/bli_herk4m.h +++ b/frame/3/herk/4m1/bli_herk4m1.h @@ -32,16 +32,16 @@ */ -#include "bli_herk4m_entry.h" +#include "bli_herk4m1_entry.h" // // Prototype object-based interface. // -void bli_herk4m( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ); +void bli_herk4m1( obj_t* alpha, + obj_t* a, + obj_t* beta, + obj_t* c ); // @@ -61,5 +61,5 @@ void PASTEMAC(ch,opname)( \ ctype* c, inc_t rs_c, inc_t cs_c \ ); -INSERT_GENTPROTR_BASIC( herk4m ) +INSERT_GENTPROTR_BASIC( herk4m1 ) diff --git a/frame/3/syr2k/4m/bli_syr2k4m_entry.c b/frame/3/herk/4m1/bli_herk4m1_entry.c similarity index 91% rename from frame/3/syr2k/4m/bli_syr2k4m_entry.c rename to frame/3/herk/4m1/bli_herk4m1_entry.c index fe0b8733d..0349a7a32 100644 --- a/frame/3/syr2k/4m/bli_syr2k4m_entry.c +++ b/frame/3/herk/4m1/bli_herk4m1_entry.c @@ -34,15 +34,14 @@ #include "blis.h" -extern gemm_t* gemm4m_cntl; +extern gemm_t* gemm4m1_cntl; -void bli_syr2k4m_entry( obj_t* alpha, +void bli_herk4m1_entry( obj_t* alpha, obj_t* a, - obj_t* b, obj_t* beta, obj_t* c ) { - bli_syr2k_front( alpha, a, b, beta, c, - gemm4m_cntl ); + bli_herk_front( alpha, a, beta, c, + gemm4m1_cntl ); } diff --git a/frame/3/syr2k/4m/bli_syr2k4m_entry.h b/frame/3/herk/4m1/bli_herk4m1_entry.h similarity index 96% rename from frame/3/syr2k/4m/bli_syr2k4m_entry.h rename to frame/3/herk/4m1/bli_herk4m1_entry.h index df7f124ee..f2a2333c8 100644 --- a/frame/3/syr2k/4m/bli_syr2k4m_entry.h +++ 
b/frame/3/herk/4m1/bli_herk4m1_entry.h @@ -32,9 +32,8 @@ */ -void bli_syr2k4m_entry( obj_t* alpha, +void bli_herk4m1_entry( obj_t* alpha, obj_t* a, - obj_t* b, obj_t* beta, obj_t* c ); diff --git a/frame/3/herk/3m/old/bli_herk3m_cntl.h b/frame/3/herk/4m1/old/bli_herk4m1_cntl.h similarity index 95% rename from frame/3/herk/3m/old/bli_herk3m_cntl.h rename to frame/3/herk/4m1/old/bli_herk4m1_cntl.h index 62bd7ad31..8f13aa452 100644 --- a/frame/3/herk/3m/old/bli_herk3m_cntl.h +++ b/frame/3/herk/4m1/old/bli_herk4m1_cntl.h @@ -32,6 +32,6 @@ */ -void bli_herk3m_cntl_init( void ); -void bli_herk3m_cntl_finalize( void ); +void bli_herk4m1_cntl_init( void ); +void bli_herk4m1_cntl_finalize( void ); diff --git a/frame/3/herk/4mh/bli_herk4mh.c b/frame/3/herk/4mh/bli_herk4mh.c index 7288881cd..c0c1804c3 100644 --- a/frame/3/herk/4mh/bli_herk4mh.c +++ b/frame/3/herk/4mh/bli_herk4mh.c @@ -38,9 +38,9 @@ // Define object-based interface. // void bli_herk4mh( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* beta, + obj_t* c ) { // Since 4mh only applies to the complex domain, we use the regular // implementation for real domain cases. diff --git a/frame/3/herk/4mh/bli_herk4mh.h b/frame/3/herk/4mh/bli_herk4mh.h index aeff510dc..fb67184e9 100644 --- a/frame/3/herk/4mh/bli_herk4mh.h +++ b/frame/3/herk/4mh/bli_herk4mh.h @@ -39,9 +39,9 @@ // Prototype object-based interface. 
// void bli_herk4mh( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* beta, + obj_t* c ); // diff --git a/frame/3/herk/4mh/bli_herk4mh_entry.c b/frame/3/herk/4mh/bli_herk4mh_entry.c index 19009f715..30e7ff84f 100644 --- a/frame/3/herk/4mh/bli_herk4mh_entry.c +++ b/frame/3/herk/4mh/bli_herk4mh_entry.c @@ -40,9 +40,9 @@ extern gemm_t* gemm4mh_cntl_ir; extern gemm_t* gemm4mh_cntl_ii; void bli_herk4mh_entry( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* beta, + obj_t* c ) { bli_herk_front( alpha, a, beta, c, gemm4mh_cntl_rr ); bli_herk_front( alpha, a, &BLIS_ONE, c, gemm4mh_cntl_ii ); diff --git a/frame/3/herk/4mh/bli_herk4mh_entry.h b/frame/3/herk/4mh/bli_herk4mh_entry.h index a75a501eb..6f4483713 100644 --- a/frame/3/herk/4mh/bli_herk4mh_entry.h +++ b/frame/3/herk/4mh/bli_herk4mh_entry.h @@ -33,7 +33,7 @@ */ void bli_herk4mh_entry( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/herk/bli_herk.c b/frame/3/herk/bli_herk.c index a56ff3971..a619d0a17 100644 --- a/frame/3/herk/bli_herk.c +++ b/frame/3/herk/bli_herk.c @@ -45,9 +45,9 @@ void bli_herk( obj_t* alpha, num_t dt = bli_obj_datatype( *c ); if ( bli_3mh_is_enabled_dt( dt ) ) bli_herk3mh_entry( alpha, a, beta, c ); - else if ( bli_3m_is_enabled_dt( dt ) ) bli_herk3m_entry( alpha, a, beta, c ); + else if ( bli_3m1_is_enabled_dt( dt ) ) bli_herk3m1_entry( alpha, a, beta, c ); else if ( bli_4mh_is_enabled_dt( dt ) ) bli_herk4mh_entry( alpha, a, beta, c ); - else if ( bli_4m_is_enabled_dt( dt ) ) bli_herk4m_entry( alpha, a, beta, c ); + else if ( bli_4m1_is_enabled_dt( dt ) ) bli_herk4m1_entry( alpha, a, beta, c ); else bli_herk_entry( alpha, a, beta, c ); } diff --git a/frame/3/herk/bli_herk.h b/frame/3/herk/bli_herk.h index 8c779fe91..750052761 100644 --- a/frame/3/herk/bli_herk.h +++ b/frame/3/herk/bli_herk.h @@ -46,10 +46,10 @@ #include "bli_herk_l_ker_var2.h" #include "bli_herk_u_ker_var2.h" 
-#include "bli_herk4m.h" -#include "bli_herk3m.h" #include "bli_herk4mh.h" +#include "bli_herk4m1.h" #include "bli_herk3mh.h" +#include "bli_herk3m1.h" // diff --git a/frame/3/symm/3m/bli_symm3m.h b/frame/3/symm/3m/bli_symm3m.h deleted file mode 100644 index d823de20b..000000000 --- a/frame/3/symm/3m/bli_symm3m.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#include "bli_symm3m_entry.h" - - -// -// Prototype object-based interface. -// -void bli_symm3m( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); - -// -// Prototype BLAS-like interfaces with homogeneous-typed operands. -// -#undef GENTPROT -#define GENTPROT( ctype, ch, opname ) \ -\ -void PASTEMAC(ch,opname)( \ - side_t side, \ - uplo_t uploa, \ - conj_t conja, \ - trans_t transb, \ - dim_t m, \ - dim_t n, \ - ctype* alpha, \ - ctype* a, inc_t rs_a, inc_t cs_a, \ - ctype* b, inc_t rs_b, inc_t cs_b, \ - ctype* beta, \ - ctype* c, inc_t rs_c, inc_t cs_c \ - ); - -INSERT_GENTPROT_BASIC( symm3m ) - diff --git a/frame/3/symm/3m/bli_symm3m_entry.c b/frame/3/symm/3m/bli_symm3m_entry.c deleted file mode 100644 index f19b35638..000000000 --- a/frame/3/symm/3m/bli_symm3m_entry.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -extern gemm_t* gemm3m_cntl; - -void bli_symm3m_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) -{ - bli_symm_front( side, alpha, a, b, beta, c, - gemm3m_cntl ); -} - diff --git a/frame/3/symm/3m/bli_symm3m_entry.h b/frame/3/symm/3m/bli_symm3m_entry.h deleted file mode 100644 index dbd63df34..000000000 --- a/frame/3/symm/3m/bli_symm3m_entry.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_symm3m_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); - diff --git a/frame/3/symm/3m/bli_symm3m.c b/frame/3/symm/3m1/bli_symm3m1.c similarity index 91% rename from frame/3/symm/3m/bli_symm3m.c rename to frame/3/symm/3m1/bli_symm3m1.c index 5fe67dc29..b43408036 100644 --- a/frame/3/symm/3m/bli_symm3m.c +++ b/frame/3/symm/3m1/bli_symm3m1.c @@ -37,17 +37,17 @@ // // Define object-based interface. // -void bli_symm3m( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) +void bli_symm3m1( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { - // Since 3m only applies to the complex domain, we use the regular + // Since 3m1 only applies to the complex domain, we use the regular // implementation for real domain cases. 
if ( bli_obj_is_complex( *c ) ) - bli_symm3m_entry( side, alpha, a, b, beta, c ); + bli_symm3m1_entry( side, alpha, a, b, beta, c ); else bli_symm_entry( side, alpha, a, b, beta, c ); } @@ -103,5 +103,5 @@ void PASTEMAC(ch,opname)( \ &co ); \ } -INSERT_GENTFUNC_BASIC( symm3m, symm3m ) +INSERT_GENTFUNC_BASIC( symm3m1, symm3m1 ) diff --git a/frame/3/hemm/4m/bli_hemm4m.h b/frame/3/symm/3m1/bli_symm3m1.h similarity index 90% rename from frame/3/hemm/4m/bli_hemm4m.h rename to frame/3/symm/3m1/bli_symm3m1.h index 66d8ddf10..1717534cc 100644 --- a/frame/3/hemm/4m/bli_hemm4m.h +++ b/frame/3/symm/3m1/bli_symm3m1.h @@ -32,18 +32,18 @@ */ -#include "bli_hemm4m_entry.h" +#include "bli_symm3m1_entry.h" // // Prototype object-based interface. // -void bli_hemm4m( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); +void bli_symm3m1( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // // Prototype BLAS-like interfaces with homogeneous-typed operands. @@ -65,5 +65,5 @@ void PASTEMAC(ch,opname)( \ ctype* c, inc_t rs_c, inc_t cs_c \ ); -INSERT_GENTPROT_BASIC( hemm4m ) +INSERT_GENTPROT_BASIC( symm3m1 ) diff --git a/frame/3/symm/3m1/bli_symm3m1_entry.c b/frame/3/symm/3m1/bli_symm3m1_entry.c new file mode 100644 index 000000000..1499dafed --- /dev/null +++ b/frame/3/symm/3m1/bli_symm3m1_entry.c @@ -0,0 +1,49 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +extern gemm_t* gemm3m1_cntl; + +void bli_symm3m1_entry( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) +{ + bli_symm_front( side, alpha, a, b, beta, c, + gemm3m1_cntl ); +} + diff --git a/frame/3/symm/3m1/bli_symm3m1_entry.h b/frame/3/symm/3m1/bli_symm3m1_entry.h new file mode 100644 index 000000000..f818a45a6 --- /dev/null +++ b/frame/3/symm/3m1/bli_symm3m1_entry.h @@ -0,0 +1,41 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_symm3m1_entry( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); + diff --git a/frame/3/symm/3mh/bli_symm3mh.c b/frame/3/symm/3mh/bli_symm3mh.c index c79f367c0..2b391138c 100644 --- a/frame/3/symm/3mh/bli_symm3mh.c +++ b/frame/3/symm/3mh/bli_symm3mh.c @@ -38,11 +38,11 @@ // Define object-based interface. 
// void bli_symm3mh( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 3mh only applies to the complex domain, we use the regular // implementation for real domain cases. diff --git a/frame/3/symm/3mh/bli_symm3mh.h b/frame/3/symm/3mh/bli_symm3mh.h index d353a8651..68b8fc67e 100644 --- a/frame/3/symm/3mh/bli_symm3mh.h +++ b/frame/3/symm/3mh/bli_symm3mh.h @@ -39,11 +39,11 @@ // Prototype object-based interface. // void bli_symm3mh( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // // Prototype BLAS-like interfaces with homogeneous-typed operands. diff --git a/frame/3/symm/3mh/bli_symm3mh_entry.c b/frame/3/symm/3mh/bli_symm3mh_entry.c index 1277cbf8f..7289153a4 100644 --- a/frame/3/symm/3mh/bli_symm3mh_entry.c +++ b/frame/3/symm/3mh/bli_symm3mh_entry.c @@ -39,11 +39,11 @@ extern gemm_t* gemm3mh_cntl_io; extern gemm_t* gemm3mh_cntl_rpi; void bli_symm3mh_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { bli_symm_front( side, alpha, a, b, beta, c, gemm3mh_cntl_ro ); bli_symm_front( side, alpha, a, b, &BLIS_ONE, c, gemm3mh_cntl_io ); diff --git a/frame/3/symm/3mh/bli_symm3mh_entry.h b/frame/3/symm/3mh/bli_symm3mh_entry.h index 1030c3e9f..be29b4ad4 100644 --- a/frame/3/symm/3mh/bli_symm3mh_entry.h +++ b/frame/3/symm/3mh/bli_symm3mh_entry.h @@ -33,9 +33,9 @@ */ void bli_symm3mh_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/symm/4m/bli_symm4m_entry.c b/frame/3/symm/4m/bli_symm4m_entry.c deleted file mode 100644 index bee823e70..000000000 --- a/frame/3/symm/4m/bli_symm4m_entry.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - - BLIS - An 
object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#include "blis.h" - -extern gemm_t* gemm4m_cntl; - -void bli_symm4m_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) -{ - bli_symm_front( side, alpha, a, b, beta, c, - gemm4m_cntl ); -} - diff --git a/frame/3/symm/4m/bli_symm4m_entry.h b/frame/3/symm/4m/bli_symm4m_entry.h deleted file mode 100644 index 38ea4c8da..000000000 --- a/frame/3/symm/4m/bli_symm4m_entry.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_symm4m_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); - diff --git a/frame/3/symm/4m/bli_symm4m.c b/frame/3/symm/4m1/bli_symm4m1.c similarity index 92% rename from frame/3/symm/4m/bli_symm4m.c rename to frame/3/symm/4m1/bli_symm4m1.c index 5e0df03ef..e51482dee 100644 --- a/frame/3/symm/4m/bli_symm4m.c +++ b/frame/3/symm/4m1/bli_symm4m1.c @@ -37,17 +37,17 @@ // // Define object-based interface. // -void bli_symm4m( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) +void bli_symm4m1( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 4m only applies to the complex domain, we use the regular // implementation for real domain cases. 
if ( bli_obj_is_complex( *c ) ) - bli_symm4m_entry( side, alpha, a, b, beta, c ); + bli_symm4m1_entry( side, alpha, a, b, beta, c ); else bli_symm_entry( side, alpha, a, b, beta, c ); } @@ -103,5 +103,5 @@ void PASTEMAC(ch,opname)( \ &co ); \ } -INSERT_GENTFUNC_BASIC( symm4m, symm4m ) +INSERT_GENTFUNC_BASIC( symm4m1, symm4m1 ) diff --git a/frame/3/symm/4m/bli_symm4m.h b/frame/3/symm/4m1/bli_symm4m1.h similarity index 90% rename from frame/3/symm/4m/bli_symm4m.h rename to frame/3/symm/4m1/bli_symm4m1.h index 14f554631..de6390baa 100644 --- a/frame/3/symm/4m/bli_symm4m.h +++ b/frame/3/symm/4m1/bli_symm4m1.h @@ -32,18 +32,18 @@ */ -#include "bli_symm4m_entry.h" +#include "bli_symm4m1_entry.h" // // Prototype object-based interface. // -void bli_symm4m( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); +void bli_symm4m1( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // // Prototype BLAS-like interfaces with homogeneous-typed operands. @@ -65,5 +65,5 @@ void PASTEMAC(ch,opname)( \ ctype* c, inc_t rs_c, inc_t cs_c \ ); -INSERT_GENTPROT_BASIC( symm4m ) +INSERT_GENTPROT_BASIC( symm4m1 ) diff --git a/frame/3/symm/4m1/bli_symm4m1_entry.c b/frame/3/symm/4m1/bli_symm4m1_entry.c new file mode 100644 index 000000000..52ab5ccef --- /dev/null +++ b/frame/3/symm/4m1/bli_symm4m1_entry.c @@ -0,0 +1,49 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +extern gemm_t* gemm4m1_cntl; + +void bli_symm4m1_entry( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) +{ + bli_symm_front( side, alpha, a, b, beta, c, + gemm4m1_cntl ); +} + diff --git a/frame/3/symm/4m1/bli_symm4m1_entry.h b/frame/3/symm/4m1/bli_symm4m1_entry.h new file mode 100644 index 000000000..e3136ddd6 --- /dev/null +++ b/frame/3/symm/4m1/bli_symm4m1_entry.h @@ -0,0 +1,41 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_symm4m1_entry( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); + diff --git a/frame/3/symm/4mh/bli_symm4mh.c b/frame/3/symm/4mh/bli_symm4mh.c index ac62aeadc..14e1aeb24 100644 --- a/frame/3/symm/4mh/bli_symm4mh.c +++ b/frame/3/symm/4mh/bli_symm4mh.c @@ -38,11 +38,11 @@ // Define object-based interface. 
// void bli_symm4mh( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 4mh only applies to the complex domain, we use the regular // implementation for real domain cases. diff --git a/frame/3/symm/4mh/bli_symm4mh.h b/frame/3/symm/4mh/bli_symm4mh.h index e52ba4079..be497360c 100644 --- a/frame/3/symm/4mh/bli_symm4mh.h +++ b/frame/3/symm/4mh/bli_symm4mh.h @@ -39,11 +39,11 @@ // Prototype object-based interface. // void bli_symm4mh( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // // Prototype BLAS-like interfaces with homogeneous-typed operands. diff --git a/frame/3/symm/4mh/bli_symm4mh_entry.c b/frame/3/symm/4mh/bli_symm4mh_entry.c index e2322e1d0..27a482dfc 100644 --- a/frame/3/symm/4mh/bli_symm4mh_entry.c +++ b/frame/3/symm/4mh/bli_symm4mh_entry.c @@ -40,11 +40,11 @@ extern gemm_t* gemm4mh_cntl_ir; extern gemm_t* gemm4mh_cntl_ii; void bli_symm4mh_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { bli_symm_front( side, alpha, a, b, beta, c, gemm4mh_cntl_rr ); bli_symm_front( side, alpha, a, b, &BLIS_ONE, c, gemm4mh_cntl_ii ); diff --git a/frame/3/symm/4mh/bli_symm4mh_entry.h b/frame/3/symm/4mh/bli_symm4mh_entry.h index 05a416627..bc621061a 100644 --- a/frame/3/symm/4mh/bli_symm4mh_entry.h +++ b/frame/3/symm/4mh/bli_symm4mh_entry.h @@ -33,9 +33,9 @@ */ void bli_symm4mh_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/symm/bli_symm.c b/frame/3/symm/bli_symm.c index d01232f65..088c4ad24 100644 --- a/frame/3/symm/bli_symm.c +++ b/frame/3/symm/bli_symm.c @@ -47,9 +47,9 @@ void bli_symm( side_t side, num_t dt = 
bli_obj_datatype( *c ); if ( bli_3mh_is_enabled_dt( dt ) ) bli_symm3mh_entry( side, alpha, a, b, beta, c ); - else if ( bli_3m_is_enabled_dt( dt ) ) bli_symm3m_entry( side, alpha, a, b, beta, c ); + else if ( bli_3m1_is_enabled_dt( dt ) ) bli_symm3m1_entry( side, alpha, a, b, beta, c ); else if ( bli_4mh_is_enabled_dt( dt ) ) bli_symm4mh_entry( side, alpha, a, b, beta, c ); - else if ( bli_4m_is_enabled_dt( dt ) ) bli_symm4m_entry( side, alpha, a, b, beta, c ); + else if ( bli_4m1_is_enabled_dt( dt ) ) bli_symm4m1_entry( side, alpha, a, b, beta, c ); else bli_symm_entry( side, alpha, a, b, beta, c ); } diff --git a/frame/3/symm/bli_symm.h b/frame/3/symm/bli_symm.h index f9bddfa2f..04d83adb7 100644 --- a/frame/3/symm/bli_symm.h +++ b/frame/3/symm/bli_symm.h @@ -36,10 +36,10 @@ #include "bli_symm_entry.h" #include "bli_symm_front.h" -#include "bli_symm4m.h" -#include "bli_symm3m.h" #include "bli_symm4mh.h" +#include "bli_symm4m1.h" #include "bli_symm3mh.h" +#include "bli_symm3m1.h" // diff --git a/frame/3/syr2k/3m/bli_syr2k3m.c b/frame/3/syr2k/3m1/bli_syr2k3m1.c similarity index 91% rename from frame/3/syr2k/3m/bli_syr2k3m.c rename to frame/3/syr2k/3m1/bli_syr2k3m1.c index 40db98ad0..c0e7f217c 100644 --- a/frame/3/syr2k/3m/bli_syr2k3m.c +++ b/frame/3/syr2k/3m1/bli_syr2k3m1.c @@ -37,16 +37,16 @@ // // Define object-based interface. // -void bli_syr2k3m( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) +void bli_syr2k3m1( obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { - // Since 3m only applies to the complex domain, we use the regular + // Since 3m1 only applies to the complex domain, we use the regular // implementation for real domain cases. 
if ( bli_obj_is_complex( *c ) ) - bli_syr2k3m_entry( alpha, a, b, beta, c ); + bli_syr2k3m1_entry( alpha, a, b, beta, c ); else bli_syr2k_entry( alpha, a, b, beta, c ); } @@ -100,5 +100,5 @@ void PASTEMAC(ch,opname)( \ &co ); \ } -INSERT_GENTFUNC_BASIC( syr2k3m, syr2k3m ) +INSERT_GENTFUNC_BASIC( syr2k3m1, syr2k3m1 ) diff --git a/frame/3/gemm/3m/bli_gemm3m.h b/frame/3/syr2k/3m1/bli_syr2k3m1.h similarity index 88% rename from frame/3/gemm/3m/bli_gemm3m.h rename to frame/3/syr2k/3m1/bli_syr2k3m1.h index 85032afce..612bea5bc 100644 --- a/frame/3/gemm/3m/bli_gemm3m.h +++ b/frame/3/syr2k/3m1/bli_syr2k3m1.h @@ -32,20 +32,17 @@ */ -#include "bli_gemm3m_cntl.h" -#include "bli_gemm3m_entry.h" - -#include "bli_gemm3m_ukr_ref.h" +#include "bli_syr2k3m1_entry.h" // // Prototype object-based interface. // -void bli_gemm3m( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); +void bli_syr2k3m1( obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // @@ -55,10 +52,10 @@ void bli_gemm3m( obj_t* alpha, #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname)( \ + uplo_t uploc, \ trans_t transa, \ trans_t transb, \ dim_t m, \ - dim_t n, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ @@ -67,5 +64,5 @@ void PASTEMAC(ch,opname)( \ ctype* c, inc_t rs_c, inc_t cs_c \ ); -INSERT_GENTPROT_BASIC( gemm3m ) +INSERT_GENTPROT_BASIC( syr2k3m1 ) diff --git a/frame/3/syr2k/3m1/bli_syr2k3m1_entry.c b/frame/3/syr2k/3m1/bli_syr2k3m1_entry.c new file mode 100644 index 000000000..3dc9f5061 --- /dev/null +++ b/frame/3/syr2k/3m1/bli_syr2k3m1_entry.c @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "blis.h" + +extern gemm_t* gemm3m1_cntl; + +void bli_syr2k3m1_entry( obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) +{ + bli_syr2k_front( alpha, a, b, beta, c, + gemm3m1_cntl ); +} + diff --git a/frame/3/syr2k/3m1/bli_syr2k3m1_entry.h b/frame/3/syr2k/3m1/bli_syr2k3m1_entry.h new file mode 100644 index 000000000..b54a9e102 --- /dev/null +++ b/frame/3/syr2k/3m1/bli_syr2k3m1_entry.h @@ -0,0 +1,40 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_syr2k3m1_entry( obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); + diff --git a/frame/3/syr2k/3mh/bli_syr2k3mh.c b/frame/3/syr2k/3mh/bli_syr2k3mh.c index 0fa00e953..56de9b5fd 100644 --- a/frame/3/syr2k/3mh/bli_syr2k3mh.c +++ b/frame/3/syr2k/3mh/bli_syr2k3mh.c @@ -38,10 +38,10 @@ // Define object-based interface. // void bli_syr2k3mh( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 3mh only applies to the complex domain, we use the regular // implementation for real domain cases. diff --git a/frame/3/syr2k/3mh/bli_syr2k3mh.h b/frame/3/syr2k/3mh/bli_syr2k3mh.h index e14739094..55a155765 100644 --- a/frame/3/syr2k/3mh/bli_syr2k3mh.h +++ b/frame/3/syr2k/3mh/bli_syr2k3mh.h @@ -39,10 +39,10 @@ // Prototype object-based interface. 
// void bli_syr2k3mh( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // diff --git a/frame/3/syr2k/3mh/bli_syr2k3mh_entry.c b/frame/3/syr2k/3mh/bli_syr2k3mh_entry.c index 74d5e24a0..ab809361a 100644 --- a/frame/3/syr2k/3mh/bli_syr2k3mh_entry.c +++ b/frame/3/syr2k/3mh/bli_syr2k3mh_entry.c @@ -39,10 +39,10 @@ extern gemm_t* gemm3mh_cntl_io; extern gemm_t* gemm3mh_cntl_rpi; void bli_syr2k3mh_entry( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { bli_syr2k_front( alpha, a, b, beta, c, gemm3mh_cntl_ro ); bli_syr2k_front( alpha, a, b, &BLIS_ONE, c, gemm3mh_cntl_io ); diff --git a/frame/3/syr2k/3mh/bli_syr2k3mh_entry.h b/frame/3/syr2k/3mh/bli_syr2k3mh_entry.h index 55f828542..38ec5f8ad 100644 --- a/frame/3/syr2k/3mh/bli_syr2k3mh_entry.h +++ b/frame/3/syr2k/3mh/bli_syr2k3mh_entry.h @@ -33,8 +33,8 @@ */ void bli_syr2k3mh_entry( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/syr2k/4m/bli_syr2k4m.c b/frame/3/syr2k/4m1/bli_syr2k4m1.c similarity index 93% rename from frame/3/syr2k/4m/bli_syr2k4m.c rename to frame/3/syr2k/4m1/bli_syr2k4m1.c index 520d4a78f..3d16bbbc7 100644 --- a/frame/3/syr2k/4m/bli_syr2k4m.c +++ b/frame/3/syr2k/4m1/bli_syr2k4m1.c @@ -37,16 +37,16 @@ // // Define object-based interface. // -void bli_syr2k4m( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) +void bli_syr2k4m1( obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 4m only applies to the complex domain, we use the regular // implementation for real domain cases. 
if ( bli_obj_is_complex( *c ) ) - bli_syr2k4m_entry( alpha, a, b, beta, c ); + bli_syr2k4m1_entry( alpha, a, b, beta, c ); else bli_syr2k_entry( alpha, a, b, beta, c ); } @@ -100,5 +100,5 @@ void PASTEMAC(ch,opname)( \ &co ); \ } -INSERT_GENTFUNC_BASIC( syr2k4m, syr2k4m ) +INSERT_GENTFUNC_BASIC( syr2k4m1, syr2k4m1 ) diff --git a/frame/3/gemm/4m/bli_gemm4m.h b/frame/3/syr2k/4m1/bli_syr2k4m1.h similarity index 88% rename from frame/3/gemm/4m/bli_gemm4m.h rename to frame/3/syr2k/4m1/bli_syr2k4m1.h index 42f83ef88..880b913ca 100644 --- a/frame/3/gemm/4m/bli_gemm4m.h +++ b/frame/3/syr2k/4m1/bli_syr2k4m1.h @@ -32,20 +32,17 @@ */ -#include "bli_gemm4m_cntl.h" -#include "bli_gemm4m_entry.h" - -#include "bli_gemm4m_ukr_ref.h" +#include "bli_syr2k4m1_entry.h" // // Prototype object-based interface. // -void bli_gemm4m( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); +void bli_syr2k4m1( obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // @@ -55,10 +52,10 @@ void bli_gemm4m( obj_t* alpha, #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname)( \ + uplo_t uploc, \ trans_t transa, \ trans_t transb, \ dim_t m, \ - dim_t n, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ @@ -67,5 +64,5 @@ void PASTEMAC(ch,opname)( \ ctype* c, inc_t rs_c, inc_t cs_c \ ); -INSERT_GENTPROT_BASIC( gemm4m ) +INSERT_GENTPROT_BASIC( syr2k4m1 ) diff --git a/frame/3/syr2k/4m1/bli_syr2k4m1_entry.c b/frame/3/syr2k/4m1/bli_syr2k4m1_entry.c new file mode 100644 index 000000000..e3c0c1a19 --- /dev/null +++ b/frame/3/syr2k/4m1/bli_syr2k4m1_entry.c @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "blis.h" + +extern gemm_t* gemm4m1_cntl; + +void bli_syr2k4m1_entry( obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) +{ + bli_syr2k_front( alpha, a, b, beta, c, + gemm4m1_cntl ); +} + diff --git a/frame/3/syr2k/4m1/bli_syr2k4m1_entry.h b/frame/3/syr2k/4m1/bli_syr2k4m1_entry.h new file mode 100644 index 000000000..35c58a94e --- /dev/null +++ b/frame/3/syr2k/4m1/bli_syr2k4m1_entry.h @@ -0,0 +1,40 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_syr2k4m1_entry( obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); + diff --git a/frame/3/syr2k/4mh/bli_syr2k4mh.c b/frame/3/syr2k/4mh/bli_syr2k4mh.c index 7fc5410d6..76c46ae0e 100644 --- a/frame/3/syr2k/4mh/bli_syr2k4mh.c +++ b/frame/3/syr2k/4mh/bli_syr2k4mh.c @@ -38,10 +38,10 @@ // Define object-based interface. // void bli_syr2k4mh( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 4mh only applies to the complex domain, we use the regular // implementation for real domain cases. diff --git a/frame/3/syr2k/4mh/bli_syr2k4mh.h b/frame/3/syr2k/4mh/bli_syr2k4mh.h index 2b0cfa4cb..b664253b1 100644 --- a/frame/3/syr2k/4mh/bli_syr2k4mh.h +++ b/frame/3/syr2k/4mh/bli_syr2k4mh.h @@ -39,10 +39,10 @@ // Prototype object-based interface. 
// void bli_syr2k4mh( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // diff --git a/frame/3/syr2k/4mh/bli_syr2k4mh_entry.c b/frame/3/syr2k/4mh/bli_syr2k4mh_entry.c index 58218174f..7d28b6eb7 100644 --- a/frame/3/syr2k/4mh/bli_syr2k4mh_entry.c +++ b/frame/3/syr2k/4mh/bli_syr2k4mh_entry.c @@ -40,10 +40,10 @@ extern gemm_t* gemm4mh_cntl_ir; extern gemm_t* gemm4mh_cntl_ii; void bli_syr2k4mh_entry( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { bli_syr2k_front( alpha, a, b, beta, c, gemm4mh_cntl_rr ); bli_syr2k_front( alpha, a, b, &BLIS_ONE, c, gemm4mh_cntl_ii ); diff --git a/frame/3/syr2k/4mh/bli_syr2k4mh_entry.h b/frame/3/syr2k/4mh/bli_syr2k4mh_entry.h index 6e0e4cc9b..705569310 100644 --- a/frame/3/syr2k/4mh/bli_syr2k4mh_entry.h +++ b/frame/3/syr2k/4mh/bli_syr2k4mh_entry.h @@ -33,8 +33,8 @@ */ void bli_syr2k4mh_entry( obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/syr2k/bli_syr2k.c b/frame/3/syr2k/bli_syr2k.c index 9fbc9d7a7..09a4c2db5 100644 --- a/frame/3/syr2k/bli_syr2k.c +++ b/frame/3/syr2k/bli_syr2k.c @@ -46,9 +46,9 @@ void bli_syr2k( obj_t* alpha, num_t dt = bli_obj_datatype( *c ); if ( bli_3mh_is_enabled_dt( dt ) ) bli_syr2k3mh_entry( alpha, a, b, beta, c ); - else if ( bli_3m_is_enabled_dt( dt ) ) bli_syr2k3m_entry( alpha, a, b, beta, c ); + else if ( bli_3m1_is_enabled_dt( dt ) ) bli_syr2k3m1_entry( alpha, a, b, beta, c ); else if ( bli_4mh_is_enabled_dt( dt ) ) bli_syr2k4mh_entry( alpha, a, b, beta, c ); - else if ( bli_4m_is_enabled_dt( dt ) ) bli_syr2k4m_entry( alpha, a, b, beta, c ); + else if ( bli_4m1_is_enabled_dt( dt ) ) bli_syr2k4m1_entry( alpha, a, b, beta, c ); else bli_syr2k_entry( alpha, a, b, beta, c ); } diff --git a/frame/3/syr2k/bli_syr2k.h b/frame/3/syr2k/bli_syr2k.h index 99c16dce7..3a71d4975 100644 --- 
a/frame/3/syr2k/bli_syr2k.h +++ b/frame/3/syr2k/bli_syr2k.h @@ -36,10 +36,10 @@ #include "bli_syr2k_entry.h" #include "bli_syr2k_front.h" -#include "bli_syr2k4m.h" -#include "bli_syr2k3m.h" #include "bli_syr2k4mh.h" +#include "bli_syr2k4m1.h" #include "bli_syr2k3mh.h" +#include "bli_syr2k3m1.h" // diff --git a/frame/3/syrk/3m/bli_syrk3m_entry.c b/frame/3/syrk/3m/bli_syrk3m_entry.c deleted file mode 100644 index 56937d81a..000000000 --- a/frame/3/syrk/3m/bli_syrk3m_entry.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -extern gemm_t* gemm3m_cntl; - -void bli_syrk3m_entry( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ) -{ - bli_syrk_front( alpha, a, beta, c, - gemm3m_cntl ); -} - diff --git a/frame/3/syrk/3m/bli_syrk3m_entry.h b/frame/3/syrk/3m/bli_syrk3m_entry.h deleted file mode 100644 index ed0646332..000000000 --- a/frame/3/syrk/3m/bli_syrk3m_entry.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_syrk3m_entry( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ); - diff --git a/frame/3/syrk/3m/bli_syrk3m.c b/frame/3/syrk/3m1/bli_syrk3m1.c similarity index 91% rename from frame/3/syrk/3m/bli_syrk3m.c rename to frame/3/syrk/3m1/bli_syrk3m1.c index 8e639ec17..ea24f3397 100644 --- a/frame/3/syrk/3m/bli_syrk3m.c +++ b/frame/3/syrk/3m1/bli_syrk3m1.c @@ -37,15 +37,15 @@ // // Define object-based interface. // -void bli_syrk3m( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ) +void bli_syrk3m1( obj_t* alpha, + obj_t* a, + obj_t* beta, + obj_t* c ) { - // Since 3m only applies to the complex domain, we use the regular + // Since 3m1 only applies to the complex domain, we use the regular // implementation for real domain cases. 
if ( bli_obj_is_complex( *c ) ) - bli_syrk3m_entry( alpha, a, beta, c ); + bli_syrk3m1_entry( alpha, a, beta, c ); else bli_syrk_entry( alpha, a, beta, c ); } @@ -92,5 +92,5 @@ void PASTEMAC(ch,opname)( \ &co ); \ } -INSERT_GENTFUNC_BASIC( syrk3m, syrk3m ) +INSERT_GENTFUNC_BASIC( syrk3m1, syrk3m1 ) diff --git a/frame/3/syrk/3m/bli_syrk3m.h b/frame/3/syrk/3m1/bli_syrk3m1.h similarity index 92% rename from frame/3/syrk/3m/bli_syrk3m.h rename to frame/3/syrk/3m1/bli_syrk3m1.h index ecabd0020..2da93ad6a 100644 --- a/frame/3/syrk/3m/bli_syrk3m.h +++ b/frame/3/syrk/3m1/bli_syrk3m1.h @@ -32,16 +32,16 @@ */ -#include "bli_syrk3m_entry.h" +#include "bli_syrk3m1_entry.h" // // Prototype object-based interface. // -void bli_syrk3m( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ); +void bli_syrk3m1( obj_t* alpha, + obj_t* a, + obj_t* beta, + obj_t* c ); // @@ -61,5 +61,5 @@ void PASTEMAC(ch,opname)( \ ctype* c, inc_t rs_c, inc_t cs_c \ ); -INSERT_GENTPROT_BASIC( syrk3m ) +INSERT_GENTPROT_BASIC( syrk3m1 ) diff --git a/frame/3/syrk/3m1/bli_syrk3m1_entry.c b/frame/3/syrk/3m1/bli_syrk3m1_entry.c new file mode 100644 index 000000000..ad147e186 --- /dev/null +++ b/frame/3/syrk/3m1/bli_syrk3m1_entry.c @@ -0,0 +1,47 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +extern gemm_t* gemm3m1_cntl; + +void bli_syrk3m1_entry( obj_t* alpha, + obj_t* a, + obj_t* beta, + obj_t* c ) +{ + bli_syrk_front( alpha, a, beta, c, + gemm3m1_cntl ); +} + diff --git a/frame/3/syrk/3m1/bli_syrk3m1_entry.h b/frame/3/syrk/3m1/bli_syrk3m1_entry.h new file mode 100644 index 000000000..8c6b0a42d --- /dev/null +++ b/frame/3/syrk/3m1/bli_syrk3m1_entry.h @@ -0,0 +1,39 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_syrk3m1_entry( obj_t* alpha, + obj_t* a, + obj_t* beta, + obj_t* c ); + diff --git a/frame/3/syrk/3mh/bli_syrk3mh.c b/frame/3/syrk/3mh/bli_syrk3mh.c index 2822a732a..5c8e2720f 100644 --- a/frame/3/syrk/3mh/bli_syrk3mh.c +++ b/frame/3/syrk/3mh/bli_syrk3mh.c @@ -38,9 +38,9 @@ // Define object-based interface. // void bli_syrk3mh( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* beta, + obj_t* c ) { // Since 3mh only applies to the complex domain, we use the regular // implementation for real domain cases. diff --git a/frame/3/syrk/3mh/bli_syrk3mh.h b/frame/3/syrk/3mh/bli_syrk3mh.h index c25d02926..c5bfa6ede 100644 --- a/frame/3/syrk/3mh/bli_syrk3mh.h +++ b/frame/3/syrk/3mh/bli_syrk3mh.h @@ -39,9 +39,9 @@ // Prototype object-based interface. 
// void bli_syrk3mh( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* beta, + obj_t* c ); // diff --git a/frame/3/syrk/3mh/bli_syrk3mh_entry.c b/frame/3/syrk/3mh/bli_syrk3mh_entry.c index 8f1e46143..c5a54f05a 100644 --- a/frame/3/syrk/3mh/bli_syrk3mh_entry.c +++ b/frame/3/syrk/3mh/bli_syrk3mh_entry.c @@ -39,9 +39,9 @@ extern gemm_t* gemm3mh_cntl_io; extern gemm_t* gemm3mh_cntl_rpi; void bli_syrk3mh_entry( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* beta, + obj_t* c ) { bli_syrk_front( alpha, a, beta, c, gemm3mh_cntl_ro ); bli_syrk_front( alpha, a, &BLIS_ONE, c, gemm3mh_cntl_io ); diff --git a/frame/3/syrk/3mh/bli_syrk3mh_entry.h b/frame/3/syrk/3mh/bli_syrk3mh_entry.h index f6b3c5e4d..8c7b6481e 100644 --- a/frame/3/syrk/3mh/bli_syrk3mh_entry.h +++ b/frame/3/syrk/3mh/bli_syrk3mh_entry.h @@ -33,7 +33,7 @@ */ void bli_syrk3mh_entry( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/syrk/4m/bli_syrk4m_entry.c b/frame/3/syrk/4m/bli_syrk4m_entry.c deleted file mode 100644 index 2a95799e4..000000000 --- a/frame/3/syrk/4m/bli_syrk4m_entry.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -extern gemm_t* gemm4m_cntl; - -void bli_syrk4m_entry( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ) -{ - bli_syrk_front( alpha, a, beta, c, - gemm4m_cntl ); -} - diff --git a/frame/3/syrk/4m/bli_syrk4m_entry.h b/frame/3/syrk/4m/bli_syrk4m_entry.h deleted file mode 100644 index 0434fb2a3..000000000 --- a/frame/3/syrk/4m/bli_syrk4m_entry.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_syrk4m_entry( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ); - diff --git a/frame/3/syrk/4m/bli_syrk4m.c b/frame/3/syrk/4m1/bli_syrk4m1.c similarity index 93% rename from frame/3/syrk/4m/bli_syrk4m.c rename to frame/3/syrk/4m1/bli_syrk4m1.c index 5f190cb29..4f2420442 100644 --- a/frame/3/syrk/4m/bli_syrk4m.c +++ b/frame/3/syrk/4m1/bli_syrk4m1.c @@ -37,15 +37,15 @@ // // Define object-based interface. // -void bli_syrk4m( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ) +void bli_syrk4m1( obj_t* alpha, + obj_t* a, + obj_t* beta, + obj_t* c ) { // Since 4m only applies to the complex domain, we use the regular // implementation for real domain cases. 
if ( bli_obj_is_complex( *c ) ) - bli_syrk4m_entry( alpha, a, beta, c ); + bli_syrk4m1_entry( alpha, a, beta, c ); else bli_syrk_entry( alpha, a, beta, c ); } @@ -92,5 +92,5 @@ void PASTEMAC(ch,opname)( \ &co ); \ } -INSERT_GENTFUNC_BASIC( syrk4m, syrk4m ) +INSERT_GENTFUNC_BASIC( syrk4m1, syrk4m1 ) diff --git a/frame/3/syrk/4m/bli_syrk4m.h b/frame/3/syrk/4m1/bli_syrk4m1.h similarity index 92% rename from frame/3/syrk/4m/bli_syrk4m.h rename to frame/3/syrk/4m1/bli_syrk4m1.h index f8554ecfa..035f3cd60 100644 --- a/frame/3/syrk/4m/bli_syrk4m.h +++ b/frame/3/syrk/4m1/bli_syrk4m1.h @@ -32,16 +32,16 @@ */ -#include "bli_syrk4m_entry.h" +#include "bli_syrk4m1_entry.h" // // Prototype object-based interface. // -void bli_syrk4m( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ); +void bli_syrk4m1( obj_t* alpha, + obj_t* a, + obj_t* beta, + obj_t* c ); // @@ -61,5 +61,5 @@ void PASTEMAC(ch,opname)( \ ctype* c, inc_t rs_c, inc_t cs_c \ ); -INSERT_GENTPROT_BASIC( syrk4m ) +INSERT_GENTPROT_BASIC( syrk4m1 ) diff --git a/frame/3/syrk/4m1/bli_syrk4m1_entry.c b/frame/3/syrk/4m1/bli_syrk4m1_entry.c new file mode 100644 index 000000000..cf265caba --- /dev/null +++ b/frame/3/syrk/4m1/bli_syrk4m1_entry.c @@ -0,0 +1,47 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +extern gemm_t* gemm4m1_cntl; + +void bli_syrk4m1_entry( obj_t* alpha, + obj_t* a, + obj_t* beta, + obj_t* c ) +{ + bli_syrk_front( alpha, a, beta, c, + gemm4m1_cntl ); +} + diff --git a/frame/3/syrk/4m1/bli_syrk4m1_entry.h b/frame/3/syrk/4m1/bli_syrk4m1_entry.h new file mode 100644 index 000000000..31ee6e628 --- /dev/null +++ b/frame/3/syrk/4m1/bli_syrk4m1_entry.h @@ -0,0 +1,39 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_syrk4m1_entry( obj_t* alpha, + obj_t* a, + obj_t* beta, + obj_t* c ); + diff --git a/frame/3/syrk/4mh/bli_syrk4mh.c b/frame/3/syrk/4mh/bli_syrk4mh.c index 8ff0cbc39..fa83476e2 100644 --- a/frame/3/syrk/4mh/bli_syrk4mh.c +++ b/frame/3/syrk/4mh/bli_syrk4mh.c @@ -38,9 +38,9 @@ // Define object-based interface. // void bli_syrk4mh( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* beta, + obj_t* c ) { // Since 4mh only applies to the complex domain, we use the regular // implementation for real domain cases. diff --git a/frame/3/syrk/4mh/bli_syrk4mh.h b/frame/3/syrk/4mh/bli_syrk4mh.h index 9474d350b..76d63f7bc 100644 --- a/frame/3/syrk/4mh/bli_syrk4mh.h +++ b/frame/3/syrk/4mh/bli_syrk4mh.h @@ -39,9 +39,9 @@ // Prototype object-based interface. 
// void bli_syrk4mh( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* beta, + obj_t* c ); // diff --git a/frame/3/syrk/4mh/bli_syrk4mh_entry.c b/frame/3/syrk/4mh/bli_syrk4mh_entry.c index d7942604f..0eaa56717 100644 --- a/frame/3/syrk/4mh/bli_syrk4mh_entry.c +++ b/frame/3/syrk/4mh/bli_syrk4mh_entry.c @@ -40,9 +40,9 @@ extern gemm_t* gemm4mh_cntl_ir; extern gemm_t* gemm4mh_cntl_ii; void bli_syrk4mh_entry( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ) + obj_t* a, + obj_t* beta, + obj_t* c ) { bli_syrk_front( alpha, a, beta, c, gemm4mh_cntl_rr ); bli_syrk_front( alpha, a, &BLIS_ONE, c, gemm4mh_cntl_ii ); diff --git a/frame/3/syrk/4mh/bli_syrk4mh_entry.h b/frame/3/syrk/4mh/bli_syrk4mh_entry.h index 1760db027..a46576517 100644 --- a/frame/3/syrk/4mh/bli_syrk4mh_entry.h +++ b/frame/3/syrk/4mh/bli_syrk4mh_entry.h @@ -33,7 +33,7 @@ */ void bli_syrk4mh_entry( obj_t* alpha, - obj_t* a, - obj_t* beta, - obj_t* c ); + obj_t* a, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/syrk/bli_syrk.c b/frame/3/syrk/bli_syrk.c index 69dde1fcb..5ac1f221b 100644 --- a/frame/3/syrk/bli_syrk.c +++ b/frame/3/syrk/bli_syrk.c @@ -45,9 +45,9 @@ void bli_syrk( obj_t* alpha, num_t dt = bli_obj_datatype( *c ); if ( bli_3mh_is_enabled_dt( dt ) ) bli_syrk3mh_entry( alpha, a, beta, c ); - else if ( bli_3m_is_enabled_dt( dt ) ) bli_syrk3m_entry( alpha, a, beta, c ); + else if ( bli_3m1_is_enabled_dt( dt ) ) bli_syrk3m1_entry( alpha, a, beta, c ); else if ( bli_4mh_is_enabled_dt( dt ) ) bli_syrk4mh_entry( alpha, a, beta, c ); - else if ( bli_4m_is_enabled_dt( dt ) ) bli_syrk4m_entry( alpha, a, beta, c ); + else if ( bli_4m1_is_enabled_dt( dt ) ) bli_syrk4m1_entry( alpha, a, beta, c ); else bli_syrk_entry( alpha, a, beta, c ); } diff --git a/frame/3/syrk/bli_syrk.h b/frame/3/syrk/bli_syrk.h index 268b21b30..b8f11090e 100644 --- a/frame/3/syrk/bli_syrk.h +++ b/frame/3/syrk/bli_syrk.h @@ -36,10 +36,10 @@ #include "bli_syrk_entry.h" #include "bli_syrk_front.h" -#include 
"bli_syrk4m.h" -#include "bli_syrk3m.h" #include "bli_syrk4mh.h" +#include "bli_syrk4m1.h" #include "bli_syrk3mh.h" +#include "bli_syrk3m1.h" // diff --git a/frame/3/trmm/3m/bli_trmm3m_entry.h b/frame/3/trmm/3m/bli_trmm3m_entry.h deleted file mode 100644 index 9ffc0689f..000000000 --- a/frame/3/trmm/3m/bli_trmm3m_entry.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -void bli_trmm3m_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b ); - diff --git a/frame/3/trmm/3m/old/bli_trmm3m_cntl.h b/frame/3/trmm/3m/old/bli_trmm3m_cntl.h deleted file mode 100644 index 091a0635e..000000000 --- a/frame/3/trmm/3m/old/bli_trmm3m_cntl.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -void bli_trmm3m_cntl_init( void ); -void bli_trmm3m_cntl_finalize( void ); diff --git a/frame/3/trmm/3m/bli_trmm3m.c b/frame/3/trmm/3m1/bli_trmm3m1.c similarity index 91% rename from frame/3/trmm/3m/bli_trmm3m.c rename to frame/3/trmm/3m1/bli_trmm3m1.c index 4bee7e39e..5e9c8e379 100644 --- a/frame/3/trmm/3m/bli_trmm3m.c +++ b/frame/3/trmm/3m1/bli_trmm3m1.c @@ -37,15 +37,15 @@ // // Define object-based interface. // -void bli_trmm3m( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b ) +void bli_trmm3m1( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b ) { - // Since 3m only applies to the complex domain, we use the regular + // Since 3m1 only applies to the complex domain, we use the regular // control tree for real domain cases. if ( bli_obj_is_complex( *b ) ) - bli_trmm3m_entry( side, alpha, a, b ); + bli_trmm3m1_entry( side, alpha, a, b ); else bli_trmm_entry( side, alpha, a, b ); } @@ -93,5 +93,5 @@ void PASTEMAC(ch,opname)( \ &bo ); \ } -INSERT_GENTFUNC_BASIC( trmm3m, trmm3m ) +INSERT_GENTFUNC_BASIC( trmm3m1, trmm3m1 ) diff --git a/frame/3/trmm/3m/bli_trmm3m.h b/frame/3/trmm/3m1/bli_trmm3m1.h similarity index 92% rename from frame/3/trmm/3m/bli_trmm3m.h rename to frame/3/trmm/3m1/bli_trmm3m1.h index a56b19170..23bd43eea 100644 --- a/frame/3/trmm/3m/bli_trmm3m.h +++ b/frame/3/trmm/3m1/bli_trmm3m1.h @@ -32,16 +32,16 @@ */ -#include "bli_trmm3m_entry.h" +#include "bli_trmm3m1_entry.h" // // Prototype object-based interface. 
// -void bli_trmm3m( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b ); +void bli_trmm3m1( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b ); // @@ -62,5 +62,5 @@ void PASTEMAC(ch,opname)( \ ctype* b, inc_t rs_b, inc_t cs_b \ ); -INSERT_GENTPROT_BASIC( trmm3m ) +INSERT_GENTPROT_BASIC( trmm3m1 ) diff --git a/frame/3/trmm/3m/bli_trmm3m_entry.c b/frame/3/trmm/3m1/bli_trmm3m1_entry.c similarity index 89% rename from frame/3/trmm/3m/bli_trmm3m_entry.c rename to frame/3/trmm/3m1/bli_trmm3m1_entry.c index 0b4b7f012..364de3f47 100644 --- a/frame/3/trmm/3m/bli_trmm3m_entry.c +++ b/frame/3/trmm/3m1/bli_trmm3m1_entry.c @@ -34,14 +34,14 @@ #include "blis.h" -extern gemm_t* gemm3m_cntl; +extern gemm_t* gemm3m1_cntl; -void bli_trmm3m_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b ) +void bli_trmm3m1_entry( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b ) { bli_trmm_front( side, alpha, a, b, - gemm3m_cntl ); + gemm3m1_cntl ); } diff --git a/frame/3/trmm/3m1/bli_trmm3m1_entry.h b/frame/3/trmm/3m1/bli_trmm3m1_entry.h new file mode 100644 index 000000000..46def0b1f --- /dev/null +++ b/frame/3/trmm/3m1/bli_trmm3m1_entry.h @@ -0,0 +1,39 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_trmm3m1_entry( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b ); + diff --git a/frame/3/trmm/3m1/old/bli_trmm3m1_cntl.h b/frame/3/trmm/3m1/old/bli_trmm3m1_cntl.h new file mode 100644 index 000000000..39edfc2e6 --- /dev/null +++ b/frame/3/trmm/3m1/old/bli_trmm3m1_cntl.h @@ -0,0 +1,36 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_trmm3m1_cntl_init( void ); +void bli_trmm3m1_cntl_finalize( void ); diff --git a/frame/3/trmm/4m/bli_trmm4m_entry.h b/frame/3/trmm/4m/bli_trmm4m_entry.h deleted file mode 100644 index 2b411056b..000000000 --- a/frame/3/trmm/4m/bli_trmm4m_entry.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_trmm4m_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b ); - diff --git a/frame/3/trmm/4m/old/bli_trmm4m_cntl.h b/frame/3/trmm/4m/old/bli_trmm4m_cntl.h deleted file mode 100644 index b4152f471..000000000 --- a/frame/3/trmm/4m/old/bli_trmm4m_cntl.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_trmm4m_cntl_init( void ); -void bli_trmm4m_cntl_finalize( void ); diff --git a/frame/3/trmm/4m/bli_trmm4m.c b/frame/3/trmm/4m1/bli_trmm4m1.c similarity index 93% rename from frame/3/trmm/4m/bli_trmm4m.c rename to frame/3/trmm/4m1/bli_trmm4m1.c index 66982a8aa..f94cff342 100644 --- a/frame/3/trmm/4m/bli_trmm4m.c +++ b/frame/3/trmm/4m1/bli_trmm4m1.c @@ -37,15 +37,15 @@ // // Define object-based interface. // -void bli_trmm4m( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b ) +void bli_trmm4m1( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b ) { // Since 4m only applies to the complex domain, we use the regular // control tree for real domain cases. 
if ( bli_obj_is_complex( *b ) ) - bli_trmm4m_entry( side, alpha, a, b ); + bli_trmm4m1_entry( side, alpha, a, b ); else bli_trmm_entry( side, alpha, a, b ); } @@ -93,5 +93,5 @@ void PASTEMAC(ch,opname)( \ &bo ); \ } -INSERT_GENTFUNC_BASIC( trmm4m, trmm4m ) +INSERT_GENTFUNC_BASIC( trmm4m1, trmm4m1 ) diff --git a/frame/3/trmm/4m/bli_trmm4m.h b/frame/3/trmm/4m1/bli_trmm4m1.h similarity index 92% rename from frame/3/trmm/4m/bli_trmm4m.h rename to frame/3/trmm/4m1/bli_trmm4m1.h index eeb6d33b3..f1000a041 100644 --- a/frame/3/trmm/4m/bli_trmm4m.h +++ b/frame/3/trmm/4m1/bli_trmm4m1.h @@ -32,16 +32,16 @@ */ -#include "bli_trmm4m_entry.h" +#include "bli_trmm4m1_entry.h" // // Prototype object-based interface. // -void bli_trmm4m( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b ); +void bli_trmm4m1( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b ); // @@ -62,5 +62,5 @@ void PASTEMAC(ch,opname)( \ ctype* b, inc_t rs_b, inc_t cs_b \ ); -INSERT_GENTPROT_BASIC( trmm4m ) +INSERT_GENTPROT_BASIC( trmm4m1 ) diff --git a/frame/3/trmm/4m/bli_trmm4m_entry.c b/frame/3/trmm/4m1/bli_trmm4m1_entry.c similarity index 89% rename from frame/3/trmm/4m/bli_trmm4m_entry.c rename to frame/3/trmm/4m1/bli_trmm4m1_entry.c index edb612493..85e539071 100644 --- a/frame/3/trmm/4m/bli_trmm4m_entry.c +++ b/frame/3/trmm/4m1/bli_trmm4m1_entry.c @@ -34,14 +34,14 @@ #include "blis.h" -extern gemm_t* gemm4m_cntl; +extern gemm_t* gemm4m1_cntl; -void bli_trmm4m_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b ) +void bli_trmm4m1_entry( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b ) { bli_trmm_front( side, alpha, a, b, - gemm4m_cntl ); + gemm4m1_cntl ); } diff --git a/frame/3/trmm/4m1/bli_trmm4m1_entry.h b/frame/3/trmm/4m1/bli_trmm4m1_entry.h new file mode 100644 index 000000000..9fdcffde0 --- /dev/null +++ b/frame/3/trmm/4m1/bli_trmm4m1_entry.h @@ -0,0 +1,39 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_trmm4m1_entry( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b ); + diff --git a/frame/3/trmm/4m1/old/bli_trmm4m1_cntl.h b/frame/3/trmm/4m1/old/bli_trmm4m1_cntl.h new file mode 100644 index 000000000..89ae2b385 --- /dev/null +++ b/frame/3/trmm/4m1/old/bli_trmm4m1_cntl.h @@ -0,0 +1,36 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +void bli_trmm4m1_cntl_init( void ); +void bli_trmm4m1_cntl_finalize( void ); diff --git a/frame/3/trmm/bli_trmm.c b/frame/3/trmm/bli_trmm.c index 7037876bb..6d89eca50 100644 --- a/frame/3/trmm/bli_trmm.c +++ b/frame/3/trmm/bli_trmm.c @@ -44,8 +44,8 @@ void bli_trmm( side_t side, { num_t dt = bli_obj_datatype( *b ); - if ( bli_3m_is_enabled_dt( dt ) ) bli_trmm3m_entry( side, alpha, a, b ); - else if ( bli_4m_is_enabled_dt( dt ) ) bli_trmm4m_entry( side, alpha, a, b ); + if ( bli_3m1_is_enabled_dt( dt ) ) bli_trmm3m1_entry( side, alpha, a, b ); + else if ( bli_4m1_is_enabled_dt( dt ) ) bli_trmm4m1_entry( side, alpha, a, b ); else bli_trmm_entry( side, alpha, a, b ); } diff --git a/frame/3/trmm/bli_trmm.h b/frame/3/trmm/bli_trmm.h index 2278f713a..be5376fe0 100644 --- a/frame/3/trmm/bli_trmm.h +++ b/frame/3/trmm/bli_trmm.h @@ -52,8 +52,8 @@ #include "bli_trmm_rl_ker_var2.h" #include "bli_trmm_ru_ker_var2.h" -#include "bli_trmm4m.h" -#include "bli_trmm3m.h" +#include "bli_trmm4m1.h" +#include "bli_trmm3m1.h" // diff --git a/frame/3/trmm/bli_trmm_ll_ker_var2.c b/frame/3/trmm/bli_trmm_ll_ker_var2.c index 035699ec0..4c97c2523 100644 --- a/frame/3/trmm/bli_trmm_ll_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ll_ker_var2.c @@ -230,7 +230,7 @@ void PASTEMAC(ch,varname)( \ needed because some parameter combinations of trmm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of B (the non-triangular - matrix), which is used by 3m and 4m implementations, we need + matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = k; \ \ @@ -376,7 +376,7 @@ void PASTEMAC(ch,varname)( \ bli_auxinfo_set_next_a( a2, aux ); \ bli_auxinfo_set_next_b( b2, aux ); \ \ - /* Save the 4m/3m imaginary stride of A to the auxinfo_t + /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. 
*/ \ bli_auxinfo_set_is_a( PACKMR * k_a1011, aux ); \ \ @@ -438,7 +438,7 @@ void PASTEMAC(ch,varname)( \ bli_auxinfo_set_next_a( a2, aux ); \ bli_auxinfo_set_next_b( b2, aux ); \ \ - /* Save the 4m/3m imaginary stride of A to the auxinfo_t + /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, aux ); \ \ diff --git a/frame/3/trmm/bli_trmm_lu_ker_var2.c b/frame/3/trmm/bli_trmm_lu_ker_var2.c index 12b9faca3..f8f7a09be 100644 --- a/frame/3/trmm/bli_trmm_lu_ker_var2.c +++ b/frame/3/trmm/bli_trmm_lu_ker_var2.c @@ -230,7 +230,7 @@ void PASTEMAC(ch,varname)( \ needed because some parameter combinations of trmm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of B (the non-triangular - matrix), which is used by 3m and 4m implementations, we need + matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = k; \ \ @@ -383,7 +383,7 @@ void PASTEMAC(ch,varname)( \ bli_auxinfo_set_next_a( a2, aux ); \ bli_auxinfo_set_next_b( b2, aux ); \ \ - /* Save the 4m/3m imaginary stride of A to the auxinfo_t + /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( PACKMR * k_a1112, aux ); \ \ @@ -445,7 +445,7 @@ void PASTEMAC(ch,varname)( \ bli_auxinfo_set_next_a( a2, aux ); \ bli_auxinfo_set_next_b( b2, aux ); \ \ - /* Save the 4m/3m imaginary stride of A to the auxinfo_t + /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. 
*/ \ bli_auxinfo_set_is_a( istep_a, aux ); \ \ diff --git a/frame/3/trmm/bli_trmm_query.c b/frame/3/trmm/bli_trmm_query.c index 74b163c64..ed106f3eb 100644 --- a/frame/3/trmm/bli_trmm_query.c +++ b/frame/3/trmm/bli_trmm_query.c @@ -34,21 +34,21 @@ #include "blis.h" -extern func_t* gemm3m_ukrs; -extern func_t* gemm4m_ukrs; +extern func_t* gemm3m1_ukrs; +extern func_t* gemm4m1_ukrs; extern func_t* gemm_ukrs; func_t* bli_trmm_query_ukrs( num_t dt ) { - if ( bli_3m_is_enabled_dt( dt ) ) return gemm3m_ukrs; - else if ( bli_4m_is_enabled_dt( dt ) ) return gemm4m_ukrs; + if ( bli_3m1_is_enabled_dt( dt ) ) return gemm3m1_ukrs; + else if ( bli_4m1_is_enabled_dt( dt ) ) return gemm4m1_ukrs; else return gemm_ukrs; } char* bli_trmm_query_impl_string( num_t dt ) { - if ( bli_3m_is_enabled_dt( dt ) ) return bli_3m_get_string(); - else if ( bli_4m_is_enabled_dt( dt ) ) return bli_4m_get_string(); + if ( bli_3m1_is_enabled_dt( dt ) ) return bli_3m1_get_string(); + else if ( bli_4m1_is_enabled_dt( dt ) ) return bli_4m1_get_string(); else return bli_native_get_string(); } diff --git a/frame/3/trmm/bli_trmm_rl_ker_var2.c b/frame/3/trmm/bli_trmm_rl_ker_var2.c index 9107f3130..b5b3f74b7 100644 --- a/frame/3/trmm/bli_trmm_rl_ker_var2.c +++ b/frame/3/trmm/bli_trmm_rl_ker_var2.c @@ -230,7 +230,7 @@ void PASTEMAC(ch,varname)( \ needed because some parameter combinations of trmm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of A (the non-triangular - matrix), which is used by 3m and 4m implementations, we need + matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = k; \ \ @@ -356,7 +356,7 @@ void PASTEMAC(ch,varname)( \ \ if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \ \ - /* Save the 4m/3m imaginary stride of B to the auxinfo_t + /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. 
*/ \ bli_auxinfo_set_is_b( PACKNR * k_b1121, aux ); \ \ @@ -433,7 +433,7 @@ void PASTEMAC(ch,varname)( \ { \ if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \ \ - /* Save the 4m/3m imaginary stride of B to the auxinfo_t + /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( istep_b, aux ); \ \ diff --git a/frame/3/trmm/bli_trmm_ru_ker_var2.c b/frame/3/trmm/bli_trmm_ru_ker_var2.c index 55c92f6cc..8443fb3e1 100644 --- a/frame/3/trmm/bli_trmm_ru_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ru_ker_var2.c @@ -230,7 +230,7 @@ void PASTEMAC(ch,varname)( \ needed because some parameter combinations of trmm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of A (the non-triangular - matrix), which is used by 3m and 4m implementations, we need + matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = k; \ \ @@ -356,7 +356,7 @@ void PASTEMAC(ch,varname)( \ \ if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \ \ - /* Save the 4m/3m imaginary stride of B to the auxinfo_t + /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( PACKNR * k_b0111, aux ); \ \ @@ -433,7 +433,7 @@ void PASTEMAC(ch,varname)( \ { \ if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \ \ - /* Save the 4m/3m imaginary stride of B to the auxinfo_t + /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( istep_b, aux ); \ \ diff --git a/frame/3/trmm3/3m/bli_trmm33m.c b/frame/3/trmm3/3m1/bli_trmm33m1.c similarity index 91% rename from frame/3/trmm3/3m/bli_trmm33m.c rename to frame/3/trmm3/3m1/bli_trmm33m1.c index a345624c0..38412d657 100644 --- a/frame/3/trmm3/3m/bli_trmm33m.c +++ b/frame/3/trmm3/3m1/bli_trmm33m1.c @@ -37,17 +37,17 @@ // // Define object-based interface. 
// -void bli_trmm33m( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) +void bli_trmm33m1( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { - // Since 3m only applies to the complex domain, we use the regular + // Since 3m1 only applies to the complex domain, we use the regular // control tree for real domain cases. if ( bli_obj_is_complex( *c ) ) - bli_trmm33m_entry( side, alpha, a, b, beta, c ); + bli_trmm33m1_entry( side, alpha, a, b, beta, c ); else bli_trmm3_entry( side, alpha, a, b, beta, c ); } @@ -105,5 +105,5 @@ void PASTEMAC(ch,opname)( \ &co ); \ } -INSERT_GENTFUNC_BASIC( trmm33m, trmm33m ) +INSERT_GENTFUNC_BASIC( trmm33m1, trmm33m1 ) diff --git a/frame/3/trmm3/3m1/bli_trmm33m1.h b/frame/3/trmm3/3m1/bli_trmm33m1.h new file mode 100644 index 000000000..83f9bf41e --- /dev/null +++ b/frame/3/trmm3/3m1/bli_trmm33m1.h @@ -0,0 +1,71 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "bli_trmm33m1_entry.h" + + +// +// Prototype object-based interface. +// +void bli_trmm33m1( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); + + +// +// Prototype BLAS-like interfaces with homogeneous-typed operands. 
+// +#undef GENTPROT +#define GENTPROT( ctype, ch, opname ) \ +\ +void PASTEMAC(ch,opname)( \ + side_t side, \ + uplo_t uploa, \ + trans_t transa, \ + diag_t diaga, \ + trans_t transb, \ + dim_t m, \ + dim_t n, \ + ctype* alpha, \ + ctype* a, inc_t rs_a, inc_t cs_a, \ + ctype* b, inc_t rs_b, inc_t cs_b, \ + ctype* beta, \ + ctype* c, inc_t rs_c, inc_t cs_c \ + ); + +INSERT_GENTPROT_BASIC( trmm33m1 ) + diff --git a/frame/3/hemm/4m/bli_hemm4m_entry.c b/frame/3/trmm3/3m1/bli_trmm33m1_entry.c similarity index 83% rename from frame/3/hemm/4m/bli_hemm4m_entry.c rename to frame/3/trmm3/3m1/bli_trmm33m1_entry.c index 40d9230b5..954edd89c 100644 --- a/frame/3/hemm/4m/bli_hemm4m_entry.c +++ b/frame/3/trmm3/3m1/bli_trmm33m1_entry.c @@ -34,16 +34,16 @@ #include "blis.h" -extern gemm_t* gemm4m_cntl; +extern gemm_t* gemm3m1_cntl; -void bli_hemm4m_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) +void bli_trmm33m1_entry( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { - bli_hemm_front( side, alpha, a, b, beta, c, - gemm4m_cntl ); + bli_trmm3_front( side, alpha, a, b, beta, c, + gemm3m1_cntl ); } diff --git a/frame/3/trmm3/3m1/bli_trmm33m1_entry.h b/frame/3/trmm3/3m1/bli_trmm33m1_entry.h new file mode 100644 index 000000000..4803073bb --- /dev/null +++ b/frame/3/trmm3/3m1/bli_trmm33m1_entry.h @@ -0,0 +1,41 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_trmm33m1_entry( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); + diff --git a/frame/3/trmm3/3mh/bli_trmm33mh.c b/frame/3/trmm3/3mh/bli_trmm33mh.c index 8c764a857..5036c82aa 100644 --- a/frame/3/trmm3/3mh/bli_trmm33mh.c +++ b/frame/3/trmm3/3mh/bli_trmm33mh.c @@ -38,11 +38,11 @@ // Define object-based interface. // void bli_trmm33mh( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 3mh only applies to the complex domain, we use the regular // control tree for real domain cases. 
diff --git a/frame/3/trmm3/3mh/bli_trmm33mh.h b/frame/3/trmm3/3mh/bli_trmm33mh.h index ba2523d34..cabd68043 100644 --- a/frame/3/trmm3/3mh/bli_trmm33mh.h +++ b/frame/3/trmm3/3mh/bli_trmm33mh.h @@ -39,11 +39,11 @@ // Prototype object-based interface. // void bli_trmm33mh( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // diff --git a/frame/3/trmm3/3mh/bli_trmm33mh_entry.c b/frame/3/trmm3/3mh/bli_trmm33mh_entry.c index 4dd92d9c2..2b2f393f8 100644 --- a/frame/3/trmm3/3mh/bli_trmm33mh_entry.c +++ b/frame/3/trmm3/3mh/bli_trmm33mh_entry.c @@ -39,11 +39,11 @@ extern gemm_t* gemm3mh_cntl_io; extern gemm_t* gemm3mh_cntl_rpi; void bli_trmm33mh_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { bli_trmm3_front( side, alpha, a, b, beta, c, gemm3mh_cntl_ro ); bli_trmm3_front( side, alpha, a, b, &BLIS_ONE, c, gemm3mh_cntl_io ); diff --git a/frame/3/trmm3/3mh/bli_trmm33mh_entry.h b/frame/3/trmm3/3mh/bli_trmm33mh_entry.h index 0e8934336..65a12b49e 100644 --- a/frame/3/trmm3/3mh/bli_trmm33mh_entry.h +++ b/frame/3/trmm3/3mh/bli_trmm33mh_entry.h @@ -33,9 +33,9 @@ */ void bli_trmm33mh_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/trmm3/4m/bli_trmm34m.c b/frame/3/trmm3/4m1/bli_trmm34m1.c similarity index 92% rename from frame/3/trmm3/4m/bli_trmm34m.c rename to frame/3/trmm3/4m1/bli_trmm34m1.c index 092ea51b9..68442682a 100644 --- a/frame/3/trmm3/4m/bli_trmm34m.c +++ b/frame/3/trmm3/4m1/bli_trmm34m1.c @@ -37,17 +37,17 @@ // // Define object-based interface. 
// -void bli_trmm34m( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) +void bli_trmm34m1( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 4m only applies to the complex domain, we use the regular // control tree for real domain cases. if ( bli_obj_is_complex( *c ) ) - bli_trmm34m_entry( side, alpha, a, b, beta, c ); + bli_trmm34m1_entry( side, alpha, a, b, beta, c ); else bli_trmm3_entry( side, alpha, a, b, beta, c ); } @@ -105,5 +105,5 @@ void PASTEMAC(ch,opname)( \ &co ); \ } -INSERT_GENTFUNC_BASIC( trmm34m, trmm34m ) +INSERT_GENTFUNC_BASIC( trmm34m1, trmm34m1 ) diff --git a/frame/3/trmm3/4m1/bli_trmm34m1.h b/frame/3/trmm3/4m1/bli_trmm34m1.h new file mode 100644 index 000000000..34a8d52c3 --- /dev/null +++ b/frame/3/trmm3/4m1/bli_trmm34m1.h @@ -0,0 +1,71 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "bli_trmm34m1_entry.h" + + +// +// Prototype object-based interface. +// +void bli_trmm34m1( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); + + +// +// Prototype BLAS-like interfaces with homogeneous-typed operands. +// +#undef GENTPROT +#define GENTPROT( ctype, ch, opname ) \ +\ +void PASTEMAC(ch,opname)( \ + side_t side, \ + uplo_t uploa, \ + trans_t transa, \ + diag_t diaga, \ + trans_t transb, \ + dim_t m, \ + dim_t n, \ + ctype* alpha, \ + ctype* a, inc_t rs_a, inc_t cs_a, \ + ctype* b, inc_t rs_b, inc_t cs_b, \ + ctype* beta, \ + ctype* c, inc_t rs_c, inc_t cs_c \ + ); + +INSERT_GENTPROT_BASIC( trmm34m1 ) + diff --git a/frame/3/trmm3/4m1/bli_trmm34m1_entry.c b/frame/3/trmm3/4m1/bli_trmm34m1_entry.c new file mode 100644 index 000000000..5124aac11 --- /dev/null +++ b/frame/3/trmm3/4m1/bli_trmm34m1_entry.c @@ -0,0 +1,49 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +extern gemm_t* gemm4m1_cntl; + +void bli_trmm34m1_entry( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) +{ + bli_trmm3_front( side, alpha, a, b, beta, c, + gemm4m1_cntl ); +} + diff --git a/frame/3/trmm3/4m1/bli_trmm34m1_entry.h b/frame/3/trmm3/4m1/bli_trmm34m1_entry.h new file mode 100644 index 000000000..a92f22930 --- /dev/null +++ b/frame/3/trmm3/4m1/bli_trmm34m1_entry.h @@ -0,0 +1,41 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_trmm34m1_entry( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); + diff --git a/frame/3/trmm3/4mh/bli_trmm34mh.c b/frame/3/trmm3/4mh/bli_trmm34mh.c index bcf256429..25493d831 100644 --- a/frame/3/trmm3/4mh/bli_trmm34mh.c +++ b/frame/3/trmm3/4mh/bli_trmm34mh.c @@ -38,11 +38,11 @@ // Define object-based interface. 
// void bli_trmm34mh( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { // Since 4mh only applies to the complex domain, we use the regular // control tree for real domain cases. diff --git a/frame/3/trmm3/4mh/bli_trmm34mh.h b/frame/3/trmm3/4mh/bli_trmm34mh.h index 0e3e3f43b..1ab5934f9 100644 --- a/frame/3/trmm3/4mh/bli_trmm34mh.h +++ b/frame/3/trmm3/4mh/bli_trmm34mh.h @@ -39,11 +39,11 @@ // Prototype object-based interface. // void bli_trmm34mh( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); // diff --git a/frame/3/trmm3/4mh/bli_trmm34mh_entry.c b/frame/3/trmm3/4mh/bli_trmm34mh_entry.c index 63548c2ad..de0fe3eec 100644 --- a/frame/3/trmm3/4mh/bli_trmm34mh_entry.c +++ b/frame/3/trmm3/4mh/bli_trmm34mh_entry.c @@ -40,11 +40,11 @@ extern gemm_t* gemm4mh_cntl_ir; extern gemm_t* gemm4mh_cntl_ii; void bli_trmm34mh_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ) { bli_trmm3_front( side, alpha, a, b, beta, c, gemm4mh_cntl_rr ); bli_trmm3_front( side, alpha, a, b, &BLIS_ONE, c, gemm4mh_cntl_ii ); diff --git a/frame/3/trmm3/4mh/bli_trmm34mh_entry.h b/frame/3/trmm3/4mh/bli_trmm34mh_entry.h index 818e1fdf8..73a103623 100644 --- a/frame/3/trmm3/4mh/bli_trmm34mh_entry.h +++ b/frame/3/trmm3/4mh/bli_trmm34mh_entry.h @@ -33,9 +33,9 @@ */ void bli_trmm34mh_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ); + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c ); diff --git a/frame/3/trmm3/bli_trmm3.c b/frame/3/trmm3/bli_trmm3.c index 510908ff5..4b19fe9bc 100644 --- a/frame/3/trmm3/bli_trmm3.c +++ b/frame/3/trmm3/bli_trmm3.c @@ -47,9 +47,9 @@ void bli_trmm3( side_t side, num_t dt = bli_obj_datatype( *c ); if ( 
bli_3mh_is_enabled_dt( dt ) ) bli_trmm33mh_entry( side, alpha, a, b, beta, c ); - else if ( bli_3m_is_enabled_dt( dt ) ) bli_trmm33m_entry( side, alpha, a, b, beta, c ); + else if ( bli_3m1_is_enabled_dt( dt ) ) bli_trmm33m1_entry( side, alpha, a, b, beta, c ); else if ( bli_4mh_is_enabled_dt( dt ) ) bli_trmm34mh_entry( side, alpha, a, b, beta, c ); - else if ( bli_4m_is_enabled_dt( dt ) ) bli_trmm34m_entry( side, alpha, a, b, beta, c ); + else if ( bli_4m1_is_enabled_dt( dt ) ) bli_trmm34m1_entry( side, alpha, a, b, beta, c ); else bli_trmm3_entry( side, alpha, a, b, beta, c ); } diff --git a/frame/3/trmm3/bli_trmm3.h b/frame/3/trmm3/bli_trmm3.h index ff53dece5..288b175f4 100644 --- a/frame/3/trmm3/bli_trmm3.h +++ b/frame/3/trmm3/bli_trmm3.h @@ -36,10 +36,10 @@ #include "bli_trmm3_entry.h" #include "bli_trmm3_front.h" -#include "bli_trmm34m.h" -#include "bli_trmm33m.h" #include "bli_trmm34mh.h" +#include "bli_trmm34m1.h" #include "bli_trmm33mh.h" +#include "bli_trmm33m1.h" // diff --git a/frame/3/trsm/3m/bli_trsm3m_cntl.h b/frame/3/trsm/3m/bli_trsm3m_cntl.h deleted file mode 100644 index ba8ca3082..000000000 --- a/frame/3/trsm/3m/bli_trsm3m_cntl.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_trsm3m_cntl_init( void ); -void bli_trsm3m_cntl_finalize( void ); diff --git a/frame/3/trsm/3m/bli_trsm3m_entry.c b/frame/3/trsm/3m/bli_trsm3m_entry.c deleted file mode 100644 index 1b95b76df..000000000 --- a/frame/3/trsm/3m/bli_trsm3m_entry.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -extern trsm_t* trsm3m_l_cntl; -extern trsm_t* trsm3m_r_cntl; - -void bli_trsm3m_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b ) -{ - bli_trsm_front( side, alpha, a, b, - trsm3m_l_cntl, - trsm3m_r_cntl ); -} - diff --git a/frame/3/trsm/3m/bli_trsm3m_entry.h b/frame/3/trsm/3m/bli_trsm3m_entry.h deleted file mode 100644 index a5e542182..000000000 --- a/frame/3/trsm/3m/bli_trsm3m_entry.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_trsm3m_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b ); - diff --git a/frame/3/trsm/3m/bli_trsm3m.c b/frame/3/trsm/3m1/bli_trsm3m1.c similarity index 91% rename from frame/3/trsm/3m/bli_trsm3m.c rename to frame/3/trsm/3m1/bli_trsm3m1.c index e1c63a024..a9d61fec6 100644 --- a/frame/3/trsm/3m/bli_trsm3m.c +++ b/frame/3/trsm/3m1/bli_trsm3m1.c @@ -37,15 +37,15 @@ // // Define object-based interface. // -void bli_trsm3m( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b ) +void bli_trsm3m1( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b ) { - // Since 3m only applies to the complex domain, we use the regular + // Since 3m1 only applies to the complex domain, we use the regular // control tree for real domain cases. 
if ( bli_obj_is_complex( *b ) ) - bli_trsm3m_entry( side, alpha, a, b ); + bli_trsm3m1_entry( side, alpha, a, b ); else bli_trsm_entry( side, alpha, a, b ); } @@ -93,5 +93,5 @@ void PASTEMAC(ch,opname)( \ &bo ); \ } -INSERT_GENTFUNC_BASIC( trsm3m, trsm3m ) +INSERT_GENTFUNC_BASIC( trsm3m1, trsm3m1 ) diff --git a/frame/3/trsm/3m/bli_trsm3m.h b/frame/3/trsm/3m1/bli_trsm3m1.h similarity index 86% rename from frame/3/trsm/3m/bli_trsm3m.h rename to frame/3/trsm/3m1/bli_trsm3m1.h index 2c3136d41..3dc7201e4 100644 --- a/frame/3/trsm/3m/bli_trsm3m.h +++ b/frame/3/trsm/3m1/bli_trsm3m1.h @@ -32,23 +32,23 @@ */ -#include "bli_trsm3m_cntl.h" -#include "bli_trsm3m_entry.h" +#include "bli_trsm3m1_cntl.h" +#include "bli_trsm3m1_entry.h" -#include "bli_gemmtrsm3m_l_ukr_ref.h" -#include "bli_gemmtrsm3m_u_ukr_ref.h" +#include "bli_gemmtrsm3m1_l_ukr_ref.h" +#include "bli_gemmtrsm3m1_u_ukr_ref.h" -#include "bli_trsm3m_l_ukr_ref.h" -#include "bli_trsm3m_u_ukr_ref.h" +#include "bli_trsm3m1_l_ukr_ref.h" +#include "bli_trsm3m1_u_ukr_ref.h" // // Prototype object-based interface. 
// -void bli_trsm3m( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b ); +void bli_trsm3m1( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b ); // @@ -69,5 +69,5 @@ void PASTEMAC(ch,opname)( \ ctype* b, inc_t rs_b, inc_t cs_b \ ); -INSERT_GENTPROT_BASIC( trsm3m ) +INSERT_GENTPROT_BASIC( trsm3m1 ) diff --git a/frame/3/trsm/3m/bli_trsm3m_cntl.c b/frame/3/trsm/3m1/bli_trsm3m1_cntl.c similarity index 65% rename from frame/3/trsm/3m/bli_trsm3m_cntl.c rename to frame/3/trsm/3m1/bli_trsm3m1_cntl.c index 9633d8bd7..642d9317f 100644 --- a/frame/3/trsm/3m/bli_trsm3m_cntl.c +++ b/frame/3/trsm/3m1/bli_trsm3m1_cntl.c @@ -36,101 +36,101 @@ extern scalm_t* scalm_cntl; -extern blksz_t* gemm3m_mc; -extern blksz_t* gemm3m_nc; -extern blksz_t* gemm3m_kc; -extern blksz_t* gemm3m_mr; -extern blksz_t* gemm3m_nr; -extern blksz_t* gemm3m_kr; +extern blksz_t* gemm3m1_mc; +extern blksz_t* gemm3m1_nc; +extern blksz_t* gemm3m1_kc; +extern blksz_t* gemm3m1_mr; +extern blksz_t* gemm3m1_nr; +extern blksz_t* gemm3m1_kr; -extern func_t* gemm3m_ukrs; +extern func_t* gemm3m1_ukrs; -func_t* gemmtrsm3m_l_ukrs; -func_t* gemmtrsm3m_u_ukrs; +func_t* gemmtrsm3m1_l_ukrs; +func_t* gemmtrsm3m1_u_ukrs; -func_t* trsm3m_l_ukrs; -func_t* trsm3m_u_ukrs; +func_t* trsm3m1_l_ukrs; +func_t* trsm3m1_u_ukrs; -packm_t* trsm3m_l_packa_cntl; -packm_t* trsm3m_l_packb_cntl; +packm_t* trsm3m1_l_packa_cntl; +packm_t* trsm3m1_l_packb_cntl; -packm_t* trsm3m_r_packa_cntl; -packm_t* trsm3m_r_packb_cntl; +packm_t* trsm3m1_r_packa_cntl; +packm_t* trsm3m1_r_packb_cntl; -trsm_t* trsm3m_cntl_bp_ke; +trsm_t* trsm3m1_cntl_bp_ke; -trsm_t* trsm3m_l_cntl_op_bp; -trsm_t* trsm3m_l_cntl_mm_op; -trsm_t* trsm3m_l_cntl_vl_mm; +trsm_t* trsm3m1_l_cntl_op_bp; +trsm_t* trsm3m1_l_cntl_mm_op; +trsm_t* trsm3m1_l_cntl_vl_mm; -trsm_t* trsm3m_r_cntl_op_bp; -trsm_t* trsm3m_r_cntl_mm_op; -trsm_t* trsm3m_r_cntl_vl_mm; +trsm_t* trsm3m1_r_cntl_op_bp; +trsm_t* trsm3m1_r_cntl_mm_op; +trsm_t* trsm3m1_r_cntl_vl_mm; -trsm_t* trsm3m_l_cntl; -trsm_t* 
trsm3m_r_cntl; +trsm_t* trsm3m1_l_cntl; +trsm_t* trsm3m1_r_cntl; -void bli_trsm3m_cntl_init() +void bli_trsm3m1_cntl_init() { // Create function pointer objects for each datatype-specific - // gemmtrsm3m_l and gemmtrsm3m_u micro-kernel. - gemmtrsm3m_l_ukrs + // gemmtrsm3m1_l and gemmtrsm3m1_u micro-kernel. + gemmtrsm3m1_l_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, - BLIS_CGEMMTRSM3M_L_UKERNEL, FALSE, - BLIS_ZGEMMTRSM3M_L_UKERNEL, FALSE ); + BLIS_CGEMMTRSM3M1_L_UKERNEL, FALSE, + BLIS_ZGEMMTRSM3M1_L_UKERNEL, FALSE ); - gemmtrsm3m_u_ukrs + gemmtrsm3m1_u_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, - BLIS_CGEMMTRSM3M_U_UKERNEL, FALSE, - BLIS_ZGEMMTRSM3M_U_UKERNEL, FALSE ); + BLIS_CGEMMTRSM3M1_U_UKERNEL, FALSE, + BLIS_ZGEMMTRSM3M1_U_UKERNEL, FALSE ); // Create function pointer objects for each datatype-specific - // trsm3m_l and trsm3m_u micro-kernel. - trsm3m_l_ukrs + // trsm3m1_l and trsm3m1_u micro-kernel. + trsm3m1_l_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, - BLIS_CTRSM3M_L_UKERNEL, FALSE, - BLIS_ZTRSM3M_L_UKERNEL, FALSE ); + BLIS_CTRSM3M1_L_UKERNEL, FALSE, + BLIS_ZTRSM3M1_L_UKERNEL, FALSE ); - trsm3m_u_ukrs + trsm3m1_u_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, - BLIS_CTRSM3M_U_UKERNEL, FALSE, - BLIS_ZTRSM3M_U_UKERNEL, FALSE ); + BLIS_CTRSM3M1_U_UKERNEL, FALSE, + BLIS_ZTRSM3M1_U_UKERNEL, FALSE ); // Create control tree objects for packm operations (left side). - trsm3m_l_packa_cntl + trsm3m1_l_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, // IMPORTANT: n dim multiple must be mr to // support right and bottom-right edge cases - gemm3m_mr, - gemm3m_mr, + gemm3m1_mr, + gemm3m1_mr, TRUE, // invert diagonal TRUE, // reverse iteration if upper? FALSE, // reverse iteration if lower? 
BLIS_PACKED_ROW_PANELS_3MI, BLIS_BUFFER_FOR_A_BLOCK ); - trsm3m_l_packb_cntl + trsm3m1_l_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, // IMPORTANT: m dim multiple must be mr since // B_pack is updated (ie: serves as C) in trsm - gemm3m_mr, - gemm3m_nr, + gemm3m1_mr, + gemm3m1_nr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? @@ -138,24 +138,24 @@ void bli_trsm3m_cntl_init() BLIS_BUFFER_FOR_B_PANEL ); // Create control tree objects for packm operations (right side). - trsm3m_r_packa_cntl + trsm3m1_r_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - gemm3m_nr, - gemm3m_mr, + gemm3m1_nr, + gemm3m1_mr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS_3MI, BLIS_BUFFER_FOR_A_BLOCK ); - trsm3m_r_packb_cntl + trsm3m1_r_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - gemm3m_mr, - gemm3m_mr, + gemm3m1_mr, + gemm3m1_mr, TRUE, // invert diagonal FALSE, // reverse iteration if upper? TRUE, // reverse iteration if lower? @@ -164,137 +164,137 @@ void bli_trsm3m_cntl_init() // Create control tree object for lowest-level block-panel kernel. - trsm3m_cntl_bp_ke + trsm3m1_cntl_bp_ke = bli_trsm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, - gemm3m_ukrs, - gemmtrsm3m_l_ukrs, - gemmtrsm3m_u_ukrs, + gemm3m1_ukrs, + gemmtrsm3m1_l_ukrs, + gemmtrsm3m1_u_ukrs, NULL, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem (left side). 
- trsm3m_l_cntl_op_bp + trsm3m1_l_cntl_op_bp = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, - gemm3m_mc, + gemm3m1_mc, NULL, NULL, NULL, NULL, - trsm3m_l_packa_cntl, - trsm3m_l_packb_cntl, + trsm3m1_l_packa_cntl, + trsm3m1_l_packb_cntl, NULL, - trsm3m_cntl_bp_ke, + trsm3m1_cntl_bp_ke, NULL, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates (left side). - trsm3m_l_cntl_mm_op + trsm3m1_l_cntl_mm_op = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, - gemm3m_kc, + gemm3m1_kc, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - trsm3m_l_cntl_op_bp, + trsm3m1_l_cntl_op_bp, NULL, NULL ); // Create control tree object for very large problem via multiple // general problems (left side). - trsm3m_l_cntl_vl_mm + trsm3m1_l_cntl_vl_mm = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - gemm3m_nc, + gemm3m1_nc, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - trsm3m_l_cntl_mm_op, + trsm3m1_l_cntl_mm_op, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem (right side). - trsm3m_r_cntl_op_bp + trsm3m1_r_cntl_op_bp = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, - gemm3m_mc, + gemm3m1_mc, NULL, NULL, NULL, NULL, - trsm3m_r_packa_cntl, - trsm3m_r_packb_cntl, + trsm3m1_r_packa_cntl, + trsm3m1_r_packb_cntl, NULL, - trsm3m_cntl_bp_ke, + trsm3m1_cntl_bp_ke, NULL, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates (right side). - trsm3m_r_cntl_mm_op + trsm3m1_r_cntl_mm_op = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, - gemm3m_kc, + gemm3m1_kc, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - trsm3m_r_cntl_op_bp, + trsm3m1_r_cntl_op_bp, NULL, NULL ); // Create control tree object for very large problem via multiple // general problems (right side). 
- trsm3m_r_cntl_vl_mm + trsm3m1_r_cntl_vl_mm = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - gemm3m_nc, + gemm3m1_nc, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - trsm3m_r_cntl_mm_op, + trsm3m1_r_cntl_mm_op, NULL, NULL ); // Alias the "master" trsm control trees to shorter names. - trsm3m_l_cntl = trsm3m_l_cntl_vl_mm; - trsm3m_r_cntl = trsm3m_r_cntl_vl_mm; + trsm3m1_l_cntl = trsm3m1_l_cntl_vl_mm; + trsm3m1_r_cntl = trsm3m1_r_cntl_vl_mm; } -void bli_trsm3m_cntl_finalize() +void bli_trsm3m1_cntl_finalize() { - bli_func_obj_free( gemmtrsm3m_l_ukrs ); - bli_func_obj_free( gemmtrsm3m_u_ukrs ); - bli_func_obj_free( trsm3m_l_ukrs ); - bli_func_obj_free( trsm3m_u_ukrs ); + bli_func_obj_free( gemmtrsm3m1_l_ukrs ); + bli_func_obj_free( gemmtrsm3m1_u_ukrs ); + bli_func_obj_free( trsm3m1_l_ukrs ); + bli_func_obj_free( trsm3m1_u_ukrs ); - bli_cntl_obj_free( trsm3m_l_packa_cntl ); - bli_cntl_obj_free( trsm3m_l_packb_cntl ); - bli_cntl_obj_free( trsm3m_r_packa_cntl ); - bli_cntl_obj_free( trsm3m_r_packb_cntl ); + bli_cntl_obj_free( trsm3m1_l_packa_cntl ); + bli_cntl_obj_free( trsm3m1_l_packb_cntl ); + bli_cntl_obj_free( trsm3m1_r_packa_cntl ); + bli_cntl_obj_free( trsm3m1_r_packb_cntl ); - bli_cntl_obj_free( trsm3m_cntl_bp_ke ); + bli_cntl_obj_free( trsm3m1_cntl_bp_ke ); - bli_cntl_obj_free( trsm3m_l_cntl_op_bp ); - bli_cntl_obj_free( trsm3m_l_cntl_mm_op ); - bli_cntl_obj_free( trsm3m_l_cntl_vl_mm ); - bli_cntl_obj_free( trsm3m_r_cntl_op_bp ); - bli_cntl_obj_free( trsm3m_r_cntl_mm_op ); - bli_cntl_obj_free( trsm3m_r_cntl_vl_mm ); + bli_cntl_obj_free( trsm3m1_l_cntl_op_bp ); + bli_cntl_obj_free( trsm3m1_l_cntl_mm_op ); + bli_cntl_obj_free( trsm3m1_l_cntl_vl_mm ); + bli_cntl_obj_free( trsm3m1_r_cntl_op_bp ); + bli_cntl_obj_free( trsm3m1_r_cntl_mm_op ); + bli_cntl_obj_free( trsm3m1_r_cntl_vl_mm ); } diff --git a/frame/3/trsm/3m1/bli_trsm3m1_cntl.h b/frame/3/trsm/3m1/bli_trsm3m1_cntl.h new file mode 100644 index 000000000..fe59a6c32 --- /dev/null +++ 
b/frame/3/trsm/3m1/bli_trsm3m1_cntl.h @@ -0,0 +1,36 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +void bli_trsm3m1_cntl_init( void ); +void bli_trsm3m1_cntl_finalize( void ); diff --git a/frame/3/trsm/3m1/bli_trsm3m1_entry.c b/frame/3/trsm/3m1/bli_trsm3m1_entry.c new file mode 100644 index 000000000..c56fbe236 --- /dev/null +++ b/frame/3/trsm/3m1/bli_trsm3m1_entry.c @@ -0,0 +1,49 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "blis.h" + +extern trsm_t* trsm3m1_l_cntl; +extern trsm_t* trsm3m1_r_cntl; + +void bli_trsm3m1_entry( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b ) +{ + bli_trsm_front( side, alpha, a, b, + trsm3m1_l_cntl, + trsm3m1_r_cntl ); +} + diff --git a/frame/3/trsm/3m1/bli_trsm3m1_entry.h b/frame/3/trsm/3m1/bli_trsm3m1_entry.h new file mode 100644 index 000000000..c93b499cd --- /dev/null +++ b/frame/3/trsm/3m1/bli_trsm3m1_entry.h @@ -0,0 +1,39 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_trsm3m1_entry( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b ); + diff --git a/frame/3/trsm/3m/ukernels/bli_gemmtrsm3m_l_ukr_ref.c b/frame/3/trsm/3m1/ukernels/bli_gemmtrsm3m1_l_ukr_ref.c similarity index 93% rename from frame/3/trsm/3m/ukernels/bli_gemmtrsm3m_l_ukr_ref.c rename to frame/3/trsm/3m1/ukernels/bli_gemmtrsm3m1_l_ukr_ref.c index a176fb890..5d5b93d1e 100644 --- a/frame/3/trsm/3m/ukernels/bli_gemmtrsm3m_l_ukr_ref.c +++ b/frame/3/trsm/3m1/ukernels/bli_gemmtrsm3m1_l_ukr_ref.c @@ -190,22 +190,22 @@ void PASTEMAC(ch,varname)( \ \ \ /* -PASTEMAC(chr,fprintm)( stdout, "gemmtrsm3m_l_ukr: b11_r after", m, n, \ +PASTEMAC(chr,fprintm)( stdout, "gemmtrsm3m1_l_ukr: b11_r after", m, n, \ b11_r, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ -PASTEMAC(chr,fprintm)( stdout, "gemmtrsm3m_l_ukr: b11_i after", m, n, \ +PASTEMAC(chr,fprintm)( stdout, "gemmtrsm3m1_l_ukr: b11_i after", m, n, \ b11_i, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ */ \ /* -PASTEMAC(chr,fprintm)( stdout, "gemmtrsm3m_l_ukr: b01_r", k, n, \ +PASTEMAC(chr,fprintm)( stdout, "gemmtrsm3m1_l_ukr: b01_r", k, n, \ b01_r, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ -PASTEMAC(chr,fprintm)( stdout, "gemmtrsm3m_l_ukr: b01_i", k, n, \ +PASTEMAC(chr,fprintm)( stdout, "gemmtrsm3m1_l_ukr: b01_i", k, n, \ b01_i, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ -PASTEMAC(chr,fprintm)( stdout, "gemmtrsm3m_l_ukr: b11_r", m, n, \ +PASTEMAC(chr,fprintm)( stdout, "gemmtrsm3m1_l_ukr: b11_r", m, n, 
\ b11_r, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ -PASTEMAC(chr,fprintm)( stdout, "gemmtrsm3m_l_ukr: b11_i", m, n, \ +PASTEMAC(chr,fprintm)( stdout, "gemmtrsm3m1_l_ukr: b11_i", m, n, \ b11_i, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ */ \ } -INSERT_GENTFUNCCO_BASIC2( gemmtrsm3m_l_ukr_ref, GEMM_UKERNEL, TRSM3M_L_UKERNEL ) +INSERT_GENTFUNCCO_BASIC2( gemmtrsm3m1_l_ukr_ref, GEMM_UKERNEL, TRSM3M1_L_UKERNEL ) diff --git a/frame/3/trsm/4m/ukernels/bli_gemmtrsm4m_l_ukr_ref.h b/frame/3/trsm/3m1/ukernels/bli_gemmtrsm3m1_l_ukr_ref.h similarity index 97% rename from frame/3/trsm/4m/ukernels/bli_gemmtrsm4m_l_ukr_ref.h rename to frame/3/trsm/3m1/ukernels/bli_gemmtrsm3m1_l_ukr_ref.h index 597fd05df..c72c7410c 100644 --- a/frame/3/trsm/4m/ukernels/bli_gemmtrsm4m_l_ukr_ref.h +++ b/frame/3/trsm/3m1/ukernels/bli_gemmtrsm3m1_l_ukr_ref.h @@ -47,5 +47,5 @@ void PASTEMAC(ch,varname)( \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( gemmtrsm4m_l_ukr_ref ) +INSERT_GENTPROTCO_BASIC( gemmtrsm3m1_l_ukr_ref ) diff --git a/frame/3/trsm/3m/ukernels/bli_gemmtrsm3m_u_ukr_ref.c b/frame/3/trsm/3m1/ukernels/bli_gemmtrsm3m1_u_ukr_ref.c similarity index 98% rename from frame/3/trsm/3m/ukernels/bli_gemmtrsm3m_u_ukr_ref.c rename to frame/3/trsm/3m1/ukernels/bli_gemmtrsm3m1_u_ukr_ref.c index 59e66e9c8..59d0fa352 100644 --- a/frame/3/trsm/3m/ukernels/bli_gemmtrsm3m_u_ukr_ref.c +++ b/frame/3/trsm/3m1/ukernels/bli_gemmtrsm3m1_u_ukr_ref.c @@ -189,5 +189,5 @@ void PASTEMAC(ch,varname)( \ data ); \ } -INSERT_GENTFUNCCO_BASIC2( gemmtrsm3m_u_ukr_ref, GEMM_UKERNEL, TRSM3M_U_UKERNEL ) +INSERT_GENTFUNCCO_BASIC2( gemmtrsm3m1_u_ukr_ref, GEMM_UKERNEL, TRSM3M1_U_UKERNEL ) diff --git a/frame/3/trsm/4m/ukernels/bli_gemmtrsm4m_u_ukr_ref.h b/frame/3/trsm/3m1/ukernels/bli_gemmtrsm3m1_u_ukr_ref.h similarity index 97% rename from frame/3/trsm/4m/ukernels/bli_gemmtrsm4m_u_ukr_ref.h rename to frame/3/trsm/3m1/ukernels/bli_gemmtrsm3m1_u_ukr_ref.h index 6aa5bbb7c..0618ce134 100644 --- 
a/frame/3/trsm/4m/ukernels/bli_gemmtrsm4m_u_ukr_ref.h +++ b/frame/3/trsm/3m1/ukernels/bli_gemmtrsm3m1_u_ukr_ref.h @@ -47,5 +47,5 @@ void PASTEMAC(ch,varname)( \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( gemmtrsm4m_u_ukr_ref ) +INSERT_GENTPROTCO_BASIC( gemmtrsm3m1_u_ukr_ref ) diff --git a/frame/3/trsm/3m/ukernels/bli_trsm3m_l_ukr_ref.c b/frame/3/trsm/3m1/ukernels/bli_trsm3m1_l_ukr_ref.c similarity index 99% rename from frame/3/trsm/3m/ukernels/bli_trsm3m_l_ukr_ref.c rename to frame/3/trsm/3m1/ukernels/bli_trsm3m1_l_ukr_ref.c index 38d341ee6..705d4aee1 100644 --- a/frame/3/trsm/3m/ukernels/bli_trsm3m_l_ukr_ref.c +++ b/frame/3/trsm/3m1/ukernels/bli_trsm3m1_l_ukr_ref.c @@ -146,5 +146,5 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( trsm3m_l_ukr_ref ) +INSERT_GENTFUNCCO_BASIC0( trsm3m1_l_ukr_ref ) diff --git a/frame/3/trsm/3m/ukernels/bli_trsm3m_l_ukr_ref.h b/frame/3/trsm/3m1/ukernels/bli_trsm3m1_l_ukr_ref.h similarity index 97% rename from frame/3/trsm/3m/ukernels/bli_trsm3m_l_ukr_ref.h rename to frame/3/trsm/3m1/ukernels/bli_trsm3m1_l_ukr_ref.h index 8af951330..3de325b94 100644 --- a/frame/3/trsm/3m/ukernels/bli_trsm3m_l_ukr_ref.h +++ b/frame/3/trsm/3m1/ukernels/bli_trsm3m1_l_ukr_ref.h @@ -43,5 +43,5 @@ void PASTEMAC(ch,varname)( \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( trsm3m_l_ukr_ref ) +INSERT_GENTPROTCO_BASIC( trsm3m1_l_ukr_ref ) diff --git a/frame/3/trsm/3m/ukernels/bli_trsm3m_u_ukr_ref.c b/frame/3/trsm/3m1/ukernels/bli_trsm3m1_u_ukr_ref.c similarity index 99% rename from frame/3/trsm/3m/ukernels/bli_trsm3m_u_ukr_ref.c rename to frame/3/trsm/3m1/ukernels/bli_trsm3m1_u_ukr_ref.c index 1ab6a31fc..567abe198 100644 --- a/frame/3/trsm/3m/ukernels/bli_trsm3m_u_ukr_ref.c +++ b/frame/3/trsm/3m1/ukernels/bli_trsm3m1_u_ukr_ref.c @@ -146,5 +146,5 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( trsm3m_u_ukr_ref ) +INSERT_GENTFUNCCO_BASIC0( trsm3m1_u_ukr_ref ) diff --git a/frame/3/trsm/4m/ukernels/bli_trsm4m_u_ukr_ref.h 
b/frame/3/trsm/3m1/ukernels/bli_trsm3m1_u_ukr_ref.h similarity index 97% rename from frame/3/trsm/4m/ukernels/bli_trsm4m_u_ukr_ref.h rename to frame/3/trsm/3m1/ukernels/bli_trsm3m1_u_ukr_ref.h index e20043dbf..efa0f644e 100644 --- a/frame/3/trsm/4m/ukernels/bli_trsm4m_u_ukr_ref.h +++ b/frame/3/trsm/3m1/ukernels/bli_trsm3m1_u_ukr_ref.h @@ -43,5 +43,5 @@ void PASTEMAC(ch,varname)( \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( trsm4m_u_ukr_ref ) +INSERT_GENTPROTCO_BASIC( trsm3m1_u_ukr_ref ) diff --git a/frame/3/trsm/4m/bli_trsm4m_cntl.h b/frame/3/trsm/4m/bli_trsm4m_cntl.h deleted file mode 100644 index c9ea2af68..000000000 --- a/frame/3/trsm/4m/bli_trsm4m_cntl.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_trsm4m_cntl_init( void ); -void bli_trsm4m_cntl_finalize( void ); diff --git a/frame/3/trsm/4m/bli_trsm4m_entry.c b/frame/3/trsm/4m/bli_trsm4m_entry.c deleted file mode 100644 index c56b7ae32..000000000 --- a/frame/3/trsm/4m/bli_trsm4m_entry.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -extern trsm_t* trsm4m_l_cntl; -extern trsm_t* trsm4m_r_cntl; - -void bli_trsm4m_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b ) -{ - bli_trsm_front( side, alpha, a, b, - trsm4m_l_cntl, - trsm4m_r_cntl ); -} - diff --git a/frame/3/trsm/4m/bli_trsm4m_entry.h b/frame/3/trsm/4m/bli_trsm4m_entry.h deleted file mode 100644 index 3a5b906a4..000000000 --- a/frame/3/trsm/4m/bli_trsm4m_entry.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_trsm4m_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b ); - diff --git a/frame/3/trsm/4m/bli_trsm4m.c b/frame/3/trsm/4m1/bli_trsm4m1.c similarity index 93% rename from frame/3/trsm/4m/bli_trsm4m.c rename to frame/3/trsm/4m1/bli_trsm4m1.c index 9a3c51bf8..1eaed6313 100644 --- a/frame/3/trsm/4m/bli_trsm4m.c +++ b/frame/3/trsm/4m1/bli_trsm4m1.c @@ -37,15 +37,15 @@ // // Define object-based interface. // -void bli_trsm4m( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b ) +void bli_trsm4m1( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b ) { // Since 4m only applies to the complex domain, we use the regular // control tree for real domain cases. 
if ( bli_obj_is_complex( *b ) ) - bli_trsm4m_entry( side, alpha, a, b ); + bli_trsm4m1_entry( side, alpha, a, b ); else bli_trsm_entry( side, alpha, a, b ); } @@ -93,5 +93,5 @@ void PASTEMAC(ch,opname)( \ &bo ); \ } -INSERT_GENTFUNC_BASIC( trsm4m, trsm4m ) +INSERT_GENTFUNC_BASIC( trsm4m1, trsm4m1 ) diff --git a/frame/3/trsm/4m/bli_trsm4m.h b/frame/3/trsm/4m1/bli_trsm4m1.h similarity index 86% rename from frame/3/trsm/4m/bli_trsm4m.h rename to frame/3/trsm/4m1/bli_trsm4m1.h index 1d705a930..4146f57e9 100644 --- a/frame/3/trsm/4m/bli_trsm4m.h +++ b/frame/3/trsm/4m1/bli_trsm4m1.h @@ -32,23 +32,23 @@ */ -#include "bli_trsm4m_cntl.h" -#include "bli_trsm4m_entry.h" +#include "bli_trsm4m1_cntl.h" +#include "bli_trsm4m1_entry.h" -#include "bli_gemmtrsm4m_l_ukr_ref.h" -#include "bli_gemmtrsm4m_u_ukr_ref.h" +#include "bli_gemmtrsm4m1_l_ukr_ref.h" +#include "bli_gemmtrsm4m1_u_ukr_ref.h" -#include "bli_trsm4m_l_ukr_ref.h" -#include "bli_trsm4m_u_ukr_ref.h" +#include "bli_trsm4m1_l_ukr_ref.h" +#include "bli_trsm4m1_u_ukr_ref.h" // // Prototype object-based interface. 
// -void bli_trsm4m( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b ); +void bli_trsm4m1( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b ); // @@ -69,5 +69,5 @@ void PASTEMAC(ch,opname)( \ ctype* b, inc_t rs_b, inc_t cs_b \ ); -INSERT_GENTPROT_BASIC( trsm4m ) +INSERT_GENTPROT_BASIC( trsm4m1 ) diff --git a/frame/3/trsm/4m/bli_trsm4m_cntl.c b/frame/3/trsm/4m1/bli_trsm4m1_cntl.c similarity index 65% rename from frame/3/trsm/4m/bli_trsm4m_cntl.c rename to frame/3/trsm/4m1/bli_trsm4m1_cntl.c index ff1053872..191e46f61 100644 --- a/frame/3/trsm/4m/bli_trsm4m_cntl.c +++ b/frame/3/trsm/4m1/bli_trsm4m1_cntl.c @@ -36,101 +36,101 @@ extern scalm_t* scalm_cntl; -extern blksz_t* gemm4m_mc; -extern blksz_t* gemm4m_nc; -extern blksz_t* gemm4m_kc; -extern blksz_t* gemm4m_mr; -extern blksz_t* gemm4m_nr; -extern blksz_t* gemm4m_kr; +extern blksz_t* gemm4m1_mc; +extern blksz_t* gemm4m1_nc; +extern blksz_t* gemm4m1_kc; +extern blksz_t* gemm4m1_mr; +extern blksz_t* gemm4m1_nr; +extern blksz_t* gemm4m1_kr; -extern func_t* gemm4m_ukrs; +extern func_t* gemm4m1_ukrs; -func_t* gemmtrsm4m_l_ukrs; -func_t* gemmtrsm4m_u_ukrs; +func_t* gemmtrsm4m1_l_ukrs; +func_t* gemmtrsm4m1_u_ukrs; -func_t* trsm4m_l_ukrs; -func_t* trsm4m_u_ukrs; +func_t* trsm4m1_l_ukrs; +func_t* trsm4m1_u_ukrs; -packm_t* trsm4m_l_packa_cntl; -packm_t* trsm4m_l_packb_cntl; +packm_t* trsm4m1_l_packa_cntl; +packm_t* trsm4m1_l_packb_cntl; -packm_t* trsm4m_r_packa_cntl; -packm_t* trsm4m_r_packb_cntl; +packm_t* trsm4m1_r_packa_cntl; +packm_t* trsm4m1_r_packb_cntl; -trsm_t* trsm4m_cntl_bp_ke; +trsm_t* trsm4m1_cntl_bp_ke; -trsm_t* trsm4m_l_cntl_op_bp; -trsm_t* trsm4m_l_cntl_mm_op; -trsm_t* trsm4m_l_cntl_vl_mm; +trsm_t* trsm4m1_l_cntl_op_bp; +trsm_t* trsm4m1_l_cntl_mm_op; +trsm_t* trsm4m1_l_cntl_vl_mm; -trsm_t* trsm4m_r_cntl_op_bp; -trsm_t* trsm4m_r_cntl_mm_op; -trsm_t* trsm4m_r_cntl_vl_mm; +trsm_t* trsm4m1_r_cntl_op_bp; +trsm_t* trsm4m1_r_cntl_mm_op; +trsm_t* trsm4m1_r_cntl_vl_mm; -trsm_t* trsm4m_l_cntl; -trsm_t* 
trsm4m_r_cntl; +trsm_t* trsm4m1_l_cntl; +trsm_t* trsm4m1_r_cntl; -void bli_trsm4m_cntl_init() +void bli_trsm4m1_cntl_init() { // Create function pointer objects for each datatype-specific - // gemmtrsm4m_l and gemmtrsm4m_u micro-kernel. - gemmtrsm4m_l_ukrs + // gemmtrsm4m1_l and gemmtrsm4m1_u micro-kernel. + gemmtrsm4m1_l_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, - BLIS_CGEMMTRSM4M_L_UKERNEL, FALSE, - BLIS_ZGEMMTRSM4M_L_UKERNEL, FALSE ); + BLIS_CGEMMTRSM4M1_L_UKERNEL, FALSE, + BLIS_ZGEMMTRSM4M1_L_UKERNEL, FALSE ); - gemmtrsm4m_u_ukrs + gemmtrsm4m1_u_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, - BLIS_CGEMMTRSM4M_U_UKERNEL, FALSE, - BLIS_ZGEMMTRSM4M_U_UKERNEL, FALSE ); + BLIS_CGEMMTRSM4M1_U_UKERNEL, FALSE, + BLIS_ZGEMMTRSM4M1_U_UKERNEL, FALSE ); // Create function pointer objects for each datatype-specific - // trsm4m_l and trsm4m_u micro-kernel. - trsm4m_l_ukrs + // trsm4m1_l and trsm4m1_u micro-kernel. + trsm4m1_l_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, - BLIS_CTRSM4M_L_UKERNEL, FALSE, - BLIS_ZTRSM4M_L_UKERNEL, FALSE ); + BLIS_CTRSM4M1_L_UKERNEL, FALSE, + BLIS_ZTRSM4M1_L_UKERNEL, FALSE ); - trsm4m_u_ukrs + trsm4m1_u_ukrs = bli_func_obj_create( NULL, FALSE, NULL, FALSE, - BLIS_CTRSM4M_U_UKERNEL, FALSE, - BLIS_ZTRSM4M_U_UKERNEL, FALSE ); + BLIS_CTRSM4M1_U_UKERNEL, FALSE, + BLIS_ZTRSM4M1_U_UKERNEL, FALSE ); // Create control tree objects for packm operations (left side). - trsm4m_l_packa_cntl + trsm4m1_l_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, // IMPORTANT: n dim multiple must be mr to // support right and bottom-right edge cases - gemm4m_mr, - gemm4m_mr, + gemm4m1_mr, + gemm4m1_mr, TRUE, // invert diagonal TRUE, // reverse iteration if upper? FALSE, // reverse iteration if lower? 
BLIS_PACKED_ROW_PANELS_4MI, BLIS_BUFFER_FOR_A_BLOCK ); - trsm4m_l_packb_cntl + trsm4m1_l_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, // IMPORTANT: m dim multiple must be mr since // B_pack is updated (ie: serves as C) in trsm - gemm4m_mr, - gemm4m_nr, + gemm4m1_mr, + gemm4m1_nr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? @@ -138,24 +138,24 @@ void bli_trsm4m_cntl_init() BLIS_BUFFER_FOR_B_PANEL ); // Create control tree objects for packm operations (right side). - trsm4m_r_packa_cntl + trsm4m1_r_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - gemm4m_nr, - gemm4m_mr, + gemm4m1_nr, + gemm4m1_mr, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS_4MI, BLIS_BUFFER_FOR_A_BLOCK ); - trsm4m_r_packb_cntl + trsm4m1_r_packb_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - gemm4m_mr, - gemm4m_mr, + gemm4m1_mr, + gemm4m1_mr, TRUE, // invert diagonal FALSE, // reverse iteration if upper? TRUE, // reverse iteration if lower? @@ -164,137 +164,137 @@ void bli_trsm4m_cntl_init() // Create control tree object for lowest-level block-panel kernel. - trsm4m_cntl_bp_ke + trsm4m1_cntl_bp_ke = bli_trsm_cntl_obj_create( BLIS_UNB_OPT, BLIS_VARIANT2, NULL, - gemm4m_ukrs, - gemmtrsm4m_l_ukrs, - gemmtrsm4m_u_ukrs, + gemm4m1_ukrs, + gemmtrsm4m1_l_ukrs, + gemmtrsm4m1_u_ukrs, NULL, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem (left side). 
- trsm4m_l_cntl_op_bp + trsm4m1_l_cntl_op_bp = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, - gemm4m_mc, + gemm4m1_mc, NULL, NULL, NULL, NULL, - trsm4m_l_packa_cntl, - trsm4m_l_packb_cntl, + trsm4m1_l_packa_cntl, + trsm4m1_l_packb_cntl, NULL, - trsm4m_cntl_bp_ke, + trsm4m1_cntl_bp_ke, NULL, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates (left side). - trsm4m_l_cntl_mm_op + trsm4m1_l_cntl_mm_op = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, - gemm4m_kc, + gemm4m1_kc, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - trsm4m_l_cntl_op_bp, + trsm4m1_l_cntl_op_bp, NULL, NULL ); // Create control tree object for very large problem via multiple // general problems (left side). - trsm4m_l_cntl_vl_mm + trsm4m1_l_cntl_vl_mm = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - gemm4m_nc, + gemm4m1_nc, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - trsm4m_l_cntl_mm_op, + trsm4m1_l_cntl_mm_op, NULL, NULL ); // Create control tree object for outer panel (to block-panel) // problem (right side). - trsm4m_r_cntl_op_bp + trsm4m1_r_cntl_op_bp = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, - gemm4m_mc, + gemm4m1_mc, NULL, NULL, NULL, NULL, - trsm4m_r_packa_cntl, - trsm4m_r_packb_cntl, + trsm4m1_r_packa_cntl, + trsm4m1_r_packb_cntl, NULL, - trsm4m_cntl_bp_ke, + trsm4m1_cntl_bp_ke, NULL, NULL ); // Create control tree object for general problem via multiple // rank-k (outer panel) updates (right side). - trsm4m_r_cntl_mm_op + trsm4m1_r_cntl_mm_op = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, - gemm4m_kc, + gemm4m1_kc, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - trsm4m_r_cntl_op_bp, + trsm4m1_r_cntl_op_bp, NULL, NULL ); // Create control tree object for very large problem via multiple // general problems (right side). 
- trsm4m_r_cntl_vl_mm + trsm4m1_r_cntl_vl_mm = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - gemm4m_nc, + gemm4m1_nc, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - trsm4m_r_cntl_mm_op, + trsm4m1_r_cntl_mm_op, NULL, NULL ); // Alias the "master" trsm control trees to shorter names. - trsm4m_l_cntl = trsm4m_l_cntl_vl_mm; - trsm4m_r_cntl = trsm4m_r_cntl_vl_mm; + trsm4m1_l_cntl = trsm4m1_l_cntl_vl_mm; + trsm4m1_r_cntl = trsm4m1_r_cntl_vl_mm; } -void bli_trsm4m_cntl_finalize() +void bli_trsm4m1_cntl_finalize() { - bli_func_obj_free( gemmtrsm4m_l_ukrs ); - bli_func_obj_free( gemmtrsm4m_u_ukrs ); - bli_func_obj_free( trsm4m_l_ukrs ); - bli_func_obj_free( trsm4m_u_ukrs ); + bli_func_obj_free( gemmtrsm4m1_l_ukrs ); + bli_func_obj_free( gemmtrsm4m1_u_ukrs ); + bli_func_obj_free( trsm4m1_l_ukrs ); + bli_func_obj_free( trsm4m1_u_ukrs ); - bli_cntl_obj_free( trsm4m_l_packa_cntl ); - bli_cntl_obj_free( trsm4m_l_packb_cntl ); - bli_cntl_obj_free( trsm4m_r_packa_cntl ); - bli_cntl_obj_free( trsm4m_r_packb_cntl ); + bli_cntl_obj_free( trsm4m1_l_packa_cntl ); + bli_cntl_obj_free( trsm4m1_l_packb_cntl ); + bli_cntl_obj_free( trsm4m1_r_packa_cntl ); + bli_cntl_obj_free( trsm4m1_r_packb_cntl ); - bli_cntl_obj_free( trsm4m_cntl_bp_ke ); + bli_cntl_obj_free( trsm4m1_cntl_bp_ke ); - bli_cntl_obj_free( trsm4m_l_cntl_op_bp ); - bli_cntl_obj_free( trsm4m_l_cntl_mm_op ); - bli_cntl_obj_free( trsm4m_l_cntl_vl_mm ); - bli_cntl_obj_free( trsm4m_r_cntl_op_bp ); - bli_cntl_obj_free( trsm4m_r_cntl_mm_op ); - bli_cntl_obj_free( trsm4m_r_cntl_vl_mm ); + bli_cntl_obj_free( trsm4m1_l_cntl_op_bp ); + bli_cntl_obj_free( trsm4m1_l_cntl_mm_op ); + bli_cntl_obj_free( trsm4m1_l_cntl_vl_mm ); + bli_cntl_obj_free( trsm4m1_r_cntl_op_bp ); + bli_cntl_obj_free( trsm4m1_r_cntl_mm_op ); + bli_cntl_obj_free( trsm4m1_r_cntl_vl_mm ); } diff --git a/frame/3/trsm/4m1/bli_trsm4m1_cntl.h b/frame/3/trsm/4m1/bli_trsm4m1_cntl.h new file mode 100644 index 000000000..7a3769186 --- /dev/null +++ 
b/frame/3/trsm/4m1/bli_trsm4m1_cntl.h @@ -0,0 +1,36 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +void bli_trsm4m1_cntl_init( void ); +void bli_trsm4m1_cntl_finalize( void ); diff --git a/frame/3/trsm/4m1/bli_trsm4m1_entry.c b/frame/3/trsm/4m1/bli_trsm4m1_entry.c new file mode 100644 index 000000000..7f9c06ccc --- /dev/null +++ b/frame/3/trsm/4m1/bli_trsm4m1_entry.c @@ -0,0 +1,49 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "blis.h" + +extern trsm_t* trsm4m1_l_cntl; +extern trsm_t* trsm4m1_r_cntl; + +void bli_trsm4m1_entry( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b ) +{ + bli_trsm_front( side, alpha, a, b, + trsm4m1_l_cntl, + trsm4m1_r_cntl ); +} + diff --git a/frame/3/trsm/4m1/bli_trsm4m1_entry.h b/frame/3/trsm/4m1/bli_trsm4m1_entry.h new file mode 100644 index 000000000..1574f143c --- /dev/null +++ b/frame/3/trsm/4m1/bli_trsm4m1_entry.h @@ -0,0 +1,39 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_trsm4m1_entry( side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b ); + diff --git a/frame/3/trsm/4m/ukernels/bli_gemmtrsm4m_l_ukr_ref.c b/frame/3/trsm/4m1/ukernels/bli_gemmtrsm4m1_l_ukr_ref.c similarity index 98% rename from frame/3/trsm/4m/ukernels/bli_gemmtrsm4m_l_ukr_ref.c rename to frame/3/trsm/4m1/ukernels/bli_gemmtrsm4m1_l_ukr_ref.c index ee404e439..f73106485 100644 --- a/frame/3/trsm/4m/ukernels/bli_gemmtrsm4m_l_ukr_ref.c +++ b/frame/3/trsm/4m1/ukernels/bli_gemmtrsm4m1_l_ukr_ref.c @@ -154,5 +154,5 @@ void PASTEMAC(ch,varname)( \ data ); \ } -INSERT_GENTFUNCCO_BASIC2( gemmtrsm4m_l_ukr_ref, GEMM_UKERNEL, TRSM4M_L_UKERNEL ) +INSERT_GENTFUNCCO_BASIC2( gemmtrsm4m1_l_ukr_ref, GEMM_UKERNEL, TRSM4M1_L_UKERNEL ) diff --git a/frame/3/trsm/3m/ukernels/bli_gemmtrsm3m_l_ukr_ref.h b/frame/3/trsm/4m1/ukernels/bli_gemmtrsm4m1_l_ukr_ref.h similarity index 97% rename from frame/3/trsm/3m/ukernels/bli_gemmtrsm3m_l_ukr_ref.h rename to frame/3/trsm/4m1/ukernels/bli_gemmtrsm4m1_l_ukr_ref.h index 7b8dc13ab..d70aac5a7 100644 --- a/frame/3/trsm/3m/ukernels/bli_gemmtrsm3m_l_ukr_ref.h +++ b/frame/3/trsm/4m1/ukernels/bli_gemmtrsm4m1_l_ukr_ref.h @@ -47,5 +47,5 @@ void PASTEMAC(ch,varname)( \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( gemmtrsm3m_l_ukr_ref ) +INSERT_GENTPROTCO_BASIC( gemmtrsm4m1_l_ukr_ref ) diff --git a/frame/3/trsm/4m/ukernels/bli_gemmtrsm4m_u_ukr_ref.c b/frame/3/trsm/4m1/ukernels/bli_gemmtrsm4m1_u_ukr_ref.c similarity index 94% 
rename from frame/3/trsm/4m/ukernels/bli_gemmtrsm4m_u_ukr_ref.c rename to frame/3/trsm/4m1/ukernels/bli_gemmtrsm4m1_u_ukr_ref.c index 2a72db508..1df6451d6 100644 --- a/frame/3/trsm/4m/ukernels/bli_gemmtrsm4m_u_ukr_ref.c +++ b/frame/3/trsm/4m1/ukernels/bli_gemmtrsm4m1_u_ukr_ref.c @@ -80,13 +80,13 @@ void PASTEMAC(ch,varname)( \ dim_t i, j; \ \ /* -PASTEMAC(chr,fprintm)( stdout, "gemmtrsm4m_ukr: a1112p_r", m, k+m, \ +PASTEMAC(chr,fprintm)( stdout, "gemmtrsm4m1_ukr: a1112p_r", m, k+m, \ a11_r, 1, PASTEMAC(chr,packmr), "%4.1f", "" ); \ -PASTEMAC(chr,fprintm)( stdout, "gemmtrsm4m_ukr: a1112p_i", m, k+m, \ +PASTEMAC(chr,fprintm)( stdout, "gemmtrsm4m1_ukr: a1112p_i", m, k+m, \ a11_r+is_a, 1, PASTEMAC(chr,packmr), "%4.1f", "" ); \ -PASTEMAC(chr,fprintm)( stdout, "gemmtrsm4m_ukr: b1121p_r", k+m, n, \ +PASTEMAC(chr,fprintm)( stdout, "gemmtrsm4m1_ukr: b1121p_r", k+m, n, \ b11_r, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ -PASTEMAC(chr,fprintm)( stdout, "gemmtrsm4m_ukr: b1121p_i", k+m, n, \ +PASTEMAC(chr,fprintm)( stdout, "gemmtrsm4m1_ukr: b1121p_i", k+m, n, \ b11_r+is_b, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ */ \ \ @@ -165,5 +165,5 @@ PASTEMAC(chr,fprintm)( stdout, "gemmtrsm4m_ukr: b1121p_i", k+m, n, \ data ); \ } -INSERT_GENTFUNCCO_BASIC2( gemmtrsm4m_u_ukr_ref, GEMM_UKERNEL, TRSM4M_U_UKERNEL ) +INSERT_GENTFUNCCO_BASIC2( gemmtrsm4m1_u_ukr_ref, GEMM_UKERNEL, TRSM4M1_U_UKERNEL ) diff --git a/frame/3/trsm/3m/ukernels/bli_gemmtrsm3m_u_ukr_ref.h b/frame/3/trsm/4m1/ukernels/bli_gemmtrsm4m1_u_ukr_ref.h similarity index 97% rename from frame/3/trsm/3m/ukernels/bli_gemmtrsm3m_u_ukr_ref.h rename to frame/3/trsm/4m1/ukernels/bli_gemmtrsm4m1_u_ukr_ref.h index 0d1b09ea7..8d5b7f2b5 100644 --- a/frame/3/trsm/3m/ukernels/bli_gemmtrsm3m_u_ukr_ref.h +++ b/frame/3/trsm/4m1/ukernels/bli_gemmtrsm4m1_u_ukr_ref.h @@ -47,5 +47,5 @@ void PASTEMAC(ch,varname)( \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( gemmtrsm3m_u_ukr_ref ) +INSERT_GENTPROTCO_BASIC( gemmtrsm4m1_u_ukr_ref ) diff --git 
a/frame/3/trsm/4m/ukernels/bli_trsm4m_l_ukr_ref.c b/frame/3/trsm/4m1/ukernels/bli_trsm4m1_l_ukr_ref.c similarity index 99% rename from frame/3/trsm/4m/ukernels/bli_trsm4m_l_ukr_ref.c rename to frame/3/trsm/4m1/ukernels/bli_trsm4m1_l_ukr_ref.c index 3d571a780..d03ef40a4 100644 --- a/frame/3/trsm/4m/ukernels/bli_trsm4m_l_ukr_ref.c +++ b/frame/3/trsm/4m1/ukernels/bli_trsm4m1_l_ukr_ref.c @@ -138,5 +138,5 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( trsm4m_l_ukr_ref ) +INSERT_GENTFUNCCO_BASIC0( trsm4m1_l_ukr_ref ) diff --git a/frame/3/trsm/3m/ukernels/bli_trsm3m_u_ukr_ref.h b/frame/3/trsm/4m1/ukernels/bli_trsm4m1_l_ukr_ref.h similarity index 97% rename from frame/3/trsm/3m/ukernels/bli_trsm3m_u_ukr_ref.h rename to frame/3/trsm/4m1/ukernels/bli_trsm4m1_l_ukr_ref.h index ebd51e93d..08a4280cd 100644 --- a/frame/3/trsm/3m/ukernels/bli_trsm3m_u_ukr_ref.h +++ b/frame/3/trsm/4m1/ukernels/bli_trsm4m1_l_ukr_ref.h @@ -43,5 +43,5 @@ void PASTEMAC(ch,varname)( \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( trsm3m_u_ukr_ref ) +INSERT_GENTPROTCO_BASIC( trsm4m1_l_ukr_ref ) diff --git a/frame/3/trsm/4m/ukernels/bli_trsm4m_u_ukr_ref.c b/frame/3/trsm/4m1/ukernels/bli_trsm4m1_u_ukr_ref.c similarity index 99% rename from frame/3/trsm/4m/ukernels/bli_trsm4m_u_ukr_ref.c rename to frame/3/trsm/4m1/ukernels/bli_trsm4m1_u_ukr_ref.c index 4ddb1f0c2..f932ba5ea 100644 --- a/frame/3/trsm/4m/ukernels/bli_trsm4m_u_ukr_ref.c +++ b/frame/3/trsm/4m1/ukernels/bli_trsm4m1_u_ukr_ref.c @@ -138,5 +138,5 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNCCO_BASIC0( trsm4m_u_ukr_ref ) +INSERT_GENTFUNCCO_BASIC0( trsm4m1_u_ukr_ref ) diff --git a/frame/3/trsm/4m/ukernels/bli_trsm4m_l_ukr_ref.h b/frame/3/trsm/4m1/ukernels/bli_trsm4m1_u_ukr_ref.h similarity index 97% rename from frame/3/trsm/4m/ukernels/bli_trsm4m_l_ukr_ref.h rename to frame/3/trsm/4m1/ukernels/bli_trsm4m1_u_ukr_ref.h index 85bd7009d..63dba14fd 100644 --- a/frame/3/trsm/4m/ukernels/bli_trsm4m_l_ukr_ref.h +++ 
b/frame/3/trsm/4m1/ukernels/bli_trsm4m1_u_ukr_ref.h @@ -43,5 +43,5 @@ void PASTEMAC(ch,varname)( \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( trsm4m_l_ukr_ref ) +INSERT_GENTPROTCO_BASIC( trsm4m1_u_ukr_ref ) diff --git a/frame/3/trsm/bli_trsm.c b/frame/3/trsm/bli_trsm.c index 4b74acf61..9572ca6c9 100644 --- a/frame/3/trsm/bli_trsm.c +++ b/frame/3/trsm/bli_trsm.c @@ -44,8 +44,8 @@ void bli_trsm( side_t side, { num_t dt = bli_obj_datatype( *b ); - if ( bli_3m_is_enabled_dt( dt ) ) bli_trsm3m_entry( side, alpha, a, b ); - else if ( bli_4m_is_enabled_dt( dt ) ) bli_trsm4m_entry( side, alpha, a, b ); + if ( bli_3m1_is_enabled_dt( dt ) ) bli_trsm3m1_entry( side, alpha, a, b ); + else if ( bli_4m1_is_enabled_dt( dt ) ) bli_trsm4m1_entry( side, alpha, a, b ); else bli_trsm_entry( side, alpha, a, b ); } diff --git a/frame/3/trsm/bli_trsm.h b/frame/3/trsm/bli_trsm.h index c4a08db09..598e876a6 100644 --- a/frame/3/trsm/bli_trsm.h +++ b/frame/3/trsm/bli_trsm.h @@ -63,8 +63,8 @@ #include "bli_trsm_l_ukr_ref.h" #include "bli_trsm_u_ukr_ref.h" -#include "bli_trsm4m.h" -#include "bli_trsm3m.h" +#include "bli_trsm4m1.h" +#include "bli_trsm3m1.h" // diff --git a/frame/3/trsm/bli_trsm_ll_ker_var2.c b/frame/3/trsm/bli_trsm_ll_ker_var2.c index 1c0327690..1a190dda2 100644 --- a/frame/3/trsm/bli_trsm_ll_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ll_ker_var2.c @@ -242,7 +242,7 @@ void PASTEMAC(ch,varname)( \ needed because some parameter combinations of trsm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of B (the non-triangular - matrix), which is used by 3m and 4m implementations, we need + matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = ( k % MR != 0 ? 
k + MR - ( k % MR ) : k ); \ \ @@ -412,7 +412,7 @@ void PASTEMAC(ch,varname)( \ bli_auxinfo_set_next_a( a2, aux ); \ bli_auxinfo_set_next_b( b2, aux ); \ \ - /* Save the 4m/3m imaginary stride of A to the auxinfo_t + /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( PACKMR * k_a1011, aux ); \ \ @@ -469,7 +469,7 @@ void PASTEMAC(ch,varname)( \ bli_auxinfo_set_next_a( a2, aux ); \ bli_auxinfo_set_next_b( b2, aux ); \ \ - /* Save the 4m/3m imaginary stride of A to the auxinfo_t + /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, aux ); \ \ diff --git a/frame/3/trsm/bli_trsm_lu_ker_var2.c b/frame/3/trsm/bli_trsm_lu_ker_var2.c index 3054a7f90..753063894 100644 --- a/frame/3/trsm/bli_trsm_lu_ker_var2.c +++ b/frame/3/trsm/bli_trsm_lu_ker_var2.c @@ -243,7 +243,7 @@ void PASTEMAC(ch,varname)( \ needed because some parameter combinations of trsm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of B (the non-triangular - matrix), which is used by 3m and 4m implementations, we need + matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = ( k % MR != 0 ? k + MR - ( k % MR ) : k ); \ \ @@ -422,7 +422,7 @@ void PASTEMAC(ch,varname)( \ bli_auxinfo_set_next_a( a2, aux ); \ bli_auxinfo_set_next_b( b2, aux ); \ \ - /* Save the 4m/3m imaginary stride of A to the auxinfo_t + /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( PACKMR * k_a1112, aux ); \ \ @@ -479,7 +479,7 @@ void PASTEMAC(ch,varname)( \ bli_auxinfo_set_next_a( a2, aux ); \ bli_auxinfo_set_next_b( b2, aux ); \ \ - /* Save the 4m/3m imaginary stride of A to the auxinfo_t + /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. 
*/ \ bli_auxinfo_set_is_a( istep_a, aux ); \ \ diff --git a/frame/3/trsm/bli_trsm_query.c b/frame/3/trsm/bli_trsm_query.c index 0a7ba82ae..f49e23f4c 100644 --- a/frame/3/trsm/bli_trsm_query.c +++ b/frame/3/trsm/bli_trsm_query.c @@ -34,28 +34,28 @@ #include "blis.h" -extern func_t* gemmtrsm3m_l_ukrs; -extern func_t* gemmtrsm3m_u_ukrs; -extern func_t* gemmtrsm4m_l_ukrs; -extern func_t* gemmtrsm4m_u_ukrs; +extern func_t* gemmtrsm3m1_l_ukrs; +extern func_t* gemmtrsm3m1_u_ukrs; +extern func_t* gemmtrsm4m1_l_ukrs; +extern func_t* gemmtrsm4m1_u_ukrs; extern func_t* gemmtrsm_l_ukrs; extern func_t* gemmtrsm_u_ukrs; -extern func_t* trsm3m_l_ukrs; -extern func_t* trsm3m_u_ukrs; -extern func_t* trsm4m_l_ukrs; -extern func_t* trsm4m_u_ukrs; +extern func_t* trsm3m1_l_ukrs; +extern func_t* trsm3m1_u_ukrs; +extern func_t* trsm4m1_l_ukrs; +extern func_t* trsm4m1_u_ukrs; extern func_t* trsm_l_ukrs; extern func_t* trsm_u_ukrs; func_t* bli_gemmtrsm_query_ukrs( uplo_t uplo, num_t dt ) { - if ( bli_3m_is_enabled_dt( dt ) ) - return ( bli_is_lower( uplo ) ? gemmtrsm3m_l_ukrs - : gemmtrsm3m_u_ukrs ); - else if ( bli_4m_is_enabled_dt( dt ) ) - return ( bli_is_lower( uplo ) ? gemmtrsm4m_l_ukrs - : gemmtrsm4m_u_ukrs ); + if ( bli_3m1_is_enabled_dt( dt ) ) + return ( bli_is_lower( uplo ) ? gemmtrsm3m1_l_ukrs + : gemmtrsm3m1_u_ukrs ); + else if ( bli_4m1_is_enabled_dt( dt ) ) + return ( bli_is_lower( uplo ) ? gemmtrsm4m1_l_ukrs + : gemmtrsm4m1_u_ukrs ); else return ( bli_is_lower( uplo ) ? gemmtrsm_l_ukrs : gemmtrsm_u_ukrs ); @@ -63,12 +63,12 @@ func_t* bli_gemmtrsm_query_ukrs( uplo_t uplo, num_t dt ) func_t* bli_trsm_query_ukrs( uplo_t uplo, num_t dt ) { - if ( bli_3m_is_enabled_dt( dt ) ) - return ( bli_is_lower( uplo ) ? trsm3m_l_ukrs - : trsm3m_u_ukrs ); - else if ( bli_4m_is_enabled_dt( dt ) ) - return ( bli_is_lower( uplo ) ? trsm4m_l_ukrs - : trsm4m_u_ukrs ); + if ( bli_3m1_is_enabled_dt( dt ) ) + return ( bli_is_lower( uplo ) ? 
trsm3m1_l_ukrs + : trsm3m1_u_ukrs ); + else if ( bli_4m1_is_enabled_dt( dt ) ) + return ( bli_is_lower( uplo ) ? trsm4m1_l_ukrs + : trsm4m1_u_ukrs ); else return ( bli_is_lower( uplo ) ? trsm_l_ukrs : trsm_u_ukrs ); @@ -76,9 +76,9 @@ func_t* bli_trsm_query_ukrs( uplo_t uplo, num_t dt ) char* bli_trsm_query_impl_string( num_t dt ) { - if ( bli_3m_is_enabled_dt( dt ) ) return bli_3m_get_string(); - else if ( bli_4m_is_enabled_dt( dt ) ) return bli_4m_get_string(); - else return bli_native_get_string(); + if ( bli_3m1_is_enabled_dt( dt ) ) return bli_3m1_get_string(); + else if ( bli_4m1_is_enabled_dt( dt ) ) return bli_4m1_get_string(); + else return bli_native_get_string(); } kimpl_t bli_gemmtrsm_l_ukernel_impl_type( num_t dt ) @@ -92,12 +92,12 @@ kimpl_t bli_gemmtrsm_l_ukernel_impl_type( num_t dt ) p == BLIS_ZGEMMTRSM_L_UKERNEL_REF ) return BLIS_REFERENCE_UKERNEL; else if ( - p == BLIS_CGEMMTRSM3M_L_UKERNEL_REF || - p == BLIS_ZGEMMTRSM3M_L_UKERNEL_REF + p == BLIS_CGEMMTRSM3M1_L_UKERNEL_REF || + p == BLIS_ZGEMMTRSM3M1_L_UKERNEL_REF ) return BLIS_VIRTUAL3M_UKERNEL; else if ( - p == BLIS_CGEMMTRSM4M_L_UKERNEL_REF || - p == BLIS_ZGEMMTRSM4M_L_UKERNEL_REF + p == BLIS_CGEMMTRSM4M1_L_UKERNEL_REF || + p == BLIS_ZGEMMTRSM4M1_L_UKERNEL_REF ) return BLIS_VIRTUAL4M_UKERNEL; else return BLIS_OPTIMIZED_UKERNEL; @@ -114,12 +114,12 @@ kimpl_t bli_gemmtrsm_u_ukernel_impl_type( num_t dt ) p == BLIS_ZGEMMTRSM_U_UKERNEL_REF ) return BLIS_REFERENCE_UKERNEL; else if ( - p == BLIS_CGEMMTRSM3M_U_UKERNEL_REF || - p == BLIS_ZGEMMTRSM3M_U_UKERNEL_REF + p == BLIS_CGEMMTRSM3M1_U_UKERNEL_REF || + p == BLIS_ZGEMMTRSM3M1_U_UKERNEL_REF ) return BLIS_VIRTUAL3M_UKERNEL; else if ( - p == BLIS_CGEMMTRSM4M_U_UKERNEL_REF || - p == BLIS_ZGEMMTRSM4M_U_UKERNEL_REF + p == BLIS_CGEMMTRSM4M1_U_UKERNEL_REF || + p == BLIS_ZGEMMTRSM4M1_U_UKERNEL_REF ) return BLIS_VIRTUAL4M_UKERNEL; else return BLIS_OPTIMIZED_UKERNEL; @@ -136,12 +136,12 @@ kimpl_t bli_trsm_l_ukernel_impl_type( num_t dt ) p == 
BLIS_ZTRSM_L_UKERNEL_REF ) return BLIS_REFERENCE_UKERNEL; else if ( - p == BLIS_CTRSM3M_L_UKERNEL_REF || - p == BLIS_ZTRSM3M_L_UKERNEL_REF + p == BLIS_CTRSM3M1_L_UKERNEL_REF || + p == BLIS_ZTRSM3M1_L_UKERNEL_REF ) return BLIS_VIRTUAL3M_UKERNEL; else if ( - p == BLIS_CTRSM4M_L_UKERNEL_REF || - p == BLIS_ZTRSM4M_L_UKERNEL_REF + p == BLIS_CTRSM4M1_L_UKERNEL_REF || + p == BLIS_ZTRSM4M1_L_UKERNEL_REF ) return BLIS_VIRTUAL4M_UKERNEL; else return BLIS_OPTIMIZED_UKERNEL; @@ -158,12 +158,12 @@ kimpl_t bli_trsm_u_ukernel_impl_type( num_t dt ) p == BLIS_ZTRSM_U_UKERNEL_REF ) return BLIS_REFERENCE_UKERNEL; else if ( - p == BLIS_CTRSM3M_U_UKERNEL_REF || - p == BLIS_ZTRSM3M_U_UKERNEL_REF + p == BLIS_CTRSM3M1_U_UKERNEL_REF || + p == BLIS_ZTRSM3M1_U_UKERNEL_REF ) return BLIS_VIRTUAL3M_UKERNEL; else if ( - p == BLIS_CTRSM4M_U_UKERNEL_REF || - p == BLIS_ZTRSM4M_U_UKERNEL_REF + p == BLIS_CTRSM4M1_U_UKERNEL_REF || + p == BLIS_ZTRSM4M1_U_UKERNEL_REF ) return BLIS_VIRTUAL4M_UKERNEL; else return BLIS_OPTIMIZED_UKERNEL; diff --git a/frame/3/trsm/bli_trsm_rl_ker_var2.c b/frame/3/trsm/bli_trsm_rl_ker_var2.c index cbcd17c76..4beb448f5 100644 --- a/frame/3/trsm/bli_trsm_rl_ker_var2.c +++ b/frame/3/trsm/bli_trsm_rl_ker_var2.c @@ -251,7 +251,7 @@ void PASTEMAC(ch,varname)( \ needed because some parameter combinations of trsm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of B (the non-triangular - matrix), which is used by 3m and 4m implementations, we need + matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = ( k % NR != 0 ? k + NR - ( k % NR ) : k ); \ \ @@ -406,7 +406,8 @@ void PASTEMAC(ch,varname)( \ ps_b_cur += ( bli_is_odd( ps_b_cur ) ? 1 : 0 ); \ ps_b_cur = ( ps_b_cur * ss_b_num ) / ss_b_den; \ \ - /* Save the imaginary stride of B to the auxinfo_t object. + /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t + object. 
NOTE: We swap the values for A and B since the triangular "A" matrix is actually contained within B. */ \ bli_auxinfo_set_is_a( PACKNR * k_b1121, aux ); \ @@ -483,7 +484,8 @@ void PASTEMAC(ch,varname)( \ } \ else if ( bli_is_strictly_below_diag_n( diagoffb_j, k, NR ) ) \ { \ - /* Save the imaginary stride of B to the auxinfo_t object. + /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t + object. NOTE: We swap the values for A and B since the triangular "A" matrix is actually contained within B. */ \ bli_auxinfo_set_is_a( istep_b, aux ); \ diff --git a/frame/3/trsm/bli_trsm_ru_ker_var2.c b/frame/3/trsm/bli_trsm_ru_ker_var2.c index f4ab7b44c..9b5df7ab5 100644 --- a/frame/3/trsm/bli_trsm_ru_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ru_ker_var2.c @@ -250,7 +250,7 @@ void PASTEMAC(ch,varname)( \ needed because some parameter combinations of trsm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of B (the non-triangular - matrix), which is used by 3m and 4m implementations, we need + matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = ( k % NR != 0 ? k + NR - ( k % NR ) : k ); \ \ @@ -399,7 +399,8 @@ void PASTEMAC(ch,varname)( \ ps_b_cur += ( bli_is_odd( ps_b_cur ) ? 1 : 0 ); \ ps_b_cur = ( ps_b_cur * ss_b_num ) / ss_b_den; \ \ - /* Save the imaginary stride of B to the auxinfo_t object. + /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t + object. NOTE: We swap the values for A and B since the triangular "A" matrix is actually contained within B. */ \ bli_auxinfo_set_is_a( PACKNR * k_b0111, aux ); \ @@ -476,7 +477,8 @@ void PASTEMAC(ch,varname)( \ } \ else if ( bli_is_strictly_above_diag_n( diagoffb_j, k, NR ) ) \ { \ - /* Save the imaginary stride of B to the auxinfo_t object. + /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t + object. NOTE: We swap the values for A and B since the triangular "A" matrix is actually contained within B. 
*/ \ bli_auxinfo_set_is_a( istep_b, aux ); \ diff --git a/frame/base/bli_info.c b/frame/base/bli_info.c index 302de89a2..ff5e029a3 100644 --- a/frame/base/bli_info.c +++ b/frame/base/bli_info.c @@ -93,9 +93,9 @@ extern blksz_t* gemm_mc; extern blksz_t* gemm_nc; extern blksz_t* gemm_kc; -extern blksz_t* gemm4m_mc; -extern blksz_t* gemm4m_nc; -extern blksz_t* gemm4m_kc; +extern blksz_t* gemm4m1_mc; +extern blksz_t* gemm4m1_nc; +extern blksz_t* gemm4m1_kc; // -- Default cache blocksizes -- @@ -112,11 +112,11 @@ gint_t bli_info_get_default_mc( num_t dt ) gint_t bli_info_get_default_mc_s( void ) { bli_init(); return bli_blksz_for_type( BLIS_FLOAT, gemm_mc ); } gint_t bli_info_get_default_mc_d( void ) { bli_init(); return bli_blksz_for_type( BLIS_DOUBLE, gemm_mc ); } gint_t bli_info_get_default_mc_c( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_c() ) bsize = gemm4m_mc; + if ( bli_xm_is_enabled_c() ) bsize = gemm4m1_mc; else bsize = gemm_mc; return bli_blksz_for_type( BLIS_SCOMPLEX, bsize ); } gint_t bli_info_get_default_mc_z( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_z() ) bsize = gemm4m_mc; + if ( bli_xm_is_enabled_z() ) bsize = gemm4m1_mc; else bsize = gemm_mc; return bli_blksz_for_type( BLIS_DCOMPLEX, bsize ); } @@ -133,11 +133,11 @@ gint_t bli_info_get_default_nc( num_t dt ) gint_t bli_info_get_default_nc_s( void ) { bli_init(); return bli_blksz_for_type( BLIS_FLOAT, gemm_nc ); } gint_t bli_info_get_default_nc_d( void ) { bli_init(); return bli_blksz_for_type( BLIS_DOUBLE, gemm_nc ); } gint_t bli_info_get_default_nc_c( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_c() ) bsize = gemm4m_nc; + if ( bli_xm_is_enabled_c() ) bsize = gemm4m1_nc; else bsize = gemm_nc; return bli_blksz_for_type( BLIS_SCOMPLEX, bsize ); } gint_t bli_info_get_default_nc_z( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_z() ) bsize = gemm4m_nc; + if ( bli_xm_is_enabled_z() ) bsize = gemm4m1_nc; else bsize = gemm_nc; return 
bli_blksz_for_type( BLIS_DCOMPLEX, bsize ); } @@ -154,11 +154,11 @@ gint_t bli_info_get_default_kc( num_t dt ) gint_t bli_info_get_default_kc_s( void ) { bli_init(); return bli_blksz_for_type( BLIS_FLOAT, gemm_kc ); } gint_t bli_info_get_default_kc_d( void ) { bli_init(); return bli_blksz_for_type( BLIS_DOUBLE, gemm_kc ); } gint_t bli_info_get_default_kc_c( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_c() ) bsize = gemm4m_kc; + if ( bli_xm_is_enabled_c() ) bsize = gemm4m1_kc; else bsize = gemm_kc; return bli_blksz_for_type( BLIS_SCOMPLEX, bsize ); } gint_t bli_info_get_default_kc_z( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_z() ) bsize = gemm4m_kc; + if ( bli_xm_is_enabled_z() ) bsize = gemm4m1_kc; else bsize = gemm_kc; return bli_blksz_for_type( BLIS_DCOMPLEX, bsize ); } @@ -178,11 +178,11 @@ gint_t bli_info_get_maximum_mc( num_t dt ) gint_t bli_info_get_maximum_mc_s( void ) { bli_init(); return bli_blksz_max_for_type( BLIS_FLOAT, gemm_mc ); } gint_t bli_info_get_maximum_mc_d( void ) { bli_init(); return bli_blksz_max_for_type( BLIS_DOUBLE, gemm_mc ); } gint_t bli_info_get_maximum_mc_c( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_c() ) bsize = gemm4m_mc; + if ( bli_xm_is_enabled_c() ) bsize = gemm4m1_mc; else bsize = gemm_mc; return bli_blksz_max_for_type( BLIS_SCOMPLEX, bsize ); } gint_t bli_info_get_maximum_mc_z( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_z() ) bsize = gemm4m_mc; + if ( bli_xm_is_enabled_z() ) bsize = gemm4m1_mc; else bsize = gemm_mc; return bli_blksz_max_for_type( BLIS_DCOMPLEX, bsize ); } @@ -199,11 +199,11 @@ gint_t bli_info_get_maximum_nc( num_t dt ) gint_t bli_info_get_maximum_nc_s( void ) { bli_init(); return bli_blksz_max_for_type( BLIS_FLOAT, gemm_nc ); } gint_t bli_info_get_maximum_nc_d( void ) { bli_init(); return bli_blksz_max_for_type( BLIS_DOUBLE, gemm_nc ); } gint_t bli_info_get_maximum_nc_c( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_c() ) 
bsize = gemm4m_nc; + if ( bli_xm_is_enabled_c() ) bsize = gemm4m1_nc; else bsize = gemm_nc; return bli_blksz_max_for_type( BLIS_SCOMPLEX, bsize ); } gint_t bli_info_get_maximum_nc_z( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_z() ) bsize = gemm4m_nc; + if ( bli_xm_is_enabled_z() ) bsize = gemm4m1_nc; else bsize = gemm_nc; return bli_blksz_max_for_type( BLIS_DCOMPLEX, bsize ); } @@ -220,11 +220,11 @@ gint_t bli_info_get_maximum_kc( num_t dt ) gint_t bli_info_get_maximum_kc_s( void ) { bli_init(); return bli_blksz_max_for_type( BLIS_FLOAT, gemm_kc ); } gint_t bli_info_get_maximum_kc_d( void ) { bli_init(); return bli_blksz_max_for_type( BLIS_DOUBLE, gemm_kc ); } gint_t bli_info_get_maximum_kc_c( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_c() ) bsize = gemm4m_kc; + if ( bli_xm_is_enabled_c() ) bsize = gemm4m1_kc; else bsize = gemm_kc; return bli_blksz_max_for_type( BLIS_SCOMPLEX, bsize ); } gint_t bli_info_get_maximum_kc_z( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_z() ) bsize = gemm4m_kc; + if ( bli_xm_is_enabled_z() ) bsize = gemm4m1_kc; else bsize = gemm_kc; return bli_blksz_max_for_type( BLIS_DCOMPLEX, bsize ); } @@ -235,9 +235,9 @@ extern blksz_t* gemm_mr; extern blksz_t* gemm_nr; extern blksz_t* gemm_kr; -extern blksz_t* gemm4m_mr; -extern blksz_t* gemm4m_nr; -extern blksz_t* gemm4m_kr; +extern blksz_t* gemm4m1_mr; +extern blksz_t* gemm4m1_nr; +extern blksz_t* gemm4m1_kr; // MR default blocksizes @@ -252,11 +252,11 @@ gint_t bli_info_get_default_mr( num_t dt ) gint_t bli_info_get_default_mr_s( void ) { bli_init(); return bli_blksz_for_type( BLIS_FLOAT, gemm_mr ); } gint_t bli_info_get_default_mr_d( void ) { bli_init(); return bli_blksz_for_type( BLIS_DOUBLE, gemm_mr ); } gint_t bli_info_get_default_mr_c( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_c() ) bsize = gemm4m_mr; + if ( bli_xm_is_enabled_c() ) bsize = gemm4m1_mr; else bsize = gemm_mr; return bli_blksz_for_type( BLIS_SCOMPLEX, 
bsize ); } gint_t bli_info_get_default_mr_z( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_z() ) bsize = gemm4m_mr; + if ( bli_xm_is_enabled_z() ) bsize = gemm4m1_mr; else bsize = gemm_mr; return bli_blksz_for_type( BLIS_DCOMPLEX, bsize ); } @@ -273,11 +273,11 @@ gint_t bli_info_get_default_nr( num_t dt ) gint_t bli_info_get_default_nr_s( void ) { bli_init(); return bli_blksz_for_type( BLIS_FLOAT, gemm_nr ); } gint_t bli_info_get_default_nr_d( void ) { bli_init(); return bli_blksz_for_type( BLIS_DOUBLE, gemm_nr ); } gint_t bli_info_get_default_nr_c( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_c() ) bsize = gemm4m_nr; + if ( bli_xm_is_enabled_c() ) bsize = gemm4m1_nr; else bsize = gemm_nr; return bli_blksz_for_type( BLIS_SCOMPLEX, bsize ); } gint_t bli_info_get_default_nr_z( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_z() ) bsize = gemm4m_nr; + if ( bli_xm_is_enabled_z() ) bsize = gemm4m1_nr; else bsize = gemm_nr; return bli_blksz_for_type( BLIS_DCOMPLEX, bsize ); } @@ -294,11 +294,11 @@ gint_t bli_info_get_default_kr( num_t dt ) gint_t bli_info_get_default_kr_s( void ) { bli_init(); return bli_blksz_for_type( BLIS_FLOAT, gemm_kr ); } gint_t bli_info_get_default_kr_d( void ) { bli_init(); return bli_blksz_for_type( BLIS_DOUBLE, gemm_kr ); } gint_t bli_info_get_default_kr_c( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_c() ) bsize = gemm4m_kr; + if ( bli_xm_is_enabled_c() ) bsize = gemm4m1_kr; else bsize = gemm_kr; return bli_blksz_for_type( BLIS_SCOMPLEX, bsize ); } gint_t bli_info_get_default_kr_z( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_z() ) bsize = gemm4m_kr; + if ( bli_xm_is_enabled_z() ) bsize = gemm4m1_kr; else bsize = gemm_kr; return bli_blksz_for_type( BLIS_DCOMPLEX, bsize ); } @@ -318,11 +318,11 @@ gint_t bli_info_get_packdim_mr( num_t dt ) gint_t bli_info_get_packdim_mr_s( void ) { bli_init(); return bli_blksz_max_for_type( BLIS_FLOAT, gemm_mr ); } gint_t 
bli_info_get_packdim_mr_d( void ) { bli_init(); return bli_blksz_max_for_type( BLIS_DOUBLE, gemm_mr ); } gint_t bli_info_get_packdim_mr_c( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_c() ) bsize = gemm4m_mr; + if ( bli_xm_is_enabled_c() ) bsize = gemm4m1_mr; else bsize = gemm_mr; return bli_blksz_max_for_type( BLIS_SCOMPLEX, bsize ); } gint_t bli_info_get_packdim_mr_z( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_z() ) bsize = gemm4m_mr; + if ( bli_xm_is_enabled_z() ) bsize = gemm4m1_mr; else bsize = gemm_mr; return bli_blksz_max_for_type( BLIS_DCOMPLEX, bsize ); } @@ -339,11 +339,11 @@ gint_t bli_info_get_packdim_nr( num_t dt ) gint_t bli_info_get_packdim_nr_s( void ) { bli_init(); return bli_blksz_max_for_type( BLIS_FLOAT, gemm_nr ); } gint_t bli_info_get_packdim_nr_d( void ) { bli_init(); return bli_blksz_max_for_type( BLIS_DOUBLE, gemm_nr ); } gint_t bli_info_get_packdim_nr_c( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_c() ) bsize = gemm4m_nr; + if ( bli_xm_is_enabled_c() ) bsize = gemm4m1_nr; else bsize = gemm_nr; return bli_blksz_max_for_type( BLIS_SCOMPLEX, bsize ); } gint_t bli_info_get_packdim_nr_z( void ) { bli_init(); blksz_t* bsize; - if ( bli_xm_is_enabled_z() ) bsize = gemm4m_nr; + if ( bli_xm_is_enabled_z() ) bsize = gemm4m1_nr; else bsize = gemm_nr; return bli_blksz_max_for_type( BLIS_DCOMPLEX, bsize ); } diff --git a/frame/base/induced/bli_3m.h b/frame/base/induced/bli_3m.h deleted file mode 100644 index 7a3557777..000000000 --- a/frame/base/induced/bli_3m.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -char* bli_3m_get_string( void ); - -bool_t bli_3m_is_enabled_dt( num_t dt ); -bool_t bli_3m_is_enabled_c( void ); -bool_t bli_3m_is_enabled_z( void ); - -void bli_3m_enable_dt( num_t dt ); -void bli_3m_enable_c( void ); -void bli_3m_enable_z( void ); -void bli_3m_enable( void ); - -void bli_3m_disable_dt( num_t dt ); -void bli_3m_disable_c( void ); -void bli_3m_disable_z( void ); -void bli_3m_disable( void ); diff --git a/frame/base/induced/bli_3m.c b/frame/base/induced/bli_3m1.c similarity index 58% rename from frame/base/induced/bli_3m.c rename to frame/base/induced/bli_3m1.c index c8f8420e9..71df1752e 100644 --- a/frame/base/induced/bli_3m.c +++ b/frame/base/induced/bli_3m1.c @@ -34,42 +34,42 @@ #include "blis.h" -static char* bli_3m_str = "3m"; +static char* bli_3m1_str = "3m1"; -static bool_t bli_will_use_3m_c = FALSE; -static bool_t bli_will_use_3m_z = FALSE; +static bool_t bli_will_use_3m1_c = FALSE; +static bool_t bli_will_use_3m1_z = FALSE; -char* bli_3m_get_string( void ) { return bli_3m_str; } +char* bli_3m1_get_string( void ) { return bli_3m1_str; } -bool_t bli_3m_is_enabled_dt( num_t dt ) +bool_t bli_3m1_is_enabled_dt( num_t dt ) { - if ( bli_is_scomplex( dt ) ) return bli_3m_is_enabled_c(); - else if ( bli_is_dcomplex( dt ) ) return bli_3m_is_enabled_z(); + if ( bli_is_scomplex( dt ) ) return bli_3m1_is_enabled_c(); + else if ( bli_is_dcomplex( dt ) ) return bli_3m1_is_enabled_z(); else return FALSE; } -bool_t bli_3m_is_enabled_c( void ) { return bli_will_use_3m_c; } -bool_t bli_3m_is_enabled_z( void ) { return bli_will_use_3m_z; } +bool_t bli_3m1_is_enabled_c( void ) { return bli_will_use_3m1_c; } +bool_t bli_3m1_is_enabled_z( void ) { return bli_will_use_3m1_z; } -void bli_3m_enable_dt( num_t dt ) +void bli_3m1_enable_dt( num_t dt ) { - if ( bli_is_scomplex( dt ) ) bli_3m_enable_c(); - else if ( bli_is_dcomplex( dt ) ) bli_3m_enable_z(); + if ( bli_is_scomplex( dt ) ) bli_3m1_enable_c(); + else if ( bli_is_dcomplex( dt ) ) 
bli_3m1_enable_z(); } -void bli_3m_enable_c( void ) { bli_will_use_3m_c = TRUE; } -void bli_3m_enable_z( void ) { bli_will_use_3m_z = TRUE; } -void bli_3m_enable( void ) { bli_will_use_3m_c = - bli_will_use_3m_z = TRUE; } +void bli_3m1_enable_c( void ) { bli_will_use_3m1_c = TRUE; } +void bli_3m1_enable_z( void ) { bli_will_use_3m1_z = TRUE; } +void bli_3m1_enable( void ) { bli_will_use_3m1_c = + bli_will_use_3m1_z = TRUE; } -void bli_3m_disable_dt( num_t dt ) +void bli_3m1_disable_dt( num_t dt ) { - if ( bli_is_scomplex( dt ) ) bli_3m_disable_c(); - else if ( bli_is_dcomplex( dt ) ) bli_3m_disable_z(); + if ( bli_is_scomplex( dt ) ) bli_3m1_disable_c(); + else if ( bli_is_dcomplex( dt ) ) bli_3m1_disable_z(); } -void bli_3m_disable_c( void ) { bli_will_use_3m_c = FALSE; } -void bli_3m_disable_z( void ) { bli_will_use_3m_z = FALSE; } -void bli_3m_disable( void ) { bli_will_use_3m_c = - bli_will_use_3m_z = FALSE; } +void bli_3m1_disable_c( void ) { bli_will_use_3m1_c = FALSE; } +void bli_3m1_disable_z( void ) { bli_will_use_3m1_z = FALSE; } +void bli_3m1_disable( void ) { bli_will_use_3m1_c = + bli_will_use_3m1_z = FALSE; } diff --git a/frame/3/hemm/3m/bli_hemm3m_entry.c b/frame/base/induced/bli_3m1.h similarity index 79% rename from frame/3/hemm/3m/bli_hemm3m_entry.c rename to frame/base/induced/bli_3m1.h index 9364f2be4..fef815a85 100644 --- a/frame/3/hemm/3m/bli_hemm3m_entry.c +++ b/frame/base/induced/bli_3m1.h @@ -32,18 +32,18 @@ */ -#include "blis.h" +char* bli_3m1_get_string( void ); -extern gemm_t* gemm3m_cntl; +bool_t bli_3m1_is_enabled_dt( num_t dt ); +bool_t bli_3m1_is_enabled_c( void ); +bool_t bli_3m1_is_enabled_z( void ); -void bli_hemm3m_entry( side_t side, - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c ) -{ - bli_hemm_front( side, alpha, a, b, beta, c, - gemm3m_cntl ); -} +void bli_3m1_enable_dt( num_t dt ); +void bli_3m1_enable_c( void ); +void bli_3m1_enable_z( void ); +void bli_3m1_enable( void ); +void bli_3m1_disable_dt( num_t 
dt ); +void bli_3m1_disable_c( void ); +void bli_3m1_disable_z( void ); +void bli_3m1_disable( void ); diff --git a/frame/base/induced/bli_4m.c b/frame/base/induced/bli_4m.c index 7fe0b698b..f283b3383 100644 --- a/frame/base/induced/bli_4m.c +++ b/frame/base/induced/bli_4m.c @@ -35,54 +35,54 @@ #include "blis.h" static char* bli_native_str = "native"; -static char* bli_4m_str = "4m"; +static char* bli_4m1_str = "4m1"; // Initialize the 4m enabled/disabled state based on the cpp macros // which are set in bli_kernel_macro_defs.h. #ifdef BLIS_ENABLE_VIRTUAL_SCOMPLEX -static bool_t bli_will_use_4m_c = TRUE; +static bool_t bli_will_use_4m1_c = TRUE; #else -static bool_t bli_will_use_4m_c = FALSE; +static bool_t bli_will_use_4m1_c = FALSE; #endif #ifdef BLIS_ENABLE_VIRTUAL_DCOMPLEX -static bool_t bli_will_use_4m_z = TRUE; +static bool_t bli_will_use_4m1_z = TRUE; #else -static bool_t bli_will_use_4m_z = FALSE; +static bool_t bli_will_use_4m1_z = FALSE; #endif char* bli_native_get_string( void ) { return bli_native_str; } -char* bli_4m_get_string( void ) { return bli_4m_str; } +char* bli_4m1_get_string( void ) { return bli_4m1_str; } -bool_t bli_4m_is_enabled_dt( num_t dt ) +bool_t bli_4m1_is_enabled_dt( num_t dt ) { - if ( bli_is_scomplex( dt ) ) return bli_4m_is_enabled_c(); - else if ( bli_is_dcomplex( dt ) ) return bli_4m_is_enabled_z(); + if ( bli_is_scomplex( dt ) ) return bli_4m1_is_enabled_c(); + else if ( bli_is_dcomplex( dt ) ) return bli_4m1_is_enabled_z(); else return FALSE; } -bool_t bli_4m_is_enabled_c( void ) { return bli_will_use_4m_c; } -bool_t bli_4m_is_enabled_z( void ) { return bli_will_use_4m_z; } +bool_t bli_4m1_is_enabled_c( void ) { return bli_will_use_4m1_c; } +bool_t bli_4m1_is_enabled_z( void ) { return bli_will_use_4m1_z; } -void bli_4m_enable_dt( num_t dt ) +void bli_4m1_enable_dt( num_t dt ) { - if ( bli_is_scomplex( dt ) ) bli_4m_enable_c(); - else if ( bli_is_dcomplex( dt ) ) bli_4m_enable_z(); + if ( bli_is_scomplex( dt ) ) 
bli_4m1_enable_c(); + else if ( bli_is_dcomplex( dt ) ) bli_4m1_enable_z(); } -void bli_4m_enable_c( void ) { bli_will_use_4m_c = TRUE; } -void bli_4m_enable_z( void ) { bli_will_use_4m_z = TRUE; } -void bli_4m_enable( void ) { bli_will_use_4m_c = - bli_will_use_4m_z = TRUE; } +void bli_4m1_enable_c( void ) { bli_will_use_4m1_c = TRUE; } +void bli_4m1_enable_z( void ) { bli_will_use_4m1_z = TRUE; } +void bli_4m1_enable( void ) { bli_will_use_4m1_c = + bli_will_use_4m1_z = TRUE; } -void bli_4m_disable_dt( num_t dt ) +void bli_4m1_disable_dt( num_t dt ) { - if ( bli_is_scomplex( dt ) ) bli_4m_disable_c(); - else if ( bli_is_dcomplex( dt ) ) bli_4m_disable_z(); + if ( bli_is_scomplex( dt ) ) bli_4m1_disable_c(); + else if ( bli_is_dcomplex( dt ) ) bli_4m1_disable_z(); } -void bli_4m_disable_c( void ) { bli_will_use_4m_c = FALSE; } -void bli_4m_disable_z( void ) { bli_will_use_4m_z = FALSE; } -void bli_4m_disable( void ) { bli_will_use_4m_c = - bli_will_use_4m_z = FALSE; } +void bli_4m1_disable_c( void ) { bli_will_use_4m1_c = FALSE; } +void bli_4m1_disable_z( void ) { bli_will_use_4m1_z = FALSE; } +void bli_4m1_disable( void ) { bli_will_use_4m1_c = + bli_will_use_4m1_z = FALSE; } diff --git a/frame/base/induced/bli_4m.h b/frame/base/induced/bli_4m.h index db497820e..c9b2d3793 100644 --- a/frame/base/induced/bli_4m.h +++ b/frame/base/induced/bli_4m.h @@ -33,18 +33,18 @@ */ char* bli_native_get_string( void ); -char* bli_4m_get_string( void ); +char* bli_4m1_get_string( void ); -bool_t bli_4m_is_enabled_dt( num_t dt ); -bool_t bli_4m_is_enabled_c( void ); -bool_t bli_4m_is_enabled_z( void ); +bool_t bli_4m1_is_enabled_dt( num_t dt ); +bool_t bli_4m1_is_enabled_c( void ); +bool_t bli_4m1_is_enabled_z( void ); -void bli_4m_enable_dt( num_t dt ); -void bli_4m_enable_c( void ); -void bli_4m_enable_z( void ); -void bli_4m_enable( void ); +void bli_4m1_enable_dt( num_t dt ); +void bli_4m1_enable_c( void ); +void bli_4m1_enable_z( void ); +void bli_4m1_enable( void ); -void 
bli_4m_disable_dt( num_t dt ); -void bli_4m_disable_c( void ); -void bli_4m_disable_z( void ); -void bli_4m_disable( void ); +void bli_4m1_disable_dt( num_t dt ); +void bli_4m1_disable_c( void ); +void bli_4m1_disable_z( void ); +void bli_4m1_disable( void ); diff --git a/frame/base/induced/bli_xm.c b/frame/base/induced/bli_xm.c index 449e84db3..b3c6db712 100644 --- a/frame/base/induced/bli_xm.c +++ b/frame/base/induced/bli_xm.c @@ -44,18 +44,18 @@ bool_t bli_xm_is_enabled_dt( num_t dt ) bool_t bli_xm_is_enabled_c( void ) { if ( bli_3mh_is_enabled_c() ) return TRUE; - else if ( bli_3m_is_enabled_c() ) return TRUE; + else if ( bli_3m1_is_enabled_c() ) return TRUE; else if ( bli_4mh_is_enabled_c() ) return TRUE; - else if ( bli_4m_is_enabled_c() ) return TRUE; + else if ( bli_4m1_is_enabled_c() ) return TRUE; else return FALSE; } bool_t bli_xm_is_enabled_z( void ) { if ( bli_3mh_is_enabled_z() ) return TRUE; - else if ( bli_3m_is_enabled_z() ) return TRUE; + else if ( bli_3m1_is_enabled_z() ) return TRUE; else if ( bli_4mh_is_enabled_z() ) return TRUE; - else if ( bli_4m_is_enabled_z() ) return TRUE; + else if ( bli_4m1_is_enabled_z() ) return TRUE; else return FALSE; } diff --git a/frame/cntl/bli_cntl_init.c b/frame/cntl/bli_cntl_init.c index b6b4dbae2..43c5ca03a 100644 --- a/frame/cntl/bli_cntl_init.c +++ b/frame/cntl/bli_cntl_init.c @@ -59,22 +59,22 @@ void bli_cntl_init( void ) bli_gemm_cntl_init(); bli_trsm_cntl_init(); - // Level-3 via 4m - bli_gemm4m_cntl_init(); - bli_trsm4m_cntl_init(); - - // Level-3 via 3m - bli_gemm3m_cntl_init(); - bli_trsm3m_cntl_init(); - // Level-3 via 4mh bli_gemm4mh_cntl_init(); + // Level-3 via 4mb + bli_gemm4mb_cntl_init(); + + // Level-3 via 4m1 + bli_gemm4m1_cntl_init(); + bli_trsm4m1_cntl_init(); + // Level-3 via 3mh bli_gemm3mh_cntl_init(); - // Level-3 via 4mb - bli_gemm4mb_cntl_init(); + // Level-3 via 3m1 + bli_gemm3m1_cntl_init(); + bli_trsm3m1_cntl_init(); } void bli_cntl_finalize( void ) @@ -102,21 +102,21 @@ void 
bli_cntl_finalize( void ) bli_gemm_cntl_finalize(); bli_trsm_cntl_finalize(); - // Level-3 via 4m - bli_gemm4m_cntl_finalize(); - bli_trsm4m_cntl_finalize(); - - // Level-3 via 3m - bli_gemm3m_cntl_finalize(); - bli_trsm3m_cntl_finalize(); - // Level-3 via 4mh bli_gemm4mh_cntl_finalize(); + // Level-3 via 4mb + bli_gemm4mb_cntl_finalize(); + + // Level-3 via 4m1 + bli_gemm4m1_cntl_finalize(); + bli_trsm4m1_cntl_finalize(); + // Level-3 via 3mh bli_gemm3mh_cntl_finalize(); - // Level-3 via 4mb - bli_gemm4mb_cntl_finalize(); + // Level-3 via 3m1 + bli_gemm3m1_cntl_finalize(); + bli_trsm3m1_cntl_finalize(); } diff --git a/frame/include/bli_kernel_3m1_macro_defs.h b/frame/include/bli_kernel_3m1_macro_defs.h new file mode 100644 index 000000000..4303e8fc8 --- /dev/null +++ b/frame/include/bli_kernel_3m1_macro_defs.h @@ -0,0 +1,107 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_KERNEL_3M1_MACRO_DEFS_H +#define BLIS_KERNEL_3M1_MACRO_DEFS_H + + +// -- Define row access bools -------------------------------------------------- + +// gemm3m1 micro-kernels + +#define BLIS_CGEMM3M1_UKERNEL_PREFERS_CONTIG_ROWS \ + BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS +#define BLIS_ZGEMM3M1_UKERNEL_PREFERS_CONTIG_ROWS \ + BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS + + +// -- Define default 3m1-specific kernel names --------------------------------- + +// +// Level-3 +// + +// gemm3m1 micro-kernels + +#ifndef BLIS_CGEMM3M1_UKERNEL +#define BLIS_CGEMM3M1_UKERNEL BLIS_CGEMM3M1_UKERNEL_REF +#endif + +#ifndef BLIS_ZGEMM3M1_UKERNEL +#define BLIS_ZGEMM3M1_UKERNEL BLIS_ZGEMM3M1_UKERNEL_REF +#endif + +// gemmtrsm3m1_l micro-kernels + +#ifndef BLIS_CGEMMTRSM3M1_L_UKERNEL +#define BLIS_CGEMMTRSM3M1_L_UKERNEL BLIS_CGEMMTRSM3M1_L_UKERNEL_REF +#endif + +#ifndef BLIS_ZGEMMTRSM3M1_L_UKERNEL +#define BLIS_ZGEMMTRSM3M1_L_UKERNEL BLIS_ZGEMMTRSM3M1_L_UKERNEL_REF +#endif + +// gemmtrsm3m1_u micro-kernels + +#ifndef BLIS_CGEMMTRSM3M1_U_UKERNEL +#define BLIS_CGEMMTRSM3M1_U_UKERNEL BLIS_CGEMMTRSM3M1_U_UKERNEL_REF +#endif + +#ifndef BLIS_ZGEMMTRSM3M1_U_UKERNEL +#define BLIS_ZGEMMTRSM3M1_U_UKERNEL BLIS_ZGEMMTRSM3M1_U_UKERNEL_REF +#endif + +// trsm3m1_l micro-kernels + +#ifndef BLIS_CTRSM3M1_L_UKERNEL +#define BLIS_CTRSM3M1_L_UKERNEL BLIS_CTRSM3M1_L_UKERNEL_REF +#endif + +#ifndef BLIS_ZTRSM3M1_L_UKERNEL +#define BLIS_ZTRSM3M1_L_UKERNEL
BLIS_ZTRSM3M1_L_UKERNEL_REF +#endif + +// trsm3m1_u micro-kernels + +#ifndef BLIS_CTRSM3M1_U_UKERNEL +#define BLIS_CTRSM3M1_U_UKERNEL BLIS_CTRSM3M1_U_UKERNEL_REF +#endif + +#ifndef BLIS_ZTRSM3M1_U_UKERNEL +#define BLIS_ZTRSM3M1_U_UKERNEL BLIS_ZTRSM3M1_U_UKERNEL_REF +#endif + + + +#endif diff --git a/frame/include/bli_kernel_3m_macro_defs.h b/frame/include/bli_kernel_3m_macro_defs.h deleted file mode 100644 index 8454efae6..000000000 --- a/frame/include/bli_kernel_3m_macro_defs.h +++ /dev/null @@ -1,201 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_KERNEL_3M_MACRO_DEFS_H -#define BLIS_KERNEL_3M_MACRO_DEFS_H - - -// -- Define row access bools -------------------------------------------------- - -// gemm3m micro-kernels - -#define BLIS_CGEMM3M_UKERNEL_PREFERS_CONTIG_ROWS \ - BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS -#define BLIS_ZGEMM3M_UKERNEL_PREFERS_CONTIG_ROWS \ - BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS - - -// -- Define default 3m-specific kernel names ---------------------------------- - -// -// Level-3 -// - -// gemm3m micro-kernels - -#ifndef BLIS_CGEMM3M_UKERNEL -#define BLIS_CGEMM3M_UKERNEL BLIS_CGEMM3M_UKERNEL_REF -#endif - -#ifndef BLIS_ZGEMM3M_UKERNEL -#define BLIS_ZGEMM3M_UKERNEL BLIS_ZGEMM3M_UKERNEL_REF -#endif - -// gemmtrsm3m_l micro-kernels - -#ifndef BLIS_CGEMMTRSM3M_L_UKERNEL -#define BLIS_CGEMMTRSM3M_L_UKERNEL BLIS_CGEMMTRSM3M_L_UKERNEL_REF -#endif - -#ifndef BLIS_ZGEMMTRSM3M_L_UKERNEL -#define BLIS_ZGEMMTRSM3M_L_UKERNEL BLIS_ZGEMMTRSM3M_L_UKERNEL_REF -#endif - -// gemmtrsm3m_u micro-kernels - -#ifndef BLIS_CGEMMTRSM3M_U_UKERNEL -#define BLIS_CGEMMTRSM3M_U_UKERNEL BLIS_CGEMMTRSM3M_U_UKERNEL_REF -#endif - -#ifndef BLIS_ZGEMMTRSM3M_U_UKERNEL -#define BLIS_ZGEMMTRSM3M_U_UKERNEL BLIS_ZGEMMTRSM3M_U_UKERNEL_REF -#endif - -// trsm3m_l micro-kernels - -#ifndef BLIS_CTRSM3M_L_UKERNEL -#define BLIS_CTRSM3M_L_UKERNEL BLIS_CTRSM3M_L_UKERNEL_REF -#endif - -#ifndef BLIS_ZTRSM3M_L_UKERNEL -#define BLIS_ZTRSM3M_L_UKERNEL BLIS_ZTRSM3M_L_UKERNEL_REF -#endif - -// 
trsm3m_u micro-kernels - -#ifndef BLIS_CTRSM3M_U_UKERNEL -#define BLIS_CTRSM3M_U_UKERNEL BLIS_CTRSM3M_U_UKERNEL_REF -#endif - -#ifndef BLIS_ZTRSM3M_U_UKERNEL -#define BLIS_ZTRSM3M_U_UKERNEL BLIS_ZTRSM3M_U_UKERNEL_REF -#endif - -// -// Level-1m -// - -// packm_2xk_3m kernels - -#ifndef BLIS_CPACKM_2XK_3M_KERNEL -#define BLIS_CPACKM_2XK_3M_KERNEL BLIS_CPACKM_2XK_3M_KERNEL_REF -#endif - -#ifndef BLIS_ZPACKM_2XK_3M_KERNEL -#define BLIS_ZPACKM_2XK_3M_KERNEL BLIS_ZPACKM_2XK_3M_KERNEL_REF -#endif - -// packm_4xk_3m kernels - -#ifndef BLIS_CPACKM_4XK_3M_KERNEL -#define BLIS_CPACKM_4XK_3M_KERNEL BLIS_CPACKM_4XK_3M_KERNEL_REF -#endif - -#ifndef BLIS_ZPACKM_4XK_3M_KERNEL -#define BLIS_ZPACKM_4XK_3M_KERNEL BLIS_ZPACKM_4XK_3M_KERNEL_REF -#endif - -// packm_6xk_3m kernels - -#ifndef BLIS_CPACKM_6XK_3M_KERNEL -#define BLIS_CPACKM_6XK_3M_KERNEL BLIS_CPACKM_6XK_3M_KERNEL_REF -#endif - -#ifndef BLIS_ZPACKM_6XK_3M_KERNEL -#define BLIS_ZPACKM_6XK_3M_KERNEL BLIS_ZPACKM_6XK_3M_KERNEL_REF -#endif - -// packm_8xk_3m kernels - -#ifndef BLIS_CPACKM_8XK_3M_KERNEL -#define BLIS_CPACKM_8XK_3M_KERNEL BLIS_CPACKM_8XK_3M_KERNEL_REF -#endif - -#ifndef BLIS_ZPACKM_8XK_3M_KERNEL -#define BLIS_ZPACKM_8XK_3M_KERNEL BLIS_ZPACKM_8XK_3M_KERNEL_REF -#endif - -// packm_10xk_3m kernels - -#ifndef BLIS_CPACKM_10XK_3M_KERNEL -#define BLIS_CPACKM_10XK_3M_KERNEL BLIS_CPACKM_10XK_3M_KERNEL_REF -#endif - -#ifndef BLIS_ZPACKM_10XK_3M_KERNEL -#define BLIS_ZPACKM_10XK_3M_KERNEL BLIS_ZPACKM_10XK_3M_KERNEL_REF -#endif - -// packm_12xk_3m kernels - -#ifndef BLIS_CPACKM_12XK_3M_KERNEL -#define BLIS_CPACKM_12XK_3M_KERNEL BLIS_CPACKM_12XK_3M_KERNEL_REF -#endif - -#ifndef BLIS_ZPACKM_12XK_3M_KERNEL -#define BLIS_ZPACKM_12XK_3M_KERNEL BLIS_ZPACKM_12XK_3M_KERNEL_REF -#endif - -// packm_14xk_3m kernels - -#ifndef BLIS_CPACKM_14XK_3M_KERNEL -#define BLIS_CPACKM_14XK_3M_KERNEL BLIS_CPACKM_14XK_3M_KERNEL_REF -#endif - -#ifndef BLIS_ZPACKM_14XK_3M_KERNEL -#define BLIS_ZPACKM_14XK_3M_KERNEL BLIS_ZPACKM_14XK_3M_KERNEL_REF -#endif - 
-// packm_16xk_3m kernels - -#ifndef BLIS_CPACKM_16XK_3M_KERNEL -#define BLIS_CPACKM_16XK_3M_KERNEL BLIS_CPACKM_16XK_3M_KERNEL_REF -#endif - -#ifndef BLIS_ZPACKM_16XK_3M_KERNEL -#define BLIS_ZPACKM_16XK_3M_KERNEL BLIS_ZPACKM_16XK_3M_KERNEL_REF -#endif - -// packm_30xk_3m kernels - -#ifndef BLIS_CPACKM_30XK_3M_KERNEL -#define BLIS_CPACKM_30XK_3M_KERNEL BLIS_CPACKM_30XK_3M_KERNEL_REF -#endif - -#ifndef BLIS_ZPACKM_30XK_3M_KERNEL -#define BLIS_ZPACKM_30XK_3M_KERNEL BLIS_ZPACKM_30XK_3M_KERNEL_REF -#endif - - - -#endif diff --git a/frame/include/bli_kernel_3mh_macro_defs.h b/frame/include/bli_kernel_3mh_macro_defs.h new file mode 100644 index 000000000..4ea960b7e --- /dev/null +++ b/frame/include/bli_kernel_3mh_macro_defs.h @@ -0,0 +1,67 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_KERNEL_3MH_MACRO_DEFS_H +#define BLIS_KERNEL_3MH_MACRO_DEFS_H + + +// -- Define 3mh row access bools ---------------------------------------------- + +// gemm3mh micro-kernels + +#define BLIS_CGEMM3MH_UKERNEL_PREFERS_CONTIG_ROWS \ + BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS +#define BLIS_ZGEMM3MH_UKERNEL_PREFERS_CONTIG_ROWS \ + BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS + + +// -- Define default 3mh-specific kernel names --------------------------------- + +// +// Level-3 +// + +// gemm3mh micro-kernels + +#ifndef BLIS_CGEMM3MH_UKERNEL +#define BLIS_CGEMM3MH_UKERNEL BLIS_CGEMM3MH_UKERNEL_REF +#endif + +#ifndef BLIS_ZGEMM3MH_UKERNEL +#define BLIS_ZGEMM3MH_UKERNEL BLIS_ZGEMM3MH_UKERNEL_REF +#endif + + + +#endif diff --git a/frame/include/bli_kernel_3mi_macro_defs.h b/frame/include/bli_kernel_3mi_macro_defs.h new file mode 100644 index 000000000..593622a6a --- /dev/null +++ b/frame/include/bli_kernel_3mi_macro_defs.h @@ -0,0 +1,137 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef BLIS_KERNEL_3MI_MACRO_DEFS_H +#define BLIS_KERNEL_3MI_MACRO_DEFS_H + + +// -- Define default 3mi-specific kernel names --------------------------------- + +// +// Level-1m +// + +// packm_2xk_3mi kernels + +#ifndef BLIS_CPACKM_2XK_3MI_KERNEL +#define BLIS_CPACKM_2XK_3MI_KERNEL BLIS_CPACKM_2XK_3MI_KERNEL_REF +#endif + +#ifndef BLIS_ZPACKM_2XK_3MI_KERNEL +#define BLIS_ZPACKM_2XK_3MI_KERNEL BLIS_ZPACKM_2XK_3MI_KERNEL_REF +#endif + +// packm_4xk_3mi kernels + +#ifndef BLIS_CPACKM_4XK_3MI_KERNEL +#define BLIS_CPACKM_4XK_3MI_KERNEL BLIS_CPACKM_4XK_3MI_KERNEL_REF +#endif + +#ifndef BLIS_ZPACKM_4XK_3MI_KERNEL +#define BLIS_ZPACKM_4XK_3MI_KERNEL BLIS_ZPACKM_4XK_3MI_KERNEL_REF +#endif + +// packm_6xk_3mi kernels + +#ifndef BLIS_CPACKM_6XK_3MI_KERNEL +#define BLIS_CPACKM_6XK_3MI_KERNEL BLIS_CPACKM_6XK_3MI_KERNEL_REF +#endif + +#ifndef BLIS_ZPACKM_6XK_3MI_KERNEL +#define BLIS_ZPACKM_6XK_3MI_KERNEL BLIS_ZPACKM_6XK_3MI_KERNEL_REF +#endif + +// packm_8xk_3mi kernels + +#ifndef BLIS_CPACKM_8XK_3MI_KERNEL +#define BLIS_CPACKM_8XK_3MI_KERNEL BLIS_CPACKM_8XK_3MI_KERNEL_REF +#endif + +#ifndef BLIS_ZPACKM_8XK_3MI_KERNEL +#define BLIS_ZPACKM_8XK_3MI_KERNEL BLIS_ZPACKM_8XK_3MI_KERNEL_REF +#endif + +// packm_10xk_3mi kernels + +#ifndef BLIS_CPACKM_10XK_3MI_KERNEL +#define BLIS_CPACKM_10XK_3MI_KERNEL BLIS_CPACKM_10XK_3MI_KERNEL_REF +#endif + +#ifndef BLIS_ZPACKM_10XK_3MI_KERNEL +#define BLIS_ZPACKM_10XK_3MI_KERNEL BLIS_ZPACKM_10XK_3MI_KERNEL_REF +#endif + +// packm_12xk_3mi kernels + +#ifndef BLIS_CPACKM_12XK_3MI_KERNEL +#define BLIS_CPACKM_12XK_3MI_KERNEL BLIS_CPACKM_12XK_3MI_KERNEL_REF +#endif + +#ifndef BLIS_ZPACKM_12XK_3MI_KERNEL +#define BLIS_ZPACKM_12XK_3MI_KERNEL BLIS_ZPACKM_12XK_3MI_KERNEL_REF +#endif + +// packm_14xk_3mi kernels + +#ifndef BLIS_CPACKM_14XK_3MI_KERNEL +#define BLIS_CPACKM_14XK_3MI_KERNEL BLIS_CPACKM_14XK_3MI_KERNEL_REF +#endif + +#ifndef BLIS_ZPACKM_14XK_3MI_KERNEL +#define BLIS_ZPACKM_14XK_3MI_KERNEL BLIS_ZPACKM_14XK_3MI_KERNEL_REF +#endif + +// 
packm_16xk_3mi kernels + +#ifndef BLIS_CPACKM_16XK_3MI_KERNEL +#define BLIS_CPACKM_16XK_3MI_KERNEL BLIS_CPACKM_16XK_3MI_KERNEL_REF +#endif + +#ifndef BLIS_ZPACKM_16XK_3MI_KERNEL +#define BLIS_ZPACKM_16XK_3MI_KERNEL BLIS_ZPACKM_16XK_3MI_KERNEL_REF +#endif + +// packm_30xk_3mi kernels + +#ifndef BLIS_CPACKM_30XK_3MI_KERNEL +#define BLIS_CPACKM_30XK_3MI_KERNEL BLIS_CPACKM_30XK_3MI_KERNEL_REF +#endif + +#ifndef BLIS_ZPACKM_30XK_3MI_KERNEL +#define BLIS_ZPACKM_30XK_3MI_KERNEL BLIS_ZPACKM_30XK_3MI_KERNEL_REF +#endif + + + +#endif diff --git a/frame/include/bli_kernel_4m1_macro_defs.h b/frame/include/bli_kernel_4m1_macro_defs.h new file mode 100644 index 000000000..19b3d74a1 --- /dev/null +++ b/frame/include/bli_kernel_4m1_macro_defs.h @@ -0,0 +1,107 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_KERNEL_4M1_MACRO_DEFS_H +#define BLIS_KERNEL_4M1_MACRO_DEFS_H + + +// -- Define row access bools -------------------------------------------------- + +// gemm4m1 micro-kernels + +#define BLIS_CGEMM4M1_UKERNEL_PREFERS_CONTIG_ROWS \ + BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS +#define BLIS_ZGEMM4M1_UKERNEL_PREFERS_CONTIG_ROWS \ + BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS + + +// -- Define default 4m1-specific kernel names --------------------------------- + +// +// Level-3 +// + +// gemm4m1 micro-kernels + +#ifndef BLIS_CGEMM4M1_UKERNEL +#define BLIS_CGEMM4M1_UKERNEL BLIS_CGEMM4M1_UKERNEL_REF +#endif + +#ifndef BLIS_ZGEMM4M1_UKERNEL +#define BLIS_ZGEMM4M1_UKERNEL BLIS_ZGEMM4M1_UKERNEL_REF +#endif + +// gemmtrsm4m1_l micro-kernels + +#ifndef BLIS_CGEMMTRSM4M1_L_UKERNEL +#define BLIS_CGEMMTRSM4M1_L_UKERNEL BLIS_CGEMMTRSM4M1_L_UKERNEL_REF +#endif + +#ifndef BLIS_ZGEMMTRSM4M1_L_UKERNEL +#define BLIS_ZGEMMTRSM4M1_L_UKERNEL BLIS_ZGEMMTRSM4M1_L_UKERNEL_REF +#endif + +// gemmtrsm4m1_u micro-kernels + +#ifndef BLIS_CGEMMTRSM4M1_U_UKERNEL +#define BLIS_CGEMMTRSM4M1_U_UKERNEL BLIS_CGEMMTRSM4M1_U_UKERNEL_REF +#endif + +#ifndef BLIS_ZGEMMTRSM4M1_U_UKERNEL +#define BLIS_ZGEMMTRSM4M1_U_UKERNEL BLIS_ZGEMMTRSM4M1_U_UKERNEL_REF +#endif + +// trsm4m1_l micro-kernels + +#ifndef BLIS_CTRSM4M1_L_UKERNEL +#define BLIS_CTRSM4M1_L_UKERNEL BLIS_CTRSM4M1_L_UKERNEL_REF +#endif + +#ifndef BLIS_ZTRSM4M1_L_UKERNEL +#define BLIS_ZTRSM4M1_L_UKERNEL 
BLIS_ZTRSM4M1_L_UKERNEL_REF +#endif + +// trsm4m1_u micro-kernels + +#ifndef BLIS_CTRSM4M1_U_UKERNEL +#define BLIS_CTRSM4M1_U_UKERNEL BLIS_CTRSM4M1_U_UKERNEL_REF +#endif + +#ifndef BLIS_ZTRSM4M1_U_UKERNEL +#define BLIS_ZTRSM4M1_U_UKERNEL BLIS_ZTRSM4M1_U_UKERNEL_REF +#endif + + + +#endif diff --git a/frame/include/bli_kernel_4m_macro_defs.h b/frame/include/bli_kernel_4m_macro_defs.h deleted file mode 100644 index c572122be..000000000 --- a/frame/include/bli_kernel_4m_macro_defs.h +++ /dev/null @@ -1,218 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_KERNEL_4M_MACRO_DEFS_H -#define BLIS_KERNEL_4M_MACRO_DEFS_H - - -// -- Define row access bools -------------------------------------------------- - -// gemm4m micro-kernels - -#define BLIS_CGEMM4M_UKERNEL_PREFERS_CONTIG_ROWS \ - BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS -#define BLIS_ZGEMM4M_UKERNEL_PREFERS_CONTIG_ROWS \ - BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS - -// gemm4mb micro-kernels - -#define BLIS_CGEMM4MB_UKERNEL_PREFERS_CONTIG_ROWS \ - BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS -#define BLIS_ZGEMM4MB_UKERNEL_PREFERS_CONTIG_ROWS \ - BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS - - -// -- Define default 4m-specific kernel names ---------------------------------- - -// -// Level-3 -// - -// gemm4m micro-kernels - -#ifndef BLIS_CGEMM4M_UKERNEL -#define BLIS_CGEMM4M_UKERNEL BLIS_CGEMM4M_UKERNEL_REF -#endif - -#ifndef BLIS_ZGEMM4M_UKERNEL -#define BLIS_ZGEMM4M_UKERNEL BLIS_ZGEMM4M_UKERNEL_REF -#endif - -// gemm4mb micro-kernels - -#ifndef BLIS_CGEMM4MB_UKERNEL -#define BLIS_CGEMM4MB_UKERNEL BLIS_CGEMM4MB_UKERNEL_REF -#endif - -#ifndef BLIS_ZGEMM4MB_UKERNEL -#define BLIS_ZGEMM4MB_UKERNEL BLIS_ZGEMM4MB_UKERNEL_REF -#endif - -// gemmtrsm4m_l micro-kernels - -#ifndef BLIS_CGEMMTRSM4M_L_UKERNEL -#define BLIS_CGEMMTRSM4M_L_UKERNEL BLIS_CGEMMTRSM4M_L_UKERNEL_REF -#endif - -#ifndef BLIS_ZGEMMTRSM4M_L_UKERNEL -#define BLIS_ZGEMMTRSM4M_L_UKERNEL BLIS_ZGEMMTRSM4M_L_UKERNEL_REF -#endif - -// gemmtrsm4m_u micro-kernels - -#ifndef 
BLIS_CGEMMTRSM4M_U_UKERNEL -#define BLIS_CGEMMTRSM4M_U_UKERNEL BLIS_CGEMMTRSM4M_U_UKERNEL_REF -#endif - -#ifndef BLIS_ZGEMMTRSM4M_U_UKERNEL -#define BLIS_ZGEMMTRSM4M_U_UKERNEL BLIS_ZGEMMTRSM4M_U_UKERNEL_REF -#endif - -// trsm4m_l micro-kernels - -#ifndef BLIS_CTRSM4M_L_UKERNEL -#define BLIS_CTRSM4M_L_UKERNEL BLIS_CTRSM4M_L_UKERNEL_REF -#endif - -#ifndef BLIS_ZTRSM4M_L_UKERNEL -#define BLIS_ZTRSM4M_L_UKERNEL BLIS_ZTRSM4M_L_UKERNEL_REF -#endif - -// trsm4m_u micro-kernels - -#ifndef BLIS_CTRSM4M_U_UKERNEL -#define BLIS_CTRSM4M_U_UKERNEL BLIS_CTRSM4M_U_UKERNEL_REF -#endif - -#ifndef BLIS_ZTRSM4M_U_UKERNEL -#define BLIS_ZTRSM4M_U_UKERNEL BLIS_ZTRSM4M_U_UKERNEL_REF -#endif - -// -// Level-1m -// - -// packm_2xk_4m kernels - -#ifndef BLIS_CPACKM_2XK_4M_KERNEL -#define BLIS_CPACKM_2XK_4M_KERNEL BLIS_CPACKM_2XK_4M_KERNEL_REF -#endif - -#ifndef BLIS_ZPACKM_2XK_4M_KERNEL -#define BLIS_ZPACKM_2XK_4M_KERNEL BLIS_ZPACKM_2XK_4M_KERNEL_REF -#endif - -// packm_4xk_4m kernels - -#ifndef BLIS_CPACKM_4XK_4M_KERNEL -#define BLIS_CPACKM_4XK_4M_KERNEL BLIS_CPACKM_4XK_4M_KERNEL_REF -#endif - -#ifndef BLIS_ZPACKM_4XK_4M_KERNEL -#define BLIS_ZPACKM_4XK_4M_KERNEL BLIS_ZPACKM_4XK_4M_KERNEL_REF -#endif - -// packm_6xk_4m kernels - -#ifndef BLIS_CPACKM_6XK_4M_KERNEL -#define BLIS_CPACKM_6XK_4M_KERNEL BLIS_CPACKM_6XK_4M_KERNEL_REF -#endif - -#ifndef BLIS_ZPACKM_6XK_4M_KERNEL -#define BLIS_ZPACKM_6XK_4M_KERNEL BLIS_ZPACKM_6XK_4M_KERNEL_REF -#endif - -// packm_8xk_4m kernels - -#ifndef BLIS_CPACKM_8XK_4M_KERNEL -#define BLIS_CPACKM_8XK_4M_KERNEL BLIS_CPACKM_8XK_4M_KERNEL_REF -#endif - -#ifndef BLIS_ZPACKM_8XK_4M_KERNEL -#define BLIS_ZPACKM_8XK_4M_KERNEL BLIS_ZPACKM_8XK_4M_KERNEL_REF -#endif - -// packm_10xk_4m kernels - -#ifndef BLIS_CPACKM_10XK_4M_KERNEL -#define BLIS_CPACKM_10XK_4M_KERNEL BLIS_CPACKM_10XK_4M_KERNEL_REF -#endif - -#ifndef BLIS_ZPACKM_10XK_4M_KERNEL -#define BLIS_ZPACKM_10XK_4M_KERNEL BLIS_ZPACKM_10XK_4M_KERNEL_REF -#endif - -// packm_12xk_4m kernels - -#ifndef 
BLIS_CPACKM_12XK_4M_KERNEL -#define BLIS_CPACKM_12XK_4M_KERNEL BLIS_CPACKM_12XK_4M_KERNEL_REF -#endif - -#ifndef BLIS_ZPACKM_12XK_4M_KERNEL -#define BLIS_ZPACKM_12XK_4M_KERNEL BLIS_ZPACKM_12XK_4M_KERNEL_REF -#endif - -// packm_14xk_4m kernels - -#ifndef BLIS_CPACKM_14XK_4M_KERNEL -#define BLIS_CPACKM_14XK_4M_KERNEL BLIS_CPACKM_14XK_4M_KERNEL_REF -#endif - -#ifndef BLIS_ZPACKM_14XK_4M_KERNEL -#define BLIS_ZPACKM_14XK_4M_KERNEL BLIS_ZPACKM_14XK_4M_KERNEL_REF -#endif - -// packm_16xk_4m kernels - -#ifndef BLIS_CPACKM_16XK_4M_KERNEL -#define BLIS_CPACKM_16XK_4M_KERNEL BLIS_CPACKM_16XK_4M_KERNEL_REF -#endif - -#ifndef BLIS_ZPACKM_16XK_4M_KERNEL -#define BLIS_ZPACKM_16XK_4M_KERNEL BLIS_ZPACKM_16XK_4M_KERNEL_REF -#endif - -// packm_30xk_4m kernels - -#ifndef BLIS_CPACKM_30XK_4M_KERNEL -#define BLIS_CPACKM_30XK_4M_KERNEL BLIS_CPACKM_30XK_4M_KERNEL_REF -#endif - -#ifndef BLIS_ZPACKM_30XK_4M_KERNEL -#define BLIS_ZPACKM_30XK_4M_KERNEL BLIS_ZPACKM_30XK_4M_KERNEL_REF -#endif - - - -#endif diff --git a/frame/include/bli_kernel_4mb_macro_defs.h b/frame/include/bli_kernel_4mb_macro_defs.h new file mode 100644 index 000000000..edea2c4eb --- /dev/null +++ b/frame/include/bli_kernel_4mb_macro_defs.h @@ -0,0 +1,67 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_KERNEL_4MB_MACRO_DEFS_H +#define BLIS_KERNEL_4MB_MACRO_DEFS_H + + +// -- Define row access bools -------------------------------------------------- + +// gemm4mb micro-kernels + +#define BLIS_CGEMM4MB_UKERNEL_PREFERS_CONTIG_ROWS \ + BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS +#define BLIS_ZGEMM4MB_UKERNEL_PREFERS_CONTIG_ROWS \ + BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS + + +// -- Define default 4mb-specific kernel names --------------------------------- + +// +// Level-3 +// + +// gemm4mb micro-kernels + +#ifndef BLIS_CGEMM4MB_UKERNEL +#define BLIS_CGEMM4MB_UKERNEL BLIS_CGEMM4MB_UKERNEL_REF +#endif + +#ifndef BLIS_ZGEMM4MB_UKERNEL +#define BLIS_ZGEMM4MB_UKERNEL BLIS_ZGEMM4MB_UKERNEL_REF +#endif + + + +#endif diff --git a/frame/include/bli_kernel_4mh_macro_defs.h b/frame/include/bli_kernel_4mh_macro_defs.h new file mode 100644 index 000000000..598e724d0 --- /dev/null +++ b/frame/include/bli_kernel_4mh_macro_defs.h @@ -0,0 +1,67 @@ +/* + + BLIS + An object-based framework for
developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef BLIS_KERNEL_4MH_MACRO_DEFS_H +#define BLIS_KERNEL_4MH_MACRO_DEFS_H + + +// -- Define 4mh row access bools ---------------------------------------------- + +// gemm4mh micro-kernels + +#define BLIS_CGEMM4MH_UKERNEL_PREFERS_CONTIG_ROWS \ + BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS +#define BLIS_ZGEMM4MH_UKERNEL_PREFERS_CONTIG_ROWS \ + BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS + + +// -- Define default 4mh-specific kernel names --------------------------------- + +// +// Level-3 +// + +// gemm4mh micro-kernels + +#ifndef BLIS_CGEMM4MH_UKERNEL +#define BLIS_CGEMM4MH_UKERNEL BLIS_CGEMM4MH_UKERNEL_REF +#endif + +#ifndef BLIS_ZGEMM4MH_UKERNEL +#define BLIS_ZGEMM4MH_UKERNEL BLIS_ZGEMM4MH_UKERNEL_REF +#endif + + + +#endif diff --git a/frame/include/bli_kernel_4mi_macro_defs.h b/frame/include/bli_kernel_4mi_macro_defs.h new file mode 100644 index 000000000..2f36de349 --- /dev/null +++ b/frame/include/bli_kernel_4mi_macro_defs.h @@ -0,0 +1,137 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_KERNEL_4MI_MACRO_DEFS_H +#define BLIS_KERNEL_4MI_MACRO_DEFS_H + + +// -- Define default 4mi-specific kernel names --------------------------------- + +// +// Level-1m +// + +// packm_2xk_4mi kernels + +#ifndef BLIS_CPACKM_2XK_4MI_KERNEL +#define BLIS_CPACKM_2XK_4MI_KERNEL BLIS_CPACKM_2XK_4MI_KERNEL_REF +#endif + +#ifndef BLIS_ZPACKM_2XK_4MI_KERNEL +#define BLIS_ZPACKM_2XK_4MI_KERNEL BLIS_ZPACKM_2XK_4MI_KERNEL_REF +#endif + +// packm_4xk_4mi kernels + +#ifndef BLIS_CPACKM_4XK_4MI_KERNEL +#define BLIS_CPACKM_4XK_4MI_KERNEL BLIS_CPACKM_4XK_4MI_KERNEL_REF +#endif + +#ifndef BLIS_ZPACKM_4XK_4MI_KERNEL +#define BLIS_ZPACKM_4XK_4MI_KERNEL BLIS_ZPACKM_4XK_4MI_KERNEL_REF +#endif + +// packm_6xk_4mi kernels + +#ifndef BLIS_CPACKM_6XK_4MI_KERNEL +#define BLIS_CPACKM_6XK_4MI_KERNEL BLIS_CPACKM_6XK_4MI_KERNEL_REF +#endif + +#ifndef BLIS_ZPACKM_6XK_4MI_KERNEL +#define BLIS_ZPACKM_6XK_4MI_KERNEL BLIS_ZPACKM_6XK_4MI_KERNEL_REF +#endif + +// packm_8xk_4mi kernels + +#ifndef BLIS_CPACKM_8XK_4MI_KERNEL +#define BLIS_CPACKM_8XK_4MI_KERNEL BLIS_CPACKM_8XK_4MI_KERNEL_REF +#endif + +#ifndef BLIS_ZPACKM_8XK_4MI_KERNEL +#define BLIS_ZPACKM_8XK_4MI_KERNEL BLIS_ZPACKM_8XK_4MI_KERNEL_REF +#endif + +// 
packm_10xk_4mi kernels + +#ifndef BLIS_CPACKM_10XK_4MI_KERNEL +#define BLIS_CPACKM_10XK_4MI_KERNEL BLIS_CPACKM_10XK_4MI_KERNEL_REF +#endif + +#ifndef BLIS_ZPACKM_10XK_4MI_KERNEL +#define BLIS_ZPACKM_10XK_4MI_KERNEL BLIS_ZPACKM_10XK_4MI_KERNEL_REF +#endif + +// packm_12xk_4mi kernels + +#ifndef BLIS_CPACKM_12XK_4MI_KERNEL +#define BLIS_CPACKM_12XK_4MI_KERNEL BLIS_CPACKM_12XK_4MI_KERNEL_REF +#endif + +#ifndef BLIS_ZPACKM_12XK_4MI_KERNEL +#define BLIS_ZPACKM_12XK_4MI_KERNEL BLIS_ZPACKM_12XK_4MI_KERNEL_REF +#endif + +// packm_14xk_4mi kernels + +#ifndef BLIS_CPACKM_14XK_4MI_KERNEL +#define BLIS_CPACKM_14XK_4MI_KERNEL BLIS_CPACKM_14XK_4MI_KERNEL_REF +#endif + +#ifndef BLIS_ZPACKM_14XK_4MI_KERNEL +#define BLIS_ZPACKM_14XK_4MI_KERNEL BLIS_ZPACKM_14XK_4MI_KERNEL_REF +#endif + +// packm_16xk_4mi kernels + +#ifndef BLIS_CPACKM_16XK_4MI_KERNEL +#define BLIS_CPACKM_16XK_4MI_KERNEL BLIS_CPACKM_16XK_4MI_KERNEL_REF +#endif + +#ifndef BLIS_ZPACKM_16XK_4MI_KERNEL +#define BLIS_ZPACKM_16XK_4MI_KERNEL BLIS_ZPACKM_16XK_4MI_KERNEL_REF +#endif + +// packm_30xk_4mi kernels + +#ifndef BLIS_CPACKM_30XK_4MI_KERNEL +#define BLIS_CPACKM_30XK_4MI_KERNEL BLIS_CPACKM_30XK_4MI_KERNEL_REF +#endif + +#ifndef BLIS_ZPACKM_30XK_4MI_KERNEL +#define BLIS_ZPACKM_30XK_4MI_KERNEL BLIS_ZPACKM_30XK_4MI_KERNEL_REF +#endif + + + +#endif diff --git a/frame/include/bli_kernel_macro_defs.h b/frame/include/bli_kernel_macro_defs.h index 6426e15a4..68b382930 100644 --- a/frame/include/bli_kernel_macro_defs.h +++ b/frame/include/bli_kernel_macro_defs.h @@ -77,12 +77,12 @@ // In this section we consider each datatype-specific micro-kernel macro; // if it is undefined, we define it to be the corresponding reference kernel. -// In the case of complex gemm micro-kernels, we also define special macros so -// that later on we can tell whether or not to employ the 4m implementations. 
-// Note that in order to properly determine whether/ 4m is a viable option, we -// need to be able to test the existence of the real gemm micro-kernels, which -// means we must consider the complex gemm micro-kernel cases *BEFORE* the -// real cases. +// In the case of complex gemm micro-kernels, we also define special macros +// so that later on we can tell whether or not to employ the induced +// implementations. Note that in order to properly determine whether the +// induced method is a viable option, we need to be able to test the +// existence of the real gemm micro-kernels, which means we must consider +// the complex gemm micro-kernel cases *BEFORE* the real cases. // // Level-3 diff --git a/frame/include/bli_kernel_post_macro_defs.h b/frame/include/bli_kernel_post_macro_defs.h index f92144b6f..10fa892a2 100644 --- a/frame/include/bli_kernel_post_macro_defs.h +++ b/frame/include/bli_kernel_post_macro_defs.h @@ -83,28 +83,28 @@ // gemm4m micro-kernels -#define bli_cGEMM4M_UKERNEL BLIS_CGEMM4M_UKERNEL -#define bli_zGEMM4M_UKERNEL BLIS_ZGEMM4M_UKERNEL +#define bli_cGEMM4M1_UKERNEL BLIS_CGEMM4M1_UKERNEL +#define bli_zGEMM4M1_UKERNEL BLIS_ZGEMM4M1_UKERNEL // gemmtrsm4m_l micro-kernels -#define bli_cGEMMTRSM4M_L_UKERNEL BLIS_CGEMMTRSM4M_L_UKERNEL -#define bli_zGEMMTRSM4M_L_UKERNEL BLIS_ZGEMMTRSM4M_L_UKERNEL +#define bli_cGEMMTRSM4M1_L_UKERNEL BLIS_CGEMMTRSM4M1_L_UKERNEL +#define bli_zGEMMTRSM4M1_L_UKERNEL BLIS_ZGEMMTRSM4M1_L_UKERNEL // gemmtrsm4m_u micro-kernels -#define bli_cGEMMTRSM4M_U_UKERNEL BLIS_CGEMMTRSM4M_U_UKERNEL -#define bli_zGEMMTRSM4M_U_UKERNEL BLIS_ZGEMMTRSM4M_U_UKERNEL +#define bli_cGEMMTRSM4M1_U_UKERNEL BLIS_CGEMMTRSM4M1_U_UKERNEL +#define bli_zGEMMTRSM4M1_U_UKERNEL BLIS_ZGEMMTRSM4M1_U_UKERNEL // trsm4m_l micro-kernels -#define bli_cTRSM4M_L_UKERNEL BLIS_CTRSM4M_L_UKERNEL -#define bli_zTRSM4M_L_UKERNEL BLIS_ZTRSM4M_L_UKERNEL +#define bli_cTRSM4M1_L_UKERNEL BLIS_CTRSM4M1_L_UKERNEL +#define bli_zTRSM4M1_L_UKERNEL BLIS_ZTRSM4M1_L_UKERNEL // trsm4m_u 
micro-kernels -#define bli_cTRSM4M_U_UKERNEL BLIS_CTRSM4M_U_UKERNEL -#define bli_zTRSM4M_U_UKERNEL BLIS_ZTRSM4M_U_UKERNEL +#define bli_cTRSM4M1_U_UKERNEL BLIS_CTRSM4M1_U_UKERNEL +#define bli_zTRSM4M1_U_UKERNEL BLIS_ZTRSM4M1_U_UKERNEL // // Level-3 3m @@ -112,28 +112,28 @@ // gemm3m micro-kernels -#define bli_cGEMM3M_UKERNEL BLIS_CGEMM3M_UKERNEL -#define bli_zGEMM3M_UKERNEL BLIS_ZGEMM3M_UKERNEL +#define bli_cGEMM3M1_UKERNEL BLIS_CGEMM3M1_UKERNEL +#define bli_zGEMM3M1_UKERNEL BLIS_ZGEMM3M1_UKERNEL // gemmtrsm3m_l micro-kernels -#define bli_cGEMMTRSM3M_L_UKERNEL BLIS_CGEMMTRSM3M_L_UKERNEL -#define bli_zGEMMTRSM3M_L_UKERNEL BLIS_ZGEMMTRSM3M_L_UKERNEL +#define bli_cGEMMTRSM3M1_L_UKERNEL BLIS_CGEMMTRSM3M1_L_UKERNEL +#define bli_zGEMMTRSM3M1_L_UKERNEL BLIS_ZGEMMTRSM3M1_L_UKERNEL // gemmtrsm3m_u micro-kernels -#define bli_cGEMMTRSM3M_U_UKERNEL BLIS_CGEMMTRSM3M_U_UKERNEL -#define bli_zGEMMTRSM3M_U_UKERNEL BLIS_ZGEMMTRSM3M_U_UKERNEL +#define bli_cGEMMTRSM3M1_U_UKERNEL BLIS_CGEMMTRSM3M1_U_UKERNEL +#define bli_zGEMMTRSM3M1_U_UKERNEL BLIS_ZGEMMTRSM3M1_U_UKERNEL // trsm3m_l micro-kernels -#define bli_cTRSM3M_L_UKERNEL BLIS_CTRSM3M_L_UKERNEL -#define bli_zTRSM3M_L_UKERNEL BLIS_ZTRSM3M_L_UKERNEL +#define bli_cTRSM3M1_L_UKERNEL BLIS_CTRSM3M1_L_UKERNEL +#define bli_zTRSM3M1_L_UKERNEL BLIS_ZTRSM3M1_L_UKERNEL // trsm3m_u micro-kernels -#define bli_cTRSM3M_U_UKERNEL BLIS_CTRSM3M_U_UKERNEL -#define bli_zTRSM3M_U_UKERNEL BLIS_ZTRSM3M_U_UKERNEL +#define bli_cTRSM3M1_U_UKERNEL BLIS_CTRSM3M1_U_UKERNEL +#define bli_zTRSM3M1_U_UKERNEL BLIS_ZTRSM3M1_U_UKERNEL // // Level-1m diff --git a/frame/include/bli_kernel_pre_macro_defs.h b/frame/include/bli_kernel_pre_macro_defs.h index 7648583e1..ad0379ab3 100644 --- a/frame/include/bli_kernel_pre_macro_defs.h +++ b/frame/include/bli_kernel_pre_macro_defs.h @@ -76,64 +76,6 @@ #define BLIS_CTRSM_U_UKERNEL_REF bli_ctrsm_u_ukr_ref #define BLIS_ZTRSM_U_UKERNEL_REF bli_ztrsm_u_ukr_ref -// -// Level-3 4m -// - -// gemm4m micro-kernels - -#define 
BLIS_CGEMM4M_UKERNEL_REF bli_cgemm4m_ukr_ref -#define BLIS_ZGEMM4M_UKERNEL_REF bli_zgemm4m_ukr_ref - -// gemmtrsm4m_l micro-kernels - -#define BLIS_CGEMMTRSM4M_L_UKERNEL_REF bli_cgemmtrsm4m_l_ukr_ref -#define BLIS_ZGEMMTRSM4M_L_UKERNEL_REF bli_zgemmtrsm4m_l_ukr_ref - -// gemmtrsm4m_u micro-kernels - -#define BLIS_CGEMMTRSM4M_U_UKERNEL_REF bli_cgemmtrsm4m_u_ukr_ref -#define BLIS_ZGEMMTRSM4M_U_UKERNEL_REF bli_zgemmtrsm4m_u_ukr_ref - -// trsm4m_l micro-kernels - -#define BLIS_CTRSM4M_L_UKERNEL_REF bli_ctrsm4m_l_ukr_ref -#define BLIS_ZTRSM4M_L_UKERNEL_REF bli_ztrsm4m_l_ukr_ref - -// trsm4m_u micro-kernels - -#define BLIS_CTRSM4M_U_UKERNEL_REF bli_ctrsm4m_u_ukr_ref -#define BLIS_ZTRSM4M_U_UKERNEL_REF bli_ztrsm4m_u_ukr_ref - -// -// Level-3 3m -// - -// gemm3m micro-kernels - -#define BLIS_CGEMM3M_UKERNEL_REF bli_cgemm3m_ukr_ref -#define BLIS_ZGEMM3M_UKERNEL_REF bli_zgemm3m_ukr_ref - -// gemmtrsm3m_l micro-kernels - -#define BLIS_CGEMMTRSM3M_L_UKERNEL_REF bli_cgemmtrsm3m_l_ukr_ref -#define BLIS_ZGEMMTRSM3M_L_UKERNEL_REF bli_zgemmtrsm3m_l_ukr_ref - -// gemmtrsm3m_u micro-kernels - -#define BLIS_CGEMMTRSM3M_U_UKERNEL_REF bli_cgemmtrsm3m_u_ukr_ref -#define BLIS_ZGEMMTRSM3M_U_UKERNEL_REF bli_zgemmtrsm3m_u_ukr_ref - -// trsm3m_l micro-kernels - -#define BLIS_CTRSM3M_L_UKERNEL_REF bli_ctrsm3m_l_ukr_ref -#define BLIS_ZTRSM3M_L_UKERNEL_REF bli_ztrsm3m_l_ukr_ref - -// trsm3m_u micro-kernels - -#define BLIS_CTRSM3M_U_UKERNEL_REF bli_ctrsm3m_u_ukr_ref -#define BLIS_ZTRSM3M_U_UKERNEL_REF bli_ztrsm3m_u_ukr_ref - // // Level-3 4mh // @@ -143,6 +85,44 @@ #define BLIS_CGEMM4MH_UKERNEL_REF bli_cgemm4mh_ukr_ref #define BLIS_ZGEMM4MH_UKERNEL_REF bli_zgemm4mh_ukr_ref +// +// Level-3 4mb +// + +// gemm4mb micro-kernels + +#define BLIS_CGEMM4MB_UKERNEL_REF bli_cgemm4mb_ukr_ref +#define BLIS_ZGEMM4MB_UKERNEL_REF bli_zgemm4mb_ukr_ref + +// +// Level-3 4m1 +// + +// gemm4m1 micro-kernels + +#define BLIS_CGEMM4M1_UKERNEL_REF bli_cgemm4m1_ukr_ref +#define BLIS_ZGEMM4M1_UKERNEL_REF 
bli_zgemm4m1_ukr_ref + +// gemmtrsm4m1_l micro-kernels + +#define BLIS_CGEMMTRSM4M1_L_UKERNEL_REF bli_cgemmtrsm4m1_l_ukr_ref +#define BLIS_ZGEMMTRSM4M1_L_UKERNEL_REF bli_zgemmtrsm4m1_l_ukr_ref + +// gemmtrsm4m1_u micro-kernels + +#define BLIS_CGEMMTRSM4M1_U_UKERNEL_REF bli_cgemmtrsm4m1_u_ukr_ref +#define BLIS_ZGEMMTRSM4M1_U_UKERNEL_REF bli_zgemmtrsm4m1_u_ukr_ref + +// trsm4m1_l micro-kernels + +#define BLIS_CTRSM4M1_L_UKERNEL_REF bli_ctrsm4m1_l_ukr_ref +#define BLIS_ZTRSM4M1_L_UKERNEL_REF bli_ztrsm4m1_l_ukr_ref + +// trsm4m1_u micro-kernels + +#define BLIS_CTRSM4M1_U_UKERNEL_REF bli_ctrsm4m1_u_ukr_ref +#define BLIS_ZTRSM4M1_U_UKERNEL_REF bli_ztrsm4m1_u_ukr_ref + // // Level-3 3mh // @@ -153,13 +133,33 @@ #define BLIS_ZGEMM3MH_UKERNEL_REF bli_zgemm3mh_ukr_ref // -// Level-3 4mb +// Level-3 3m1 // -// gemm4mb micro-kernels +// gemm3m1 micro-kernels -#define BLIS_CGEMM4MB_UKERNEL_REF bli_cgemm4mb_ukr_ref -#define BLIS_ZGEMM4MB_UKERNEL_REF bli_zgemm4mb_ukr_ref +#define BLIS_CGEMM3M1_UKERNEL_REF bli_cgemm3m1_ukr_ref +#define BLIS_ZGEMM3M1_UKERNEL_REF bli_zgemm3m1_ukr_ref + +// gemmtrsm3m1_l micro-kernels + +#define BLIS_CGEMMTRSM3M1_L_UKERNEL_REF bli_cgemmtrsm3m1_l_ukr_ref +#define BLIS_ZGEMMTRSM3M1_L_UKERNEL_REF bli_zgemmtrsm3m1_l_ukr_ref + +// gemmtrsm3m1_u micro-kernels + +#define BLIS_CGEMMTRSM3M1_U_UKERNEL_REF bli_cgemmtrsm3m1_u_ukr_ref +#define BLIS_ZGEMMTRSM3M1_U_UKERNEL_REF bli_zgemmtrsm3m1_u_ukr_ref + +// trsm3m1_l micro-kernels + +#define BLIS_CTRSM3M1_L_UKERNEL_REF bli_ctrsm3m1_l_ukr_ref +#define BLIS_ZTRSM3M1_L_UKERNEL_REF bli_ztrsm3m1_l_ukr_ref + +// trsm3m1_u micro-kernels + +#define BLIS_CTRSM3M1_U_UKERNEL_REF bli_ctrsm3m1_u_ukr_ref +#define BLIS_ZTRSM3M1_U_UKERNEL_REF bli_ztrsm3m1_u_ukr_ref // // Level-1m @@ -235,95 +235,95 @@ #define BLIS_CPACKM_30XK_KERNEL_REF bli_cpackm_ref_30xk #define BLIS_ZPACKM_30XK_KERNEL_REF bli_zpackm_ref_30xk -// packm_2xk_4m kernels +// packm_2xk_4mi kernels -#define BLIS_CPACKM_2XK_4M_KERNEL_REF bli_cpackm_ref_2xk_4m 
-#define BLIS_ZPACKM_2XK_4M_KERNEL_REF bli_zpackm_ref_2xk_4m +#define BLIS_CPACKM_2XK_4MI_KERNEL_REF bli_cpackm_ref_2xk_4mi +#define BLIS_ZPACKM_2XK_4MI_KERNEL_REF bli_zpackm_ref_2xk_4mi -// packm_4xk_4m kernels +// packm_4xk_4mi kernels -#define BLIS_CPACKM_4XK_4M_KERNEL_REF bli_cpackm_ref_4xk_4m -#define BLIS_ZPACKM_4XK_4M_KERNEL_REF bli_zpackm_ref_4xk_4m +#define BLIS_CPACKM_4XK_4MI_KERNEL_REF bli_cpackm_ref_4xk_4mi +#define BLIS_ZPACKM_4XK_4MI_KERNEL_REF bli_zpackm_ref_4xk_4mi -// packm_6xk_4m kernels +// packm_6xk_4mi kernels -#define BLIS_CPACKM_6XK_4M_KERNEL_REF bli_cpackm_ref_6xk_4m -#define BLIS_ZPACKM_6XK_4M_KERNEL_REF bli_zpackm_ref_6xk_4m +#define BLIS_CPACKM_6XK_4MI_KERNEL_REF bli_cpackm_ref_6xk_4mi +#define BLIS_ZPACKM_6XK_4MI_KERNEL_REF bli_zpackm_ref_6xk_4mi -// packm_8xk_4m kernels +// packm_8xk_4mi kernels -#define BLIS_CPACKM_8XK_4M_KERNEL_REF bli_cpackm_ref_8xk_4m -#define BLIS_ZPACKM_8XK_4M_KERNEL_REF bli_zpackm_ref_8xk_4m +#define BLIS_CPACKM_8XK_4MI_KERNEL_REF bli_cpackm_ref_8xk_4mi +#define BLIS_ZPACKM_8XK_4MI_KERNEL_REF bli_zpackm_ref_8xk_4mi -// packm_10xk_4m kernels +// packm_10xk_4mi kernels -#define BLIS_CPACKM_10XK_4M_KERNEL_REF bli_cpackm_ref_10xk_4m -#define BLIS_ZPACKM_10XK_4M_KERNEL_REF bli_zpackm_ref_10xk_4m +#define BLIS_CPACKM_10XK_4MI_KERNEL_REF bli_cpackm_ref_10xk_4mi +#define BLIS_ZPACKM_10XK_4MI_KERNEL_REF bli_zpackm_ref_10xk_4mi -// packm_12xk_4m kernels +// packm_12xk_4mi kernels -#define BLIS_CPACKM_12XK_4M_KERNEL_REF bli_cpackm_ref_12xk_4m -#define BLIS_ZPACKM_12XK_4M_KERNEL_REF bli_zpackm_ref_12xk_4m +#define BLIS_CPACKM_12XK_4MI_KERNEL_REF bli_cpackm_ref_12xk_4mi +#define BLIS_ZPACKM_12XK_4MI_KERNEL_REF bli_zpackm_ref_12xk_4mi -// packm_14xk_4m kernels +// packm_14xk_4mi kernels -#define BLIS_CPACKM_14XK_4M_KERNEL_REF bli_cpackm_ref_14xk_4m -#define BLIS_ZPACKM_14XK_4M_KERNEL_REF bli_zpackm_ref_14xk_4m +#define BLIS_CPACKM_14XK_4MI_KERNEL_REF bli_cpackm_ref_14xk_4mi +#define BLIS_ZPACKM_14XK_4MI_KERNEL_REF 
bli_zpackm_ref_14xk_4mi -// packm_16xk_4m kernels +// packm_16xk_4mi kernels -#define BLIS_CPACKM_16XK_4M_KERNEL_REF bli_cpackm_ref_16xk_4m -#define BLIS_ZPACKM_16XK_4M_KERNEL_REF bli_zpackm_ref_16xk_4m +#define BLIS_CPACKM_16XK_4MI_KERNEL_REF bli_cpackm_ref_16xk_4mi +#define BLIS_ZPACKM_16XK_4MI_KERNEL_REF bli_zpackm_ref_16xk_4mi -// packm_30xk_4m kernels +// packm_30xk_4mi kernels -#define BLIS_CPACKM_30XK_4M_KERNEL_REF bli_cpackm_ref_30xk_4m -#define BLIS_ZPACKM_30XK_4M_KERNEL_REF bli_zpackm_ref_30xk_4m +#define BLIS_CPACKM_30XK_4MI_KERNEL_REF bli_cpackm_ref_30xk_4mi +#define BLIS_ZPACKM_30XK_4MI_KERNEL_REF bli_zpackm_ref_30xk_4mi -// packm_2xk_3m kernels +// packm_2xk_3mi kernels -#define BLIS_CPACKM_2XK_3M_KERNEL_REF bli_cpackm_ref_2xk_3m -#define BLIS_ZPACKM_2XK_3M_KERNEL_REF bli_zpackm_ref_2xk_3m +#define BLIS_CPACKM_2XK_3MI_KERNEL_REF bli_cpackm_ref_2xk_3mi +#define BLIS_ZPACKM_2XK_3MI_KERNEL_REF bli_zpackm_ref_2xk_3mi -// packm_4xk_3m kernels +// packm_4xk_3mi kernels -#define BLIS_CPACKM_4XK_3M_KERNEL_REF bli_cpackm_ref_4xk_3m -#define BLIS_ZPACKM_4XK_3M_KERNEL_REF bli_zpackm_ref_4xk_3m +#define BLIS_CPACKM_4XK_3MI_KERNEL_REF bli_cpackm_ref_4xk_3mi +#define BLIS_ZPACKM_4XK_3MI_KERNEL_REF bli_zpackm_ref_4xk_3mi -// packm_6xk_3m kernels +// packm_6xk_3mi kernels -#define BLIS_CPACKM_6XK_3M_KERNEL_REF bli_cpackm_ref_6xk_3m -#define BLIS_ZPACKM_6XK_3M_KERNEL_REF bli_zpackm_ref_6xk_3m +#define BLIS_CPACKM_6XK_3MI_KERNEL_REF bli_cpackm_ref_6xk_3mi +#define BLIS_ZPACKM_6XK_3MI_KERNEL_REF bli_zpackm_ref_6xk_3mi -// packm_8xk_3m kernels +// packm_8xk_3mi kernels -#define BLIS_CPACKM_8XK_3M_KERNEL_REF bli_cpackm_ref_8xk_3m -#define BLIS_ZPACKM_8XK_3M_KERNEL_REF bli_zpackm_ref_8xk_3m +#define BLIS_CPACKM_8XK_3MI_KERNEL_REF bli_cpackm_ref_8xk_3mi +#define BLIS_ZPACKM_8XK_3MI_KERNEL_REF bli_zpackm_ref_8xk_3mi -// packm_10xk_3m kernels +// packm_10xk_3mi kernels -#define BLIS_CPACKM_10XK_3M_KERNEL_REF bli_cpackm_ref_10xk_3m -#define BLIS_ZPACKM_10XK_3M_KERNEL_REF 
bli_zpackm_ref_10xk_3m +#define BLIS_CPACKM_10XK_3MI_KERNEL_REF bli_cpackm_ref_10xk_3mi +#define BLIS_ZPACKM_10XK_3MI_KERNEL_REF bli_zpackm_ref_10xk_3mi -// packm_12xk_3m kernels +// packm_12xk_3mi kernels -#define BLIS_CPACKM_12XK_3M_KERNEL_REF bli_cpackm_ref_12xk_3m -#define BLIS_ZPACKM_12XK_3M_KERNEL_REF bli_zpackm_ref_12xk_3m +#define BLIS_CPACKM_12XK_3MI_KERNEL_REF bli_cpackm_ref_12xk_3mi +#define BLIS_ZPACKM_12XK_3MI_KERNEL_REF bli_zpackm_ref_12xk_3mi -// packm_14xk_3m kernels +// packm_14xk_3mi kernels -#define BLIS_CPACKM_14XK_3M_KERNEL_REF bli_cpackm_ref_14xk_3m -#define BLIS_ZPACKM_14XK_3M_KERNEL_REF bli_zpackm_ref_14xk_3m +#define BLIS_CPACKM_14XK_3MI_KERNEL_REF bli_cpackm_ref_14xk_3mi +#define BLIS_ZPACKM_14XK_3MI_KERNEL_REF bli_zpackm_ref_14xk_3mi -// packm_16xk_3m kernels +// packm_16xk_3mi kernels -#define BLIS_CPACKM_16XK_3M_KERNEL_REF bli_cpackm_ref_16xk_3m -#define BLIS_ZPACKM_16XK_3M_KERNEL_REF bli_zpackm_ref_16xk_3m +#define BLIS_CPACKM_16XK_3MI_KERNEL_REF bli_cpackm_ref_16xk_3mi +#define BLIS_ZPACKM_16XK_3MI_KERNEL_REF bli_zpackm_ref_16xk_3mi -// packm_30xk_3m kernels +// packm_30xk_3mi kernels -#define BLIS_CPACKM_30XK_3M_KERNEL_REF bli_cpackm_ref_30xk_3m -#define BLIS_ZPACKM_30XK_3M_KERNEL_REF bli_zpackm_ref_30xk_3m +#define BLIS_CPACKM_30XK_3MI_KERNEL_REF bli_cpackm_ref_30xk_3mi +#define BLIS_ZPACKM_30XK_3MI_KERNEL_REF bli_zpackm_ref_30xk_3mi // packm_2xk_rih kernels diff --git a/frame/include/bli_kernel_prototypes.h b/frame/include/bli_kernel_prototypes.h index 7607872e7..7fce7987b 100644 --- a/frame/include/bli_kernel_prototypes.h +++ b/frame/include/bli_kernel_prototypes.h @@ -160,8 +160,8 @@ INSERT_GENTPROT_BASIC( TRSM_U_UKERNEL ) // gemm4m micro-kernels -#define bli_cGEMM4M_UKERNEL BLIS_CGEMM4M_UKERNEL -#define bli_zGEMM4M_UKERNEL BLIS_ZGEMM4M_UKERNEL +#define bli_cGEMM4M1_UKERNEL BLIS_CGEMM4M1_UKERNEL +#define bli_zGEMM4M1_UKERNEL BLIS_ZGEMM4M1_UKERNEL #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \ @@ 
-177,12 +177,12 @@ void PASTEMAC(ch,kername) \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( GEMM4M_UKERNEL ) +INSERT_GENTPROTCO_BASIC( GEMM4M1_UKERNEL ) // gemmtrsm4m_l micro-kernels -#define bli_cGEMMTRSM4M_L_UKERNEL BLIS_CGEMMTRSM4M_L_UKERNEL -#define bli_zGEMMTRSM4M_L_UKERNEL BLIS_ZGEMMTRSM4M_L_UKERNEL +#define bli_cGEMMTRSM4M1_L_UKERNEL BLIS_CGEMMTRSM4M1_L_UKERNEL +#define bli_zGEMMTRSM4M1_L_UKERNEL BLIS_ZGEMMTRSM4M1_L_UKERNEL #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \ @@ -199,12 +199,12 @@ void PASTEMAC(ch,kername) \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( GEMMTRSM4M_L_UKERNEL ) +INSERT_GENTPROTCO_BASIC( GEMMTRSM4M1_L_UKERNEL ) // gemmtrsm4m_u micro-kernels -#define bli_cGEMMTRSM4M_U_UKERNEL BLIS_CGEMMTRSM4M_U_UKERNEL -#define bli_zGEMMTRSM4M_U_UKERNEL BLIS_ZGEMMTRSM4M_U_UKERNEL +#define bli_cGEMMTRSM4M1_U_UKERNEL BLIS_CGEMMTRSM4M1_U_UKERNEL +#define bli_zGEMMTRSM4M1_U_UKERNEL BLIS_ZGEMMTRSM4M1_U_UKERNEL #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \ @@ -221,12 +221,12 @@ void PASTEMAC(ch,kername) \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( GEMMTRSM4M_U_UKERNEL ) +INSERT_GENTPROTCO_BASIC( GEMMTRSM4M1_U_UKERNEL ) // trsm4m_l micro-kernels -#define bli_cTRSM4M_L_UKERNEL BLIS_CTRSM4M_L_UKERNEL -#define bli_zTRSM4M_L_UKERNEL BLIS_ZTRSM4M_L_UKERNEL +#define bli_cTRSM4M1_L_UKERNEL BLIS_CTRSM4M1_L_UKERNEL +#define bli_zTRSM4M1_L_UKERNEL BLIS_ZTRSM4M1_L_UKERNEL #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \ @@ -239,12 +239,12 @@ void PASTEMAC(ch,kername) \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( TRSM4M_L_UKERNEL ) +INSERT_GENTPROTCO_BASIC( TRSM4M1_L_UKERNEL ) // trsm4m_u micro-kernels -#define bli_cTRSM4M_U_UKERNEL BLIS_CTRSM4M_U_UKERNEL -#define bli_zTRSM4M_U_UKERNEL BLIS_ZTRSM4M_U_UKERNEL +#define bli_cTRSM4M1_U_UKERNEL BLIS_CTRSM4M1_U_UKERNEL +#define bli_zTRSM4M1_U_UKERNEL BLIS_ZTRSM4M1_U_UKERNEL #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, 
chr, kername ) \ @@ -257,7 +257,7 @@ void PASTEMAC(ch,kername) \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( TRSM4M_U_UKERNEL ) +INSERT_GENTPROTCO_BASIC( TRSM4M1_U_UKERNEL ) // @@ -266,8 +266,8 @@ INSERT_GENTPROTCO_BASIC( TRSM4M_U_UKERNEL ) // gemm3m micro-kernels -#define bli_cGEMM3M_UKERNEL BLIS_CGEMM3M_UKERNEL -#define bli_zGEMM3M_UKERNEL BLIS_ZGEMM3M_UKERNEL +#define bli_cGEMM3M1_UKERNEL BLIS_CGEMM3M1_UKERNEL +#define bli_zGEMM3M1_UKERNEL BLIS_ZGEMM3M1_UKERNEL #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \ @@ -283,12 +283,12 @@ void PASTEMAC(ch,kername) \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( GEMM3M_UKERNEL ) +INSERT_GENTPROTCO_BASIC( GEMM3M1_UKERNEL ) // gemmtrsm3m_l micro-kernels -#define bli_cGEMMTRSM3M_L_UKERNEL BLIS_CGEMMTRSM3M_L_UKERNEL -#define bli_zGEMMTRSM3M_L_UKERNEL BLIS_ZGEMMTRSM3M_L_UKERNEL +#define bli_cGEMMTRSM3M1_L_UKERNEL BLIS_CGEMMTRSM3M1_L_UKERNEL +#define bli_zGEMMTRSM3M1_L_UKERNEL BLIS_ZGEMMTRSM3M1_L_UKERNEL #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \ @@ -305,12 +305,12 @@ void PASTEMAC(ch,kername) \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( GEMMTRSM3M_L_UKERNEL ) +INSERT_GENTPROTCO_BASIC( GEMMTRSM3M1_L_UKERNEL ) // gemmtrsm3m_u micro-kernels -#define bli_cGEMMTRSM3M_U_UKERNEL BLIS_CGEMMTRSM3M_U_UKERNEL -#define bli_zGEMMTRSM3M_U_UKERNEL BLIS_ZGEMMTRSM3M_U_UKERNEL +#define bli_cGEMMTRSM3M1_U_UKERNEL BLIS_CGEMMTRSM3M1_U_UKERNEL +#define bli_zGEMMTRSM3M1_U_UKERNEL BLIS_ZGEMMTRSM3M1_U_UKERNEL #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \ @@ -327,12 +327,12 @@ void PASTEMAC(ch,kername) \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( GEMMTRSM3M_U_UKERNEL ) +INSERT_GENTPROTCO_BASIC( GEMMTRSM3M1_U_UKERNEL ) // trsm3m_l micro-kernels -#define bli_cTRSM3M_L_UKERNEL BLIS_CTRSM3M_L_UKERNEL -#define bli_zTRSM3M_L_UKERNEL BLIS_ZTRSM3M_L_UKERNEL +#define bli_cTRSM3M1_L_UKERNEL BLIS_CTRSM3M1_L_UKERNEL +#define bli_zTRSM3M1_L_UKERNEL 
BLIS_ZTRSM3M1_L_UKERNEL #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \ @@ -345,12 +345,12 @@ void PASTEMAC(ch,kername) \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( TRSM3M_L_UKERNEL ) +INSERT_GENTPROTCO_BASIC( TRSM3M1_L_UKERNEL ) // trsm3m_u micro-kernels -#define bli_cTRSM3M_U_UKERNEL BLIS_CTRSM3M_U_UKERNEL -#define bli_zTRSM3M_U_UKERNEL BLIS_ZTRSM3M_U_UKERNEL +#define bli_cTRSM3M1_U_UKERNEL BLIS_CTRSM3M1_U_UKERNEL +#define bli_zTRSM3M1_U_UKERNEL BLIS_ZTRSM3M1_U_UKERNEL #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, kername ) \ @@ -363,7 +363,7 @@ void PASTEMAC(ch,kername) \ auxinfo_t* data \ ); -INSERT_GENTPROTCO_BASIC( TRSM3M_U_UKERNEL ) +INSERT_GENTPROTCO_BASIC( TRSM3M1_U_UKERNEL ) // diff --git a/frame/include/bli_kernel_rih_macro_defs.h b/frame/include/bli_kernel_rih_macro_defs.h index 94d45e93a..543d197a0 100644 --- a/frame/include/bli_kernel_rih_macro_defs.h +++ b/frame/include/bli_kernel_rih_macro_defs.h @@ -36,48 +36,7 @@ #define BLIS_KERNEL_RIH_MACRO_DEFS_H -// -- Define 4mh/3mh row access bools ------------------------------------------ - -// gemm4mh micro-kernels - -#define BLIS_CGEMM4MH_UKERNEL_PREFERS_CONTIG_ROWS \ - BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS -#define BLIS_ZGEMM4MH_UKERNEL_PREFERS_CONTIG_ROWS \ - BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS - -// gemm3mh micro-kernels - -#define BLIS_CGEMM3MH_UKERNEL_PREFERS_CONTIG_ROWS \ - BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS -#define BLIS_ZGEMM3MH_UKERNEL_PREFERS_CONTIG_ROWS \ - BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS - - -// -- Define default 4mh/3mh-specific kernel names ----------------------------- - -// -// Level-3 -// - -// gemm4mh micro-kernels - -#ifndef BLIS_CGEMM4MH_UKERNEL -#define BLIS_CGEMM4MH_UKERNEL BLIS_CGEMM4MH_UKERNEL_REF -#endif - -#ifndef BLIS_ZGEMM4MH_UKERNEL -#define BLIS_ZGEMM4MH_UKERNEL BLIS_ZGEMM4MH_UKERNEL_REF -#endif - -// gemm3mh micro-kernels - -#ifndef BLIS_CGEMM3MH_UKERNEL -#define BLIS_CGEMM3MH_UKERNEL BLIS_CGEMM3MH_UKERNEL_REF 
-#endif - -#ifndef BLIS_ZGEMM3MH_UKERNEL -#define BLIS_ZGEMM3MH_UKERNEL BLIS_ZGEMM3MH_UKERNEL_REF -#endif +// -- Define default rih-specific kernel names --------------------------------- // // Level-1m diff --git a/frame/include/blis.h b/frame/include/blis.h index 61adfdb4e..66ccbb39a 100644 --- a/frame/include/blis.h +++ b/frame/include/blis.h @@ -83,9 +83,18 @@ extern "C" { #include "bli_kernel_type_defs.h" #include "bli_kernel_pre_macro_defs.h" #include "bli_kernel_macro_defs.h" -#include "bli_kernel_4m_macro_defs.h" -#include "bli_kernel_3m_macro_defs.h" + +#include "bli_kernel_4mh_macro_defs.h" +#include "bli_kernel_4mb_macro_defs.h" +#include "bli_kernel_4m1_macro_defs.h" + +#include "bli_kernel_3mh_macro_defs.h" +#include "bli_kernel_3m1_macro_defs.h" + +#include "bli_kernel_4mi_macro_defs.h" +#include "bli_kernel_3mi_macro_defs.h" #include "bli_kernel_rih_macro_defs.h" + #include "bli_kernel_post_macro_defs.h" #include "bli_kernel_prototypes.h" @@ -120,7 +129,7 @@ extern "C" { #include "bli_4mb.h" #include "bli_4m.h" #include "bli_3mh.h" -#include "bli_3m.h" +#include "bli_3m1.h" // Control tree definitions. 
#include "bli_cntl.h" diff --git a/test/3m4m/Makefile b/test/3m4m/Makefile index 78acaf01c..07d9fd708 100644 --- a/test/3m4m/Makefile +++ b/test/3m4m/Makefile @@ -178,20 +178,20 @@ BLI_DEF := -DBLIS BLA_DEF := -DBLAS # Complex implementation type -D4M1 := -D_4M1 -D4M1B := -D_4M1B D4MHW := -D_4MHW -D3M1 := -D_3M1 +D4M1B := -D_4M1B +D4M1A := -D_4M1A D3MHW := -D_3MHW +D3M1 := -D_3M1 # Implementation string STR_OBL := -DSTR=\"openblas\" STR_ASM := -DSTR=\"asm\" -STR_4M1 := -DSTR=\"4m1\" -STR_4M1B := -DSTR=\"4m1b\" STR_4MHW := -DSTR=\"4mhw\" -STR_3M1 := -DSTR=\"3m1\" +STR_4M1B := -DSTR=\"4m1b\" +STR_4M1A := -DSTR=\"4m1a\" STR_3MHW := -DSTR=\"3mhw\" +STR_3M1 := -DSTR=\"3m1\" # Single or multithreaded string STR_ST := -DTHR_STR=\"st\" @@ -225,8 +225,8 @@ blis: \ \ test_cgemm_asm_blis_st.x \ test_zgemm_asm_blis_st.x \ - test_cgemm_4m1_blis_st.x \ - test_zgemm_4m1_blis_st.x \ + test_cgemm_4m1a_blis_st.x \ + test_zgemm_4m1a_blis_st.x \ test_cgemm_4m1b_blis_st.x \ test_zgemm_4m1b_blis_st.x \ test_cgemm_4mhw_blis_st.x \ @@ -242,8 +242,8 @@ blis-mt: \ \ test_cgemm_asm_blis_mt.x \ test_zgemm_asm_blis_mt.x \ - test_cgemm_4m1_blis_mt.x \ - test_zgemm_4m1_blis_mt.x \ + test_cgemm_4m1a_blis_mt.x \ + test_zgemm_4m1a_blis_mt.x \ test_cgemm_4m1b_blis_mt.x \ test_zgemm_4m1b_blis_mt.x \ test_cgemm_4mhw_blis_mt.x \ @@ -312,18 +312,18 @@ test_z%_asm_blis_mt.o: test_%.c test_c%_asm_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(STR_ASM) $(STR_MT) -c $< -o $@ -# blis 4m1 -test_z%_4m1_blis_st.o: test_%.c - $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D4M1) $(STR_4M1) $(STR_ST) -c $< -o $@ +# blis 4mhw +test_z%_4mhw_blis_st.o: test_%.c + $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_ST) -c $< -o $@ -test_c%_4m1_blis_st.o: test_%.c - $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D4M1) $(STR_4M1) $(STR_ST) -c $< -o $@ +test_c%_4mhw_blis_st.o: test_%.c + $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_ST) -c $< -o $@ 
-test_z%_4m1_blis_mt.o: test_%.c - $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D4M1) $(STR_4M1) $(STR_MT) -c $< -o $@ +test_z%_4mhw_blis_mt.o: test_%.c + $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_MT) -c $< -o $@ -test_c%_4m1_blis_mt.o: test_%.c - $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D4M1) $(STR_4M1) $(STR_MT) -c $< -o $@ +test_c%_4mhw_blis_mt.o: test_%.c + $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_MT) -c $< -o $@ # blis 4m1b test_z%_4m1b_blis_st.o: test_%.c @@ -338,31 +338,18 @@ test_z%_4m1b_blis_mt.o: test_%.c test_c%_4m1b_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D4M1B) $(STR_4M1B) $(STR_MT) -c $< -o $@ -# blis 4mhw -test_z%_4mhw_blis_st.o: test_%.c - $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_ST) -c $< -o $@ +# blis 4m1a +test_z%_4m1a_blis_st.o: test_%.c + $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_ST) -c $< -o $@ -test_c%_4mhw_blis_st.o: test_%.c - $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_ST) -c $< -o $@ +test_c%_4m1a_blis_st.o: test_%.c + $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_ST) -c $< -o $@ -test_z%_4mhw_blis_mt.o: test_%.c - $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_MT) -c $< -o $@ +test_z%_4m1a_blis_mt.o: test_%.c + $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_MT) -c $< -o $@ -test_c%_4mhw_blis_mt.o: test_%.c - $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_MT) -c $< -o $@ - -# blis 3m1 -test_z%_3m1_blis_st.o: test_%.c - $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_ST) -c $< -o $@ - -test_c%_3m1_blis_st.o: test_%.c - $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_ST) -c $< -o $@ - -test_z%_3m1_blis_mt.o: test_%.c - $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_MT) -c $< -o 
$@ - -test_c%_3m1_blis_mt.o: test_%.c - $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_MT) -c $< -o $@ +test_c%_4m1a_blis_mt.o: test_%.c + $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_MT) -c $< -o $@ # blis 3mhw test_z%_3mhw_blis_st.o: test_%.c @@ -377,6 +364,19 @@ test_z%_3mhw_blis_mt.o: test_%.c test_c%_3mhw_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D3MHW) $(STR_3MHW) $(STR_MT) -c $< -o $@ +# blis 3m1 +test_z%_3m1_blis_st.o: test_%.c + $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_ST) -c $< -o $@ + +test_c%_3m1_blis_st.o: test_%.c + $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_ST) -c $< -o $@ + +test_z%_3m1_blis_mt.o: test_%.c + $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_MT) -c $< -o $@ + +test_c%_3m1_blis_mt.o: test_%.c + $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_MT) -c $< -o $@ + # -- Executable file rules -- diff --git a/test/3m4m/runme.sh b/test/3m4m/runme.sh index d7305e4be..61372ab28 100755 --- a/test/3m4m/runme.sh +++ b/test/3m4m/runme.sh @@ -15,8 +15,8 @@ jc_nt=2 nt=16 # Threadedness to test. -threads="st mt" -threads_r="st mt" +threads="st" # mt" +threads_r="st" # mt" # Datatypes to test. dts="z c" @@ -28,7 +28,8 @@ test_ops="${l3_ops}" test_ops_r="${l3_ops}" # Implementations to test -test_impls="openblas asm_blis 4m1_blis 4m1b_blis 4mhw_blis 3m1_blis 3mhw_blis" +#test_impls="openblas asm_blis 4m1a_blis 4m1b_blis 4mhw_blis 3m1_blis 3mhw_blis" +test_impls="asm_blis" test_impls_r="asm_blis" # First perform real test cases. @@ -60,7 +61,7 @@ for th in ${threads_r}; do # Construct the name of the output file. out_file="${out_root}_${th}_${dt}${op}_${im}.m" - echo "Running (nt = 1) ./${exec_name} > ${out_file}" + echo "Running (nt = ${OMP_NUM_THREADS}) ./${exec_name} > ${out_file}" # Run executable. 
./${exec_name} > ${out_file} diff --git a/test/3m4m/test_gemm.c b/test/3m4m/test_gemm.c index b79e10fe7..c89fcc0c5 100644 --- a/test/3m4m/test_gemm.c +++ b/test/3m4m/test_gemm.c @@ -144,7 +144,9 @@ int main( int argc, char** argv ) bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, k, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); - //bli_obj_create( dt, m, n, 4, 4*m, &c ); + //bli_obj_create( dt, m, k, 2, 2*m, &a ); + //bli_obj_create( dt, k, n, 2, 2*k, &b ); + //bli_obj_create( dt, m, n, 2, 2*m, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); @@ -178,16 +180,16 @@ int main( int argc, char** argv ) #ifdef BLIS - #if defined _4M1 - bli_gemm4m( &alpha, + #if defined _4MHW + bli_gemm4mh( &alpha, #elif defined _4M1B bli_gemm4mb( &alpha, - #elif defined _4MHW - bli_gemm4mh( &alpha, - #elif defined _3M1 - bli_gemm3m( &alpha, + #elif defined _4M1A + bli_gemm4m1( &alpha, #elif defined _3MHW bli_gemm3mh( &alpha, + #elif defined _3M1 + bli_gemm3m1( &alpha, #else bli_gemm( &alpha, #endif diff --git a/testsuite/input.general b/testsuite/input.general index df6b80442..f409ba843 100644 --- a/testsuite/input.general +++ b/testsuite/input.general @@ -21,13 +21,13 @@ sdcz # Datatype(s) to test: # 's' = single real; 'c' = single complex; # 'd' = double real; 'z' = double complex 100 # Problem size: first to test -300 # Problem size: maximum to test +400 # Problem size: maximum to test 100 # Problem size: increment between experiments # Complex level-3 implementations 0 # 3mh ('1' = enable; '0' = disable) -0 # 3m ('1' = enable; '0' = disable) +0 # 3m1 ('1' = enable; '0' = disable) 0 # 4mh ('1' = enable; '0' = disable) -1 # 4m ('1' = enable; '0' = disable) +1 # 4m1 ('1' = enable; '0' = disable) 1 # Error-checking level: # '0' = disable error checking; '1' = full error checking i # Reaction to test failure: diff --git a/testsuite/src/test_libblis.c b/testsuite/src/test_libblis.c index 4a1c8e088..d8cdf44cc 100644 --- a/testsuite/src/test_libblis.c +++ 
b/testsuite/src/test_libblis.c @@ -382,17 +382,17 @@ void libblis_test_read_params_file( char* input_filename, test_params_t* params libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->enable_3mh) ); - // Read whether to enable 3m. + // Read whether to enable 3m1. libblis_test_read_next_line( buffer, input_stream ); - sscanf( buffer, "%u ", &(params->enable_3m) ); + sscanf( buffer, "%u ", &(params->enable_3m1) ); // Read whether to enable 4mh. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->enable_4mh) ); - // Read whether to enable 4m. + // Read whether to enable 4m1. libblis_test_read_next_line( buffer, input_stream ); - sscanf( buffer, "%u ", &(params->enable_4m) ); + sscanf( buffer, "%u ", &(params->enable_4m1) ); // Read the requested error-checking level. libblis_test_read_next_line( buffer, input_stream ); @@ -424,12 +424,12 @@ void libblis_test_read_params_file( char* input_filename, test_params_t* params // Enable/disable the alternative complex implementations. if ( params->enable_3mh ) bli_3mh_enable(); else bli_3mh_disable(); - if ( params->enable_3m ) bli_3m_enable(); - else bli_3m_disable(); + if ( params->enable_3m1 ) bli_3m1_enable(); + else bli_3m1_disable(); if ( params->enable_4mh ) bli_4mh_enable(); else bli_4mh_disable(); - if ( params->enable_4m ) bli_4m_enable(); - else bli_4m_disable(); + if ( params->enable_4m1 ) bli_4m1_enable(); + else bli_4m1_disable(); // Output the parameter struct. libblis_test_output_params_struct( stdout, params ); @@ -836,9 +836,9 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params ) libblis_test_fprintf_c( os, "problem size: max to test %u\n", params->p_max ); libblis_test_fprintf_c( os, "problem size increment %u\n", params->p_inc ); libblis_test_fprintf_c( os, "enable 3mh? %u\n", params->enable_3mh ); - libblis_test_fprintf_c( os, "enable 3m? %u\n", params->enable_3m ); + libblis_test_fprintf_c( os, "enable 3m1? 
%u\n", params->enable_3m1 ); libblis_test_fprintf_c( os, "enable 4mh? %u\n", params->enable_4mh ); - libblis_test_fprintf_c( os, "enable 4m? %u\n", params->enable_4m ); + libblis_test_fprintf_c( os, "enable 4m1? %u\n", params->enable_4m1 ); libblis_test_fprintf_c( os, "error-checking level %u\n", params->error_checking_level ); libblis_test_fprintf_c( os, "reaction to failure %c\n", params->reaction_to_failure ); libblis_test_fprintf_c( os, "output in matlab format? %u\n", params->output_matlab_format ); diff --git a/testsuite/src/test_libblis.h b/testsuite/src/test_libblis.h index 31dbf429d..1b03c0619 100644 --- a/testsuite/src/test_libblis.h +++ b/testsuite/src/test_libblis.h @@ -161,9 +161,9 @@ typedef struct unsigned int p_max; unsigned int p_inc; unsigned int enable_3mh; - unsigned int enable_3m; + unsigned int enable_3m1; unsigned int enable_4mh; - unsigned int enable_4m; + unsigned int enable_4m1; char reaction_to_failure; unsigned int output_matlab_format; unsigned int output_files;