diff --git a/test/sup/Makefile b/test/sup/Makefile index daab19070..4e95305dd 100644 --- a/test/sup/Makefile +++ b/test/sup/Makefile @@ -505,6 +505,7 @@ eigen-mt: check-env $(EIGEN_MT_BINS) openblas-mt: check-env $(OPENBLAS_MT_BINS) vendor-mt: check-env $(VENDOR_MT_BINS) +# -- Multithreaded -- # --- Object file rules -------------------------------------------------------- @@ -540,6 +541,29 @@ $(eval $(call make-st-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(ld),$(imp # -- Multithreaded BLAS -- +# -- Multithreaded BLAS -- + +# Define the function that will be used to instantiate compilation rules +# for the various multithreaded implementations. +define make-mt-rule +test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_mt.o: test_gemm.c Makefile + $(CC) $(CFLAGS) $(ERRCHK) $(N_TRIALS) $(call get-pdefs,$(4)) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_MT) -c $$< -o $$@ +endef + +# Instantiate the rule function make-mt-rule() for each BLIS/BLAS/CBLAS +# implementation. +$(foreach dt,$(DTS), \ +$(foreach tr,$(TRANS), \ +$(foreach st,$(STORS), \ +$(foreach sh,$(SHAPES), \ +$(foreach sm,$(SMS_MT), \ +$(foreach sn,$(SNS_MT), \ +$(foreach sk,$(SKS_MT), \ +$(foreach impl,$(BIMPLS_MT), \ +$(eval $(call make-mt-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl))))))))))) + +# -- Single-threaded Eigen -- + # Define the function that will be used to instantiate compilation rules # for the various multithreaded implementations. define make-mt-rule @@ -602,6 +626,26 @@ $(foreach ld,$(LDIMS), \ $(foreach impl,$(EIMPLS), \ $(eval $(call make-eigmt-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(ld),$(impl)))))))))))) +# -- Multithreaded Eigen -- + +# Define the function that will be used to instantiate compilation rules +# for the multithreaded Eigen implementation. 
+define make-eigmt-rule +test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_mt.o: test_gemm.c Makefile + $(CXX) $(CXXFLAGS_MT) $(ERRCHK) $(N_TRIALS) $(call get-pdefs,$(4)) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_MT) -c $$< -o $$@ +endef + +# Instantiate the rule function make-eigmt-rule() for each Eigen implementation. +$(foreach dt,$(DTS), \ +$(foreach tr,$(TRANS), \ +$(foreach st,$(STORS), \ +$(foreach sh,$(SHAPES), \ +$(foreach sm,$(SMS_MT), \ +$(foreach sn,$(SNS_MT), \ +$(foreach sk,$(SKS_MT), \ +$(foreach impl,$(EIMPLS), \ +$(eval $(call make-eigmt-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl))))))))))) + # --- Executable file rules ---------------------------------------------------- diff --git a/test/sup/octave/runme.m b/test/sup/octave/runme.m deleted file mode 100644 index a3628b28f..000000000 --- a/test/sup/octave/runme.m +++ /dev/null @@ -1,8 +0,0 @@ - -% kabylake -plot_panel_trxsh(3.8,16,1,'st','d','rrr',[ 6 8 4 ],'../results/kabylake/20190619/4_800_4_mt201','kbl','MKL','matlab'); close; clear all; -plot_panel_trxsh(3.8,16,1,'st','d','ccc',[ 6 8 4 ],'../results/kabylake/20190619/4_800_4_mt201','kbl','MKL','matlab'); close; clear all; - -% epyc -plot_panel_trxsh(3.0,8,1,'st','d','rrr',[ 6 8 4 ],'../results/epyc/20190619/4_800_4_mt256','epyc','MKL','matlab'); close; clear all; -plot_panel_trxsh(3.0,8,1,'st','d','ccc',[ 6 8 4 ],'../results/epyc/20190619/4_800_4_mt256','epyc','MKL','matlab'); close; clear all; diff --git a/test/supmt/octave/gen_opsupnames.m b/test/sup/octave_mt/gen_opsupnames.m similarity index 100% rename from test/supmt/octave/gen_opsupnames.m rename to test/sup/octave_mt/gen_opsupnames.m diff --git a/test/supmt/octave/plot_l3sup_perf.m b/test/sup/octave_mt/plot_l3sup_perf.m similarity index 78% rename from test/supmt/octave/plot_l3sup_perf.m rename to 
test/sup/octave_mt/plot_l3sup_perf.m index 28056e25a..43a05e87b 100644 --- a/test/supmt/octave/plot_l3sup_perf.m +++ b/test/sup/octave_mt/plot_l3sup_perf.m @@ -9,6 +9,7 @@ function r_val = plot_l3sup_perf( opname, ... cfreq, ... dfps, ... theid, impl ) + %if ... %mod(theid-1,cols) == 2 || ... % ... %mod(theid-1,cols) == 3 || ... % ... %mod(theid-1,cols) == 4 || ... @@ -19,11 +20,11 @@ function r_val = plot_l3sup_perf( opname, ... %end %legend_plot_id = 11; -legend_plot_id = 1*cols + 1*5; +legend_plot_id = 0*cols + 1*6; if 1 -ax1 = subplot( rows, cols, theid ); -hold( ax1, 'on' ); + ax1 = subplot( rows, cols, theid ); + hold( ax1, 'on' ); end % Set line properties. @@ -77,9 +78,9 @@ end flopscol = size( data_blissup, 2 ); msize = 5; if 1 -fontsize = 11; + fontsize = 12; else -fontsize = 16; + fontsize = 16; end linesize = 0.5; legend_loc = 'southeast'; @@ -95,13 +96,14 @@ for psize_col = 1:3 end x_axis( :, 1 ) = data_blissup( :, psize_col ); -% Compute the number of data points we have in the x-axis. Note that -% we only use quarter the data points for the m = n = k column of graphs. -if mod(theid-1,cols) == 6 - np = size( data_blissup, 1 ) / 4; -else - np = size( data_blissup, 1 ); -end +% Compute the number of data points we have in the x-axis. Note that we +% only use half the data points for the m = n = k column of graphs. +%if mod(theid-1,cols) == 6 +% np = size( data_blissup, 1 ) / 2; +%else +% np = size( data_blissup, 1 ); +%end +np = size( data_blissup, 1 ); % Grab the last x-axis value. x_end = data_blissup( np, psize_col ); @@ -125,8 +127,7 @@ open_ln = line( x_axis( 1:np, 1 ), data_open( 1:np, flopscol ) / nth, ... vend_ln = line( x_axis( 1:np, 1 ), data_vend( 1:np, flopscol ) / nth, ... 'Color',color_vend, 'LineStyle',lines_vend, ... 'LineWidth',linesize ); -else -if theid == legend_plot_id +elseif theid == legend_plot_id blissup_ln = line( nan, nan, ... 'Color',color_blissup, 'LineStyle',lines_blissup, ... 
'LineWidth',linesize ); @@ -143,15 +144,28 @@ vend_ln = line( nan, nan, ... 'Color',color_vend, 'LineStyle',lines_vend, ... 'LineWidth',linesize ); end -end xlim( ax1, [x_begin x_end] ); ylim( ax1, [y_begin y_end] ); -if 6000 <= x_end && x_end < 10000 +if mod(theid-1,cols) == 3 || mod(theid-1,cols) == 4 || mod(theid-1,cols) == 5 + if nth == 12 + ylim( ax1, [y_begin y_end/2] ); + elseif nth > 12 + ylim( ax1, [y_begin y_end/6] ); + end +end + +if 10000 <= x_end && x_end < 15000 x_tick2 = x_end - 2000; x_tick1 = x_tick2/2; + %xticks( ax1, [ x_tick1 x_tick2 ] ); + xticks( ax1, [ 4000 8000 12000 ] ); +elseif 6000 <= x_end && x_end < 10000 + x_tick2 = x_end - 2000; + x_tick1 = x_tick2/2; + %xticks( ax1, [ x_tick1 x_tick2 ] ); xticks( ax1, [ x_tick1 x_tick2 ] ); elseif 4000 <= x_end && x_end < 6000 x_tick2 = x_end - 1000; @@ -188,11 +202,12 @@ if show_plot == 1 || theid == legend_plot_id set( leg,'Color','none' ); set( leg,'Units','inches' ); if impl == 'octave' - set( leg,'FontSize',fontsize ); - set( leg,'Position',[12.50 10.35 1.5 0.9 ] ); % (1,4tl) + set( leg,'FontSize',fontsize ); + %set( leg,'Position',[12.40 10.60 1.9 0.95 ] ); % (1,4tl) + set( leg,'Position',[18.80 10.60 1.9 0.95 ] ); % (1,4tl) else - set( leg,'FontSize',fontsize-1 ); - set( leg,'Position',[18.24 10.15 1.15 0.7 ] ); % (1,4tl) + set( leg,'FontSize',fontsize-1 ); + set( leg,'Position',[18.24 10.15 1.15 0.7 ] ); % (1,4tl) end set( leg,'Box','off' ); set( leg,'Color','none' ); @@ -209,17 +224,31 @@ box( ax1, 'on' ); titl = title( titlename ); set( titl, 'FontWeight', 'normal' ); % default font style is now 'bold'. +% The default is to align the plot title across whole figure, not the box. +% This is a hack to nudge the title back to the center of the box. if impl == 'octave' -tpos = get( titl, 'Position' ); % default is to align across whole figure, not box. -tpos(1) = tpos(1) + -40; -set( titl, 'Position', tpos ); % here we nudge it back to centered with box. 
+ tpos = get( titl, 'Position' ); + % For some reason, the titles in the graphs in the last column start + % off in a different relative position than the graphs in the other + % columns. Here, we manually account for that. + if mod(theid-1,cols) == 6 + tpos(1) = tpos(1) + -10; + else + tpos(1) = tpos(1) + -40; + end + set( titl, 'Position', tpos ); + set( titl, 'FontSize', fontsize ); +else % impl == 'matlab' + tpos = get( titl, 'Position' ); + tpos(1) = tpos(1) + 90; + set( titl, 'Position', tpos ); end if theid > (rows-1)*cols -%xlab = xlabel( ax1,xaxisname ); -%tpos = get( xlab, 'Position' ) -%tpos(2) = tpos(2) + 10; -%set( xlab, 'Position', tpos ); + %xlab = xlabel( ax1,xaxisname ); + %tpos = get( xlab, 'Position' ) + %tpos(2) = tpos(2) + 10; + %set( xlab, 'Position', tpos ); if theid == rows*cols - 6 xlab = xlabel( ax1, 'm = 6; n = k' ); elseif theid == rows*cols - 5 @@ -238,13 +267,8 @@ if theid > (rows-1)*cols end if mod(theid-1,cols) == 0 -ylab = ylabel( ax1,yaxisname ); + ylab = ylabel( ax1,yaxisname ); end -%export_fig( filename, colorflag, '-pdf', '-m2', '-painters', '-transparent' ); -%saveas( fig, filename_png ); - -%hold( ax1, 'off' ); - r_val = 0; diff --git a/test/supmt/octave/plot_panel_trxsh.m b/test/sup/octave_mt/plot_panel_trxsh.m similarity index 100% rename from test/supmt/octave/plot_panel_trxsh.m rename to test/sup/octave_mt/plot_panel_trxsh.m diff --git a/test/sup/octave_mt/runthese.m b/test/sup/octave_mt/runthese.m new file mode 100644 index 000000000..3d9afdfe4 --- /dev/null +++ b/test/sup/octave_mt/runthese.m @@ -0,0 +1,8 @@ +% kabylake +plot_panel_trxsh(3.80,16,4,'mt','d','rrr',[ 6 8 10 ],'../results/kabylake/20200302/mnkt100000_mt4','kbl','MKL','octave'); close; clear all; + +% haswell +plot_panel_trxsh(3.1,16,12,'mt','d','rrr',[ 6 8 10 ],'../results/haswell/20200302/mnkt100000_mt12','has','MKL','octave'); close; clear all; + +% epyc +plot_panel_trxsh(2.55,8,32,'mt','d','rrr',[ 6 8 10 
],'../results/epyc/20200302/mnkt100000_mt32','epyc','MKL','octave'); close; clear all; diff --git a/test/sup/octave/bkup/gen_opsupnames.m b/test/sup/octave_st/bkup/gen_opsupnames.m similarity index 100% rename from test/sup/octave/bkup/gen_opsupnames.m rename to test/sup/octave_st/bkup/gen_opsupnames.m diff --git a/test/sup/octave/bkup/plot_l3sup_perf.m b/test/sup/octave_st/bkup/plot_l3sup_perf.m similarity index 100% rename from test/sup/octave/bkup/plot_l3sup_perf.m rename to test/sup/octave_st/bkup/plot_l3sup_perf.m diff --git a/test/sup/octave/bkup/plot_panel_trxsh.m b/test/sup/octave_st/bkup/plot_panel_trxsh.m similarity index 100% rename from test/sup/octave/bkup/plot_panel_trxsh.m rename to test/sup/octave_st/bkup/plot_panel_trxsh.m diff --git a/test/sup/octave/bkup/runthese.m b/test/sup/octave_st/bkup/runthese.m similarity index 100% rename from test/sup/octave/bkup/runthese.m rename to test/sup/octave_st/bkup/runthese.m diff --git a/test/sup/octave/gen_opsupnames.m b/test/sup/octave_st/gen_opsupnames.m similarity index 100% rename from test/sup/octave/gen_opsupnames.m rename to test/sup/octave_st/gen_opsupnames.m diff --git a/test/sup/octave/load_data.m b/test/sup/octave_st/load_data.m similarity index 100% rename from test/sup/octave/load_data.m rename to test/sup/octave_st/load_data.m diff --git a/test/sup/octave/plot_l3sup_perf.m b/test/sup/octave_st/plot_l3sup_perf.m similarity index 52% rename from test/sup/octave/plot_l3sup_perf.m rename to test/sup/octave_st/plot_l3sup_perf.m index b9439bff5..8a615ada5 100644 --- a/test/sup/octave/plot_l3sup_perf.m +++ b/test/sup/octave_st/plot_l3sup_perf.m @@ -1,50 +1,29 @@ function r_val = plot_l3sup_perf( opname, ... - smalldims, ... data_blissup, ... - data_blisconv, ... + data_blislpab, ... data_eigen, ... data_open, ... data_bfeo, ... - data_vend, vend_str, ... - data_bfeo, ... data_xsmm, ... + data_vend, vend_str, ... nth, ... rows, cols, ... cfreq, ... dfps, ... 
theid, impl ) -% Define the column in which the performance rates are found. -flopscol = size( data_blissup, 2 ); - -% Check if blasfeo data is available. -has_bfeo = 1; -if data_bfeo( 1, flopscol ) == 0.0 - has_bfeo = 0; -end - -% Check if libxsmm data is available. -has_xsmm = 1; -if data_xsmm( 1, flopscol ) == 0.0 - has_xsmm = 0; -end - -% Define which plot id will have the legend. -% NOTE: We can draw the legend on any graph as long as it has already been -% rendered. Since the coordinates are global, we can simply always wait until -% the final graph to draw the legend. -%if nth == 1 -% if has_xsmm == 1 -% legend_plot_id = 2*cols + 1*5; -% else -% legend_plot_id = 1*cols + 1*5; -% end +%if ... %mod(theid-1,cols) == 2 || ... +% ... %mod(theid-1,cols) == 3 || ... +% ... %mod(theid-1,cols) == 4 || ... +% 0 == 1 ... %theid >= 19 +% show_plot = 0; %else -% legend_plot_id = 0*cols + 1*6; + show_plot = 1; %end -legend_plot_id = cols*rows; -% Hold the axes. +%legend_plot_id = 11; +legend_plot_id = 2*cols + 1*5; + if 1 ax1 = subplot( rows, cols, theid ); hold( ax1, 'on' ); @@ -52,13 +31,12 @@ end % Set line properties. color_blissup = 'k'; lines_blissup = '-'; markr_blissup = ''; -color_blisconv = 'k'; lines_blisconv = ':'; markr_blisconv = ''; +color_blislpab = 'k'; lines_blislpab = ':'; markr_blislpab = ''; color_eigen = 'm'; lines_eigen = '-.'; markr_eigen = 'o'; color_open = 'r'; lines_open = '--'; markr_open = 'o'; color_bfeo = 'c'; lines_bfeo = '-'; markr_bfeo = 'o'; -color_vend = 'b'; lines_vend = '-.'; markr_vend = '.'; -color_bfeo = 'c'; lines_bfeo = '-'; markr_bfeo = 'o'; color_xsmm = 'g'; lines_xsmm = '-'; markr_xsmm = 'o'; +color_vend = 'b'; lines_vend = '-.'; markr_vend = '.'; % Compute the peak performance in terms of the number of double flops % executable per cycle and the clock rate. @@ -77,13 +55,15 @@ titlename = '%s'; titlename = sprintf( titlename, title_opname ); % Set the legend strings. 
-blissup_lg = sprintf( 'BLIS sup' ); -blisconv_lg = sprintf( 'BLIS conv' ); -eigen_lg = sprintf( 'Eigen' ); -open_lg = sprintf( 'OpenBLAS' ); -vend_lg = vend_str; -bfeo_lg = sprintf( 'BLASFEO' ); -xsmm_lg = sprintf( 'libxsmm' ); +blissup_legend = sprintf( 'BLIS sup' ); +blislpab_legend = sprintf( 'BLIS conv' ); +eigen_legend = sprintf( 'Eigen' ); +open_legend = sprintf( 'OpenBLAS' ); +bfeo_legend = sprintf( 'BLASFEO' ); +xsmm_legend = sprintf( 'libxsmm' ); +%vend_legend = sprintf( 'MKL' ); +%vend_legend = sprintf( 'ARMPL' ); +vend_legend = vend_str; % Set axes range values. y_scale = 1.00; @@ -101,6 +81,7 @@ end %flopscol = 4; +flopscol = size( data_blissup, 2 ); msize = 5; if 1 fontsize = 12; @@ -130,44 +111,67 @@ x_axis( :, 1 ) = data_blissup( :, psize_col ); %end np = size( data_blissup, 1 ); +has_xsmm = 1; +if data_xsmm( 1, flopscol ) == 0.0 + has_xsmm = 0; +end + % Grab the last x-axis value. x_end = data_blissup( np, psize_col ); %data_peak( 1, 1:2 ) = [ 0 max_perf_core ]; %data_peak( 2, 1:2 ) = [ x_end max_perf_core ]; +if show_plot == 1 blissup_ln = line( x_axis( 1:np, 1 ), data_blissup( 1:np, flopscol ) / nth, ... - 'Color',color_blissup, 'LineStyle',lines_blissup, ... - 'LineWidth',linesize ); -blisconv_ln = line( x_axis( 1:np, 1 ), data_blisconv( 1:np, flopscol ) / nth, ... - 'Color',color_blisconv, 'LineStyle',lines_blisconv, ... - 'LineWidth',linesize ); + 'Color',color_blissup, 'LineStyle',lines_blissup, ... + 'LineWidth',linesize ); +blislpab_ln = line( x_axis( 1:np, 1 ), data_blislpab( 1:np, flopscol ) / nth, ... + 'Color',color_blislpab, 'LineStyle',lines_blislpab, ... + 'LineWidth',linesize ); eigen_ln = line( x_axis( 1:np, 1 ), data_eigen( 1:np, flopscol ) / nth, ... - 'Color',color_eigen, 'LineStyle',lines_eigen, ... - 'LineWidth',linesize ); + 'Color',color_eigen, 'LineStyle',lines_eigen, ... + 'LineWidth',linesize ); open_ln = line( x_axis( 1:np, 1 ), data_open( 1:np, flopscol ) / nth, ... - 'Color',color_open, 'LineStyle',lines_open, ... 
- 'LineWidth',linesize ); -vend_ln = line( x_axis( 1:np, 1 ), data_vend( 1:np, flopscol ) / nth, ... - 'Color',color_vend, 'LineStyle',lines_vend, ... - 'LineWidth',linesize ); -if has_bfeo == 1 - bfeo_ln = line( x_axis( 1:np, 1 ), data_bfeo( 1:np, flopscol ) / nth, ... - 'Color',color_bfeo, 'LineStyle',lines_bfeo, ... - 'LineWidth',linesize ); -else - bfeo_ln = line( nan, nan, ... - 'Color',color_bfeo, 'LineStyle',lines_bfeo, ... - 'LineWidth',linesize ); -end + 'Color',color_open, 'LineStyle',lines_open, ... + 'LineWidth',linesize ); +bfeo_ln = line( x_axis( 1:np, 1 ), data_bfeo( 1:np, flopscol ) / nth, ... + 'Color',color_bfeo, 'LineStyle',lines_bfeo, ... + 'LineWidth',linesize ); if has_xsmm == 1 - xsmm_ln = line( x_axis( 1:np, 1 ), data_xsmm( 1:np, flopscol ) / nth, ... - 'Color',color_xsmm, 'LineStyle',lines_xsmm, ... - 'LineWidth',linesize ); +xsmm_ln = line( x_axis( 1:np, 1 ), data_xsmm( 1:np, flopscol ) / nth, ... + 'Color',color_xsmm, 'LineStyle',lines_xsmm, ... + 'LineWidth',linesize ); else - xsmm_ln = line( nan, nan, ... - 'Color',color_xsmm, 'LineStyle',lines_xsmm, ... - 'LineWidth',linesize ); +xsmm_ln = line( nan, nan, ... + 'Color',color_xsmm, 'LineStyle',lines_xsmm, ... + 'LineWidth',linesize ); +end +vend_ln = line( x_axis( 1:np, 1 ), data_vend( 1:np, flopscol ) / nth, ... + 'Color',color_vend, 'LineStyle',lines_vend, ... + 'LineWidth',linesize ); +elseif theid == legend_plot_id +blissup_ln = line( nan, nan, ... + 'Color',color_blissup, 'LineStyle',lines_blissup, ... + 'LineWidth',linesize ); +blislpab_ln = line( nan, nan, ... + 'Color',color_blislpab, 'LineStyle',lines_blislpab, ... + 'LineWidth',linesize ); +eigen_ln = line( nan, nan, ... + 'Color',color_eigen, 'LineStyle',lines_eigen, ... + 'LineWidth',linesize ); +open_ln = line( nan, nan, ... + 'Color',color_open, 'LineStyle',lines_open, ... + 'LineWidth',linesize ); +bfeo_ln = line( nan, nan, ... + 'Color',color_bfeo, 'LineStyle',lines_bfeo, ... 
+ 'LineWidth',linesize ); +xsmm_ln = line( nan, nan, ... + 'Color',color_xsmm, 'LineStyle',lines_xsmm, ... + 'LineWidth',linesize ); +vend_ln = line( nan, nan, ... + 'Color',color_vend, 'LineStyle',lines_vend, ... + 'LineWidth',linesize ); end @@ -199,51 +203,71 @@ elseif 500 <= x_end && x_end < 1000 xticks( ax1, [ x_tick1 x_tick2 x_tick3 ] ); end - % xpos ypos - %set( leg,'Position',[11.32 6.36 1.15 0.7 ] ); % (1,4tl) -if nth == 1 && theid == legend_plot_id - if has_xsmm == 1 - % single-threaded, with libxsmm (ccc) - leg = legend( ... - [ blissup_ln blisconv_ln eigen_ln open_ln vend_ln bfeo_ln xsmm_ln ], ... - blissup_lg, blisconv_lg, eigen_lg, open_lg, vend_lg, bfeo_lg, xsmm_lg, ... - 'Location', legend_loc ); - set( leg,'Box','off','Color','none','Units','inches' ); - if impl == 'octave' - set( leg,'FontSize',fontsize ); - set( leg,'Position',[15.35 4.62 1.9 1.20] ); % (1,4tl) +if show_plot == 1 || theid == legend_plot_id + if nth == 1 && theid == legend_plot_id + if has_xsmm == 1 + leg = legend( ... + [ ... + blissup_ln ... + blislpab_ln ... + eigen_ln ... + open_ln ... + bfeo_ln ... + xsmm_ln ... + vend_ln ... + ], ... + blissup_legend, ... + blislpab_legend, ... + eigen_legend, ... + open_legend, ... + bfeo_legend, ... + xsmm_legend, ... + vend_legend, ... + 'Location', legend_loc ); + set( leg,'Box','off' ); + set( leg,'Color','none' ); + set( leg,'Units','inches' ); + if impl == 'octave' + set( leg,'FontSize',fontsize ); + set( leg,'Position',[15.40 4.75 1.9 1.20] ); % (1,4tl) + else + set( leg,'FontSize',fontsize-3 ); + set( leg,'Position',[18.20 10.20 1.15 0.7 ] ); % (1,4tl) + end else - set( leg,'FontSize',fontsize-3 ); - set( leg,'Position',[18.20 10.20 1.15 0.7 ] ); % (1,4tl) + leg = legend( ... + [ ... + blissup_ln ... + blislpab_ln ... + eigen_ln ... + open_ln ... + bfeo_ln ... + vend_ln ... + ], ... + blissup_legend, ... + blislpab_legend, ... + eigen_legend, ... + open_legend, ... + bfeo_legend, ... + vend_legend, ... 
+ 'Location', legend_loc ); + set( leg,'Box','off' ); + set( leg,'Color','none' ); + set( leg,'Units','inches' ); + if impl == 'octave' + set( leg,'FontSize',fontsize ); + set( leg,'Position',[15.40 7.65 1.9 1.10] ); % (1,4tl) + else + set( leg,'FontSize',fontsize-1 ); + set( leg,'Position',[18.24 10.15 1.15 0.7] ); % (1,4tl) + end end - else - % single-threaded, without libxsmm (rrr, or other) - leg = legend( ... - [ blissup_ln blisconv_ln eigen_ln open_ln vend_ln bfeo_ln ], ... - blissup_lg, blisconv_lg, eigen_lg, open_lg, vend_lg, bfeo_lg, ... - 'Location', legend_loc ); - set( leg,'Box','off','Color','none','Units','inches' ); - if impl == 'octave' - set( leg,'FontSize',fontsize ); - set( leg,'Position',[15.35 7.40 1.9 1.10] ); % (1,4tl) - else - set( leg,'FontSize',fontsize-1 ); - set( leg,'Position',[18.24 10.15 1.15 0.7] ); % (1,4tl) - end - end -elseif nth > 1 && theid == legend_plot_id - % multithreaded - leg = legend( ... - [ blissup_ln blisconv_ln eigen_ln open_ln vend_ln ], ... - blissup_lg, blisconv_lg, eigen_lg, open_lg, vend_lg, ... 
- 'Location', legend_loc ); - set( leg,'Box','off','Color','none','Units','inches' ); - if impl == 'octave' - set( leg,'FontSize',fontsize ); - set( leg,'Position',[18.20 10.30 1.9 0.95] ); % (1,4tl) - else - set( leg,'FontSize',fontsize-1 ); - set( leg,'Position',[18.24 10.15 1.15 0.7] ); % (1,4tl) + set( leg,'Box','off' ); + set( leg,'Color','none' ); + set( leg,'Units','inches' ); + % xpos ypos + %set( leg,'Position',[11.32 6.36 1.15 0.7 ] ); % (1,4tl) + elseif nth > 1 && theid == legend_plot_id end end @@ -274,38 +298,28 @@ else % impl == 'matlab' set( titl, 'Position', tpos ); end -sll_str = sprintf( 'm = %u; n = k', smalldims(1) ); -lsl_str = sprintf( 'n = %u; m = k', smalldims(2) ); -lls_str = sprintf( 'k = %u; m = n', smalldims(3) ); -lss_str = sprintf( 'm; n = %u, k = %u', smalldims(2), smalldims(3) ); -sls_str = sprintf( 'n; m = %u, k = %u', smalldims(1), smalldims(3) ); -ssl_str = sprintf( 'k; m = %u, n = %u', smalldims(1), smalldims(2) ); -lll_str = sprintf( 'm = n = k' ); - -% Place labels on the bottom row of graphs. if theid > (rows-1)*cols %xlab = xlabel( ax1,xaxisname ); %tpos = get( xlab, 'Position' ) %tpos(2) = tpos(2) + 10; %set( xlab, 'Position', tpos ); if theid == rows*cols - 6 - xlab = xlabel( ax1, sll_str ); + xlab = xlabel( ax1, 'm = 6; n = k' ); elseif theid == rows*cols - 5 - xlab = xlabel( ax1, lsl_str ); + xlab = xlabel( ax1, 'n = 8; m = k' ); elseif theid == rows*cols - 4 - xlab = xlabel( ax1, lls_str ); + xlab = xlabel( ax1, 'k = 4; m = n' ); elseif theid == rows*cols - 3 - xlab = xlabel( ax1, lss_str ); + xlab = xlabel( ax1, 'm; n = 8, k = 4' ); elseif theid == rows*cols - 2 - xlab = xlabel( ax1, sls_str ); + xlab = xlabel( ax1, 'n; m = 6, k = 4' ); elseif theid == rows*cols - 1 - xlab = xlabel( ax1, ssl_str ); + xlab = xlabel( ax1, 'k; m = 6, n = 8' ); elseif theid == rows*cols - 0 - xlab = xlabel( ax1, lll_str ); + xlab = xlabel( ax1, 'm = n = k' ); end end -% Place labels on the left-hand column of graphs. 
if mod(theid-1,cols) == 0 ylab = ylabel( ax1,yaxisname ); end diff --git a/test/sup/octave/plot_panel_trxsh.m b/test/sup/octave_st/plot_panel_trxsh.m similarity index 100% rename from test/sup/octave/plot_panel_trxsh.m rename to test/sup/octave_st/plot_panel_trxsh.m diff --git a/test/sup/octave_st/runthese.m b/test/sup/octave_st/runthese.m new file mode 100644 index 000000000..86b738b2e --- /dev/null +++ b/test/sup/octave_st/runthese.m @@ -0,0 +1,8 @@ +% kabylake +plot_panel_trxsh(3.80,16,1,'st','d','rrr',[ 6 8 4 ],'../results/kabylake/20200302/mnkt100000_st','kbl','MKL','octave'); close; clear all; + +% haswell +plot_panel_trxsh(3.5,16,1,'st','d','rrr',[ 6 8 4 ],'../results/haswell/20200302/mnkt100000_st','has','MKL','octave'); close; clear all; + +% epyc +plot_panel_trxsh(3.00, 8,1,'st','d','rrr',[ 6 8 4 ],'../results/epyc/20200302/mnkt100000_st','epyc','MKL','octave'); close; clear all; diff --git a/test/supmt/Makefile b/test/supmt/Makefile deleted file mode 100644 index 2ed93565d..000000000 --- a/test/supmt/Makefile +++ /dev/null @@ -1,580 +0,0 @@ -#!/bin/bash -# -# BLIS -# An object-based framework for developing high-performance BLAS-like -# libraries. -# -# Copyright (C) 2014, The University of Texas at Austin -# Copyright (C) 2019, Advanced Micro Devices, Inc. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# - Neither the name(s) of the copyright holder(s) nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# - -# -# Makefile -# -# Field G. Van Zee -# -# Makefile for standalone BLIS test drivers. -# - -# -# --- Makefile PHONY target definitions ---------------------------------------- -# - -.PHONY: all all-st all-mt \ - blis blis-st blis-mt \ - clean cleanx - - - -# -# --- Determine makefile fragment location ------------------------------------- -# - -# Comments: -# - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given. -# - We must use recursively expanded assignment for LIB_PATH and INC_PATH in -# the second case because CONFIG_NAME is not yet set. -ifneq ($(strip $(BLIS_INSTALL_PATH)),) -LIB_PATH := $(BLIS_INSTALL_PATH)/lib -INC_PATH := $(BLIS_INSTALL_PATH)/include/blis -SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis -else -DIST_PATH := ../.. -LIB_PATH = ../../lib/$(CONFIG_NAME) -INC_PATH = ../../include/$(CONFIG_NAME) -SHARE_PATH := ../.. -endif - - - -# -# --- Include common makefile definitions -------------------------------------- -# - -# Include the common makefile fragment. --include $(SHARE_PATH)/common.mk - - - -# -# --- BLAS and LAPACK implementations ------------------------------------------ -# - -# BLIS library and header path. 
This is simply wherever it was installed. -#BLIS_LIB_PATH := $(INSTALL_PREFIX)/lib -#BLIS_INC_PATH := $(INSTALL_PREFIX)/include/blis - -# BLIS library. -#BLIS_LIB := $(BLIS_LIB_PATH)/libblis.a - -# BLAS library path(s). This is where the BLAS libraries reside. -HOME_LIB_PATH := $(HOME)/flame/lib -MKL_LIB_PATH := $(HOME)/intel/mkl/lib/intel64 - -# netlib BLAS -NETLIB_LIB := $(HOME_LIB_PATH)/libblas.a - -# OpenBLAS -OPENBLAS_LIB := $(HOME_LIB_PATH)/libopenblas.a -OPENBLASP_LIB := $(HOME_LIB_PATH)/libopenblasp.a - -# BLASFEO -BLASFEO_LIB := $(HOME_LIB_PATH)/libblasfeo.a - -# libxsmm -LIBXSMM_LIB := $(HOME_LIB_PATH)/libxsmm.a -ldl \ - $(NETLIB_LIB) -lgfortran - -# ATLAS -ATLAS_LIB := $(HOME_LIB_PATH)/libf77blas.a \ - $(HOME_LIB_PATH)/libatlas.a - -# Eigen -EIGEN_INC := $(HOME)/flame/eigen/include/eigen3 -EIGEN_LIB := $(HOME_LIB_PATH)/libeigen_blas_static.a -EIGENP_LIB := $(EIGEN_LIB) - -# MKL -MKL_LIB := -L$(MKL_LIB_PATH) \ - -lmkl_intel_lp64 \ - -lmkl_core \ - -lmkl_sequential \ - -lpthread -lm -ldl -MKLP_LIB := -L$(MKL_LIB_PATH) \ - -lmkl_intel_lp64 \ - -lmkl_core \ - -lmkl_gnu_thread \ - -lpthread -lm -ldl -fopenmp - #-L$(ICC_LIB_PATH) \ - #-lgomp - -VENDOR_LIB := $(MKL_LIB) -VENDORP_LIB := $(MKLP_LIB) - - -# -# --- Problem size definitions ------------------------------------------------- -# - -# Single core -PS_BEGIN := 4 -PS_MAX := 800 -PS_INC := 4 - -# Multicore -P1_BEGIN := 8 -P1_MAX := 1600 -P1_INC := 8 - - -# -# --- General build definitions ------------------------------------------------ -# - -TEST_SRC_PATH := . -TEST_OBJ_PATH := . - -# Gather all local object files. -TEST_OBJS := $(sort $(patsubst $(TEST_SRC_PATH)/%.c, \ - $(TEST_OBJ_PATH)/%.o, \ - $(wildcard $(TEST_SRC_PATH)/*.c))) - -# Override the value of CINCFLAGS so that the value of CFLAGS returned by -# get-frame-cflags-for() is not cluttered up with include paths needed only -# while building BLIS. -CINCFLAGS := -I$(INC_PATH) - -# Use the "framework" CFLAGS for the configuration family. 
-CFLAGS := $(call get-user-cflags-for,$(CONFIG_NAME)) - -# Add local header paths to CFLAGS. -CFLAGS += -I$(TEST_SRC_PATH) - -# Locate the libblis library to which we will link. -LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L) - -# Define a set of CFLAGS for use with C++ and Eigen. -CXXFLAGS := $(subst -std=c99,-std=c++11,$(CFLAGS)) -CXXFLAGS += -I$(EIGEN_INC) - -# Create a copy of CXXFLAGS without -fopenmp in order to disable multithreading. -CXXFLAGS_ST := -march=native $(subst -fopenmp,,$(CXXFLAGS)) -CXXFLAGS_MT := -march=native $(CXXFLAGS) - -# Single or multithreaded string -STR_ST := -DTHR_STR=\"st\" -STR_MT := -DTHR_STR=\"mt\" - -# Number of trials per problem size. -N_TRIALS := -DN_TRIALS=3 - -# Problem size specification -PDEF_ST := -DP_BEGIN=$(PS_BEGIN) \ - -DP_MAX=$(PS_MAX) \ - -DP_INC=$(PS_INC) - -PDEF_MT := -DP_BEGIN=$(P1_BEGIN) \ - -DP_MAX=$(P1_MAX) \ - -DP_INC=$(P1_INC) - -ifeq ($(E),1) -ERRCHK := -DERROR_CHECK -else -ERRCHK := -DNO_ERROR_CHECK -endif - -# Enumerate possible datatypes and computation precisions. -#dts := s d c z -DTS := d - -TRANS := n_n \ - n_t \ - t_n \ - t_t - -# While BLIS supports all combinations of row and column storage for matrices -# C, A, and B, the alternatives mostly only support CBLAS APIs, which inherently -# support only "all row-storage" or "all column-storage". Thus, we disable the -# building of those other drivers so that compilation/linking completes sooner. -#STORS := r_r_r \ -# r_r_c \ -# r_c_r \ -# r_c_c \ -# c_r_r \ -# c_r_c \ -# c_c_r \ -# c_c_c -STORS := r_r_r \ - c_c_c - - -SHAPES := l_l_s \ - l_s_l \ - s_l_l \ - s_s_l \ - s_l_s \ - l_s_s \ - l_l_l - -SMS := 6 -SNS := 8 -SKS := 10 - - -# -# --- Function definitions ----------------------------------------------------- -# - -# A function to strip the underscores from a list of strings. -stripu = $(subst _,,$(1)) - -# Various functions that help us construct the datatype combinations and then -# extract the needed datatype strings and C preprocessor define flags. 
-get-1of2 = $(word 1,$(subst _, ,$(1))) -get-2of2 = $(word 2,$(subst _, ,$(1))) - -get-1of3 = $(word 1,$(subst _, ,$(1))) -get-2of3 = $(word 2,$(subst _, ,$(1))) -get-3of3 = $(word 3,$(subst _, ,$(1))) - -# Datatype defs. -get-dt-cpp = $(strip \ - $(if $(findstring s,$(1)),-DDT=BLIS_FLOAT -DIS_FLOAT,\ - $(if $(findstring d,$(1)),-DDT=BLIS_DOUBLE -DIS_DOUBLE,\ - $(if $(findstring c,$(1)),-DDT=BLIS_SCOMPLEX -DIS_SCOMPLEX,\ - -DDT=BLIS_DCOMPLEX -DIS_DCOMPLEX)))) - -# Transpose defs. -get-tra-defs-a = $(strip $(subst n,-DTRANSA=BLIS_NO_TRANSPOSE -DA_NOTRANS, \ - $(subst t,-DTRANSA=BLIS_TRANSPOSE -DA_TRANS,$(call get-1of2,$(1))))) -get-tra-defs-b = $(strip $(subst n,-DTRANSB=BLIS_NO_TRANSPOSE -DB_NOTRANS, \ - $(subst t,-DTRANSB=BLIS_TRANSPOSE -DB_TRANS,$(call get-2of2,$(1))))) -get-tra-defs = $(call get-tra-defs-a,$(1)) $(call get-tra-defs-b,$(1)) - -# Storage defs. -get-sto-uch-a = $(strip $(subst r,R, \ - $(subst c,C,$(call get-1of3,$(1))))) -get-sto-uch-b = $(strip $(subst r,R, \ - $(subst c,C,$(call get-2of3,$(1))))) -get-sto-uch-c = $(strip $(subst r,R, \ - $(subst c,C,$(call get-3of3,$(1))))) -get-sto-defs = $(strip \ - -DSTOR3=BLIS_$(call get-sto-uch-a,$(1))$(call get-sto-uch-b,$(1))$(call get-sto-uch-c,$(1)) \ - -DA_STOR_$(call get-sto-uch-a,$(1)) \ - -DB_STOR_$(call get-sto-uch-b,$(1)) \ - -DC_STOR_$(call get-sto-uch-c,$(1))) - -# Dimension defs. 
-get-shape-defs-cm = $(if $(findstring l,$(1)),-DM_DIM=-1,-DM_DIM=$(2)) -get-shape-defs-cn = $(if $(findstring l,$(1)),-DN_DIM=-1,-DN_DIM=$(2)) -get-shape-defs-ck = $(if $(findstring l,$(1)),-DK_DIM=-1,-DK_DIM=$(2)) -get-shape-defs-m = $(call get-shape-defs-cm,$(call get-1of3,$(1)),$(2)) -get-shape-defs-n = $(call get-shape-defs-cn,$(call get-2of3,$(1)),$(2)) -get-shape-defs-k = $(call get-shape-defs-ck,$(call get-3of3,$(1)),$(2)) - -# arguments: 1: shape (w/ underscores) 2: smallm 3: smalln 4: smallk -get-shape-defs = $(strip $(call get-shape-defs-m,$(1),$(2)) \ - $(call get-shape-defs-n,$(1),$(3)) \ - $(call get-shape-defs-k,$(1),$(4))) - -#$(error l_l_s 6 8 4 = $(call get-shape-defs,l_l_s,6,8,4)) - -# Shape-dimension string. -get-shape-str-ch = $(if $(findstring l,$(1)),p,$(2)) -get-shape-str-m = $(call get-shape-str-ch,$(call get-1of3,$(1)),$(2)) -get-shape-str-n = $(call get-shape-str-ch,$(call get-2of3,$(1)),$(2)) -get-shape-str-k = $(call get-shape-str-ch,$(call get-3of3,$(1)),$(2)) - -# arguments: 1: shape (w/ underscores) 2: smallm 3: smalln 4: smallk -get-shape-dim-str = m$(call get-shape-str-m,$(1),$(2))n$(call get-shape-str-n,$(1),$(3))k$(call get-shape-str-k,$(1),$(4)) - -# Implementation defs. -# Define a function to return the appropriate -DSTR= and -D[BLIS|BLAS] flags. -get-imp-defs = $(strip $(subst blissup,-DSTR=\"$(1)\" -DBLIS -DSUP, \ - $(subst blislpab,-DSTR=\"$(1)\" -DBLIS, \ - $(subst eigen,-DSTR=\"$(1)\" -DEIGEN, \ - $(subst openblas,-DSTR=\"$(1)\" -DCBLAS, \ - $(subst blasfeo,-DSTR=\"$(1)\" -DCBLAS, \ - $(subst libxsmm,-DSTR=\"$(1)\" -DBLAS -DXSMM, \ - $(subst vendor,-DSTR=\"$(1)\" -DCBLAS,$(1))))))))) - -TRANS0 = $(call stripu,$(TRANS)) -STORS0 = $(call stripu,$(STORS)) - -# Limit BLAS and Eigen to only using all row-stored, or all column-stored matrices. -# Also, limit libxsmm to using all column-stored matrices since it does not offer -# CBLAS interfaces. 
-BSTORS0 = rrr ccc -ESTORS0 = rrr ccc -XSTORS0 = ccc - - -# -# --- Object and binary file definitons ---------------------------------------- -# - -get-st-objs = $(foreach dt,$(1),$(foreach tr,$(2),$(foreach st,$(3),$(foreach sh,$(4),$(foreach sm,$(5),$(foreach sn,$(6),$(foreach sk,$(7),test_$(dt)gemm_$(tr)_$(st)_$(call get-shape-dim-str,$(sh),$(sm),$(sn),$(sk))_$(8)_st.o))))))) - -# Build a list of object files and binaries for each single-threaded -# implementation using the get-st-objs() function defined above. -BLISSUP_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(STORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blissup) -BLISSUP_ST_BINS := $(patsubst %.o,%.x,$(BLISSUP_ST_OBJS)) - -BLISLPAB_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(STORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blislpab) -BLISLPAB_ST_BINS := $(patsubst %.o,%.x,$(BLISLPAB_ST_OBJS)) - -EIGEN_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(ESTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),eigen) -EIGEN_ST_BINS := $(patsubst %.o,%.x,$(EIGEN_ST_OBJS)) - -OPENBLAS_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),openblas) -OPENBLAS_ST_BINS := $(patsubst %.o,%.x,$(OPENBLAS_ST_OBJS)) - -BLASFEO_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blasfeo) -BLASFEO_ST_BINS := $(patsubst %.o,%.x,$(BLASFEO_ST_OBJS)) - -LIBXSMM_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(XSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),libxsmm) -LIBXSMM_ST_BINS := $(patsubst %.o,%.x,$(LIBXSMM_ST_OBJS)) - -VENDOR_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),vendor) -VENDOR_ST_BINS := $(patsubst %.o,%.x,$(VENDOR_ST_OBJS)) - -# Mark the object files as intermediate so that make will remove them -# automatically after building the binaries on which they depend. 
-.INTERMEDIATE: $(BLISSUP_ST_OBJS) \ - $(BLISLPAB_ST_OBJS) \ - $(EIGEN_ST_OBJS) \ - $(OPENBLAS_ST_OBJS) \ - $(BLASFEO_ST_OBJS) \ - $(LIBXSMM_ST_OBJS) \ - $(VENDOR_ST_OBJS) - -get-mt-objs = $(foreach dt,$(1),$(foreach tr,$(2),$(foreach st,$(3),$(foreach sh,$(4),$(foreach sm,$(5),$(foreach sn,$(6),$(foreach sk,$(7),test_$(dt)gemm_$(tr)_$(st)_$(call get-shape-dim-str,$(sh),$(sm),$(sn),$(sk))_$(8)_mt.o))))))) - -# Build a list of object files and binaries for each multithreaded -# implementation using the get-st-objs() function defined above. -BLISSUP_MT_OBJS := $(call get-mt-objs,$(DTS),$(TRANS0),$(STORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blissup) -BLISSUP_MT_BINS := $(patsubst %.o,%.x,$(BLISSUP_MT_OBJS)) - -BLISLPAB_MT_OBJS := $(call get-mt-objs,$(DTS),$(TRANS0),$(STORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blislpab) -BLISLPAB_MT_BINS := $(patsubst %.o,%.x,$(BLISLPAB_MT_OBJS)) - -EIGEN_MT_OBJS := $(call get-mt-objs,$(DTS),$(TRANS0),$(ESTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),eigen) -EIGEN_MT_BINS := $(patsubst %.o,%.x,$(EIGEN_MT_OBJS)) - -OPENBLAS_MT_OBJS := $(call get-mt-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),openblas) -OPENBLAS_MT_BINS := $(patsubst %.o,%.x,$(OPENBLAS_MT_OBJS)) - -VENDOR_MT_OBJS := $(call get-mt-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),vendor) -VENDOR_MT_BINS := $(patsubst %.o,%.x,$(VENDOR_MT_OBJS)) - -#$(error "objs = $(EIGEN_ST_BINS)" ) - -# Mark the object files as intermediate so that make will remove them -# automatically after building the binaries on which they depend. 
-.INTERMEDIATE: $(BLISSUP_MT_OBJS) \ - $(BLISLPAB_MT_OBJS) \ - $(EIGEN_MT_OBJS) \ - $(OPENBLAS_MT_OBJS) \ - $(VENDOR_MT_OBJS) - - -# -# --- Targets/rules ------------------------------------------------------------ -# - -all: st - -blis: blissup-st blislpab-st - -blissup: blissup-st -blislpab: blislpab-st -eigen: eigen-st -openblas: openblas-st -blasfeo: blasfeo-st -libxsmm: libxsmm-st -vendor: vendor-st - -st: blissup-st blislpab-st \ - eigen-st openblas-st blasfeo-st libxsmm-st vendor-st - -blissup-st: $(BLISSUP_ST_BINS) -blislpab-st: $(BLISLPAB_ST_BINS) -eigen-st: $(EIGEN_ST_BINS) -openblas-st: $(OPENBLAS_ST_BINS) -blasfeo-st: $(BLASFEO_ST_BINS) -libxsmm-st: $(LIBXSMM_ST_BINS) -vendor-st: $(VENDOR_ST_BINS) - -mt: blissup-mt blislpab-mt \ - eigen-mt openblas-mt vendor-mt - -blissup-mt: $(BLISSUP_MT_BINS) -blislpab-mt: $(BLISLPAB_MT_BINS) -eigen-mt: $(EIGEN_MT_BINS) -openblas-mt: $(OPENBLAS_MT_BINS) -vendor-mt: $(VENDOR_MT_BINS) - - - -# --Object file rules -- - -# Define the implementations for which we will instantiate compilation rules. -BIMPLS_ST := blissup blislpab openblas blasfeo libxsmm vendor -BIMPLS_MT := blissup blislpab openblas vendor -EIMPLS := eigen - -# 1 2 3 4 567 8 -# test_dgemm_nn_rrr_mpn6kp_blissup_st.x - -# Define the function that will be used to instantiate compilation rules -# for the various single-threaded implementations. -define make-st-rule -test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_st.o: test_gemm.c Makefile - $(CC) $(CFLAGS) $(ERRCHK) $(N_TRIALS) $(PDEF_ST) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_ST) -c $$< -o $$@ -endef - -# Instantiate the rule function make-st-rule() for each BLIS/BLAS/CBLAS -# implementation. 
-$(foreach dt,$(DTS), \ -$(foreach tr,$(TRANS), \ -$(foreach st,$(STORS), \ -$(foreach sh,$(SHAPES), \ -$(foreach sm,$(SMS), \ -$(foreach sn,$(SNS), \ -$(foreach sk,$(SKS), \ -$(foreach impl,$(BIMPLS_ST), \ -$(eval $(call make-st-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl))))))))))) - -# Define the function that will be used to instantiate compilation rules -# for the various multithreaded implementations. -define make-mt-rule -test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_mt.o: test_gemm.c Makefile - $(CC) $(CFLAGS) $(ERRCHK) $(N_TRIALS) $(PDEF_MT) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_MT) -c $$< -o $$@ -endef - -# Instantiate the rule function make-mt-rule() for each BLIS/BLAS/CBLAS -# implementation. -$(foreach dt,$(DTS), \ -$(foreach tr,$(TRANS), \ -$(foreach st,$(STORS), \ -$(foreach sh,$(SHAPES), \ -$(foreach sm,$(SMS), \ -$(foreach sn,$(SNS), \ -$(foreach sk,$(SKS), \ -$(foreach impl,$(BIMPLS_MT), \ -$(eval $(call make-mt-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl))))))))))) - -# Define the function that will be used to instantiate compilation rules -# for the single-threaded Eigen implementation. -define make-eigst-rule -test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_st.o: test_gemm.c Makefile - $(CXX) $(CXXFLAGS_ST) $(ERRCHK) $(N_TRIALS) $(PDEF_ST) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_ST) -c $$< -o $$@ -endef - -# Instantiate the rule function make-st-rule() for each Eigen implementation. 
-$(foreach dt,$(DTS), \ -$(foreach tr,$(TRANS), \ -$(foreach st,$(STORS), \ -$(foreach sh,$(SHAPES), \ -$(foreach sm,$(SMS), \ -$(foreach sn,$(SNS), \ -$(foreach sk,$(SKS), \ -$(foreach impl,$(EIMPLS), \ -$(eval $(call make-eigst-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl))))))))))) - -# Define the function that will be used to instantiate compilation rules -# for the multithreaded Eigen implementation. -define make-eigmt-rule -test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_mt.o: test_gemm.c Makefile - $(CXX) $(CXXFLAGS_MT) $(ERRCHK) $(N_TRIALS) $(PDEF_MT) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_MT) -c $$< -o $$@ -endef - -# Instantiate the rule function make-st-rule() for each Eigen implementation. -$(foreach dt,$(DTS), \ -$(foreach tr,$(TRANS), \ -$(foreach st,$(STORS), \ -$(foreach sh,$(SHAPES), \ -$(foreach sm,$(SMS), \ -$(foreach sn,$(SNS), \ -$(foreach sk,$(SKS), \ -$(foreach impl,$(EIMPLS), \ -$(eval $(call make-eigmt-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl))))))))))) - - -# -- Executable file rules -- - -# NOTE: For the BLAS test drivers, we place the BLAS libraries before BLIS -# on the link command line in case BLIS was configured with the BLAS -# compatibility layer. This prevents BLIS from inadvertently getting called -# for the BLAS routines we are trying to test with. 
- -test_%_blissup_st.x: test_%_blissup_st.o $(LIBBLIS_LINK) - $(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@) - -test_%_blislpab_st.x: test_%_blislpab_st.o $(LIBBLIS_LINK) - $(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@) - -test_%_eigen_st.x: test_%_eigen_st.o $(LIBBLIS_LINK) - $(CXX) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@) - -test_%_openblas_st.x: test_%_openblas_st.o $(LIBBLIS_LINK) - $(CC) $(strip $< $(OPENBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) - -test_%_blasfeo_st.x: test_%_blasfeo_st.o $(LIBBLIS_LINK) - $(CC) $(strip $< $(BLASFEO_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) - -test_%_libxsmm_st.x: test_%_libxsmm_st.o $(LIBBLIS_LINK) - $(CC) $(strip $< $(LIBXSMM_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) - -test_%_vendor_st.x: test_%_vendor_st.o $(LIBBLIS_LINK) - $(CC) $(strip $< $(VENDOR_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) - - -test_%_blissup_mt.x: test_%_blissup_mt.o $(LIBBLIS_LINK) - $(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@) - -test_%_blislpab_mt.x: test_%_blislpab_mt.o $(LIBBLIS_LINK) - $(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@) - -test_%_eigen_mt.x: test_%_eigen_mt.o $(LIBBLIS_LINK) - $(CXX) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@) - -test_%_openblas_mt.x: test_%_openblas_mt.o $(LIBBLIS_LINK) - $(CC) $(strip $< $(OPENBLASP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) - -test_%_vendor_mt.x: test_%_vendor_mt.o $(LIBBLIS_LINK) - $(CC) $(strip $< $(VENDORP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) - - -# -- Clean rules -- - -clean: cleanx - -cleanx: - - $(RM_F) *.x *.o - diff --git a/test/supmt/octave/runthese.m b/test/supmt/octave/runthese.m deleted file mode 100644 index 5946d4796..000000000 --- a/test/supmt/octave/runthese.m +++ /dev/null @@ -1,12 +0,0 @@ - -% haswell -plot_panel_trxsh(3.25,16,1,'mt','d','ccc',[ 6 8 10 ],'../results/haswell/20190823/4_800_4_mt201','has','MKL','matlab'); close; clear all; -plot_panel_trxsh(3.25,16,1,'mt','d','rrr',[ 6 8 10 ],'../results/haswell/20190823/4_800_4_mt201','has','MKL','matlab'); close; clear 
all; - -% kabylake -plot_panel_trxsh(3.80,16,1,'mt','d','rrr',[ 6 8 10 ],'..','kbl','MKL','matlab'); close; clear all; -plot_panel_trxsh(3.80,16,1,'mt','d','ccc',[ 6 8 10 ],'..','kbl','MKL','matlab'); close; clear all; - -% epyc -plot_panel_trxsh(3.00, 8,1,'mt','d','rrr',[ 6 8 10 ],'../results/epyc/20190826/4_800_4_mt256','epyc','MKL','matlab'); close; clear all; -plot_panel_trxsh(3.00, 8,1,'mt','d','ccc',[ 6 8 10 ],'../results/epyc/20190826/4_800_4_mt256','epyc','MKL','matlab'); close; clear all; diff --git a/test/supmt/runme.sh b/test/supmt/runme.sh deleted file mode 100755 index e878d76b0..000000000 --- a/test/supmt/runme.sh +++ /dev/null @@ -1,188 +0,0 @@ -#!/bin/bash - -# File pefixes. -exec_root="test" -out_root="output" - -sys="blis" -#sys="lonestar5" -#sys="ul252" -#sys="ul264" - -if [ ${sys} = "blis" ]; then - - export GOMP_CPU_AFFINITY="0-3" - nt=4 - -elif [ ${sys} = "lonestar5" ]; then - - export GOMP_CPU_AFFINITY="0-23" - nt=24 - -elif [ ${sys} = "ul252" ]; then - - export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64" - export GOMP_CPU_AFFINITY="0-51" - nt=52 - -elif [ ${sys} = "ul264" ]; then - - export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64" - export GOMP_CPU_AFFINITY="0-63" - nt=64 - -fi - -# Delay between test cases. -delay=0.02 - -# Threadedness to test. -threads="mt" - -# Datatypes to test. -#dts="d s" -dts="d" - -# Operations to test. -ops="gemm" - -# Transpose combintions to test. -trans="nn nt tn tt" - -# Storage combinations to test. -#stors="rrr rrc rcr rcc crr crc ccr ccc" -stors="rrr ccc" - -# Problem shapes to test. -shapes="sll lsl lls lss sls ssl lll" - -# FGVZ: figure out how to probe what's in the directory and -# execute everything that's there? -sms="6" -sns="8" -sks="10" - -# Implementations to test. 
-impls="vendor blissup blislpab openblas eigen" -#impls="vendor" -#impls="blissup" -#impls="blislpab" -#impls="openblas" -#impls="eigen" - -# Save a copy of GOMP_CPU_AFFINITY so that if we have to unset it, we can -# restore the value. -GOMP_CPU_AFFINITYsave=${GOMP_CPU_AFFINITY} - -# Example: test_dgemm_nn_rrc_m6npkp_blissup_st.x - -for th in ${threads}; do - - for dt in ${dts}; do - - for op in ${ops}; do - - for tr in ${trans}; do - - for st in ${stors}; do - - for sh in ${shapes}; do - - for sm in ${sms}; do - - for sn in ${sns}; do - - for sk in ${sks}; do - - for im in ${impls}; do - - if [ "${im:0:4}" = "blis" ]; then - unset OMP_NUM_THREADS - export BLIS_NUM_THREADS=${nt} - elif [ "${im}" = "openblas" ]; then - unset OMP_NUM_THREADS - export OPENBLAS_NUM_THREADS=${nt} - elif [ "${im}" = "eigen" ]; then - export OMP_NUM_THREADS=${nt} - elif [ "${im}" = "vendor" ]; then - unset OMP_NUM_THREADS - export MKL_NUM_THREADS=${nt} - fi - - # Multithreaded OpenBLAS seems to have a problem - # running properly if GOMP_CPU_AFFINITY is set. - # So we temporarily unset it here if we are about - # to execute OpenBLAS, but otherwise restore it. - if [ ${im} = "openblas" ]; then - unset GOMP_CPU_AFFINITY - else - export GOMP_CPU_AFFINITY="${GOMP_CPU_AFFINITYsave}" - fi - - # Limit execution of non-BLIS implementations to - # rrr/ccc storage cases. - if [ "${im:0:4}" != "blis" ] && \ - [ "${st}" != "rrr" ] && \ - [ "${st}" != "ccc" ]; then - continue; - fi - - # Further limit execution of libxsmm to - # ccc storage cases. - if [ "${im:0:7}" = "libxsmm" ] && \ - [ "${st}" != "ccc" ]; then - continue; - fi - - # Extract the shape chars for m, n, k. - chm=${sh:0:1} - chn=${sh:1:1} - chk=${sh:2:1} - - # Construct the shape substring (e.g. 
m6npkp) - shstr="" - - if [ ${chm} = "s" ]; then - shstr="${shstr}m${sm}" - else - shstr="${shstr}mp" - fi - - if [ ${chn} = "s" ]; then - shstr="${shstr}n${sn}" - else - shstr="${shstr}np" - fi - - if [ ${chk} = "s" ]; then - shstr="${shstr}k${sk}" - else - shstr="${shstr}kp" - fi - - # Ex: test_dgemm_nn_rrc_m6npkp_blissup_st.x - - # Construct the name of the test executable. - exec_name="${exec_root}_${dt}${op}_${tr}_${st}_${shstr}_${im}_${th}.x" - - # Construct the name of the output file. - out_file="${out_root}_${th}_${dt}${op}_${tr}_${st}_${shstr}_${im}.m" - - echo "Running (nt = ${nt}) ./${exec_name} > ${out_file}" - - # Run executable. - ./${exec_name} > ${out_file} - - sleep ${delay} - - done - done - done - done - done - done - done - done - done -done - diff --git a/test/supmt/test_gemm.c b/test/supmt/test_gemm.c deleted file mode 100644 index 95e9d45b2..000000000 --- a/test/supmt/test_gemm.c +++ /dev/null @@ -1,589 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2019, Advanced Micro Devices, Inc. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include -#ifdef EIGEN - #define BLIS_DISABLE_BLAS_DEFS - #include "blis.h" - #include - //#include - using namespace Eigen; -#else - #include "blis.h" -#endif - -//#define PRINT - -int main( int argc, char** argv ) -{ - rntm_t rntm_g; - - bli_init(); - - // Copy the global rntm_t object in case we need it later when disabling - // sup. 
- bli_rntm_init_from_global( &rntm_g ); - -#ifndef ERROR_CHECK - bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); -#endif - - - dim_t n_trials = N_TRIALS; - - num_t dt = DT; - -#if 1 - dim_t p_begin = P_BEGIN; - dim_t p_max = P_MAX; - dim_t p_inc = P_INC; -#else - dim_t p_begin = 4; - dim_t p_max = 40; - dim_t p_inc = 4; -#endif - -#if 1 - dim_t m_input = M_DIM; - dim_t n_input = N_DIM; - dim_t k_input = K_DIM; -#else - p_begin = p_inc = 32; - dim_t m_input = 6; - dim_t n_input = -1; - dim_t k_input = -1; -#endif - -#if 1 - trans_t transa = TRANSA; - trans_t transb = TRANSB; -#else - trans_t transa = BLIS_NO_TRANSPOSE; - trans_t transb = BLIS_NO_TRANSPOSE; -#endif - -#if 1 - stor3_t sc = STOR3; -#else - stor3_t sc = BLIS_RRR; -#endif - - - inc_t rs_c, cs_c; - inc_t rs_a, cs_a; - inc_t rs_b, cs_b; - - if ( sc == BLIS_RRR ) { rs_c = cs_c = -1; rs_a = cs_a = -1; rs_b = cs_b = -1; } - else if ( sc == BLIS_RRC ) { rs_c = cs_c = -1; rs_a = cs_a = -1; rs_b = cs_b = 0; } - else if ( sc == BLIS_RCR ) { rs_c = cs_c = -1; rs_a = cs_a = 0; rs_b = cs_b = -1; } - else if ( sc == BLIS_RCC ) { rs_c = cs_c = -1; rs_a = cs_a = 0; rs_b = cs_b = 0; } - else if ( sc == BLIS_CRR ) { rs_c = cs_c = 0; rs_a = cs_a = -1; rs_b = cs_b = -1; } - else if ( sc == BLIS_CRC ) { rs_c = cs_c = 0; rs_a = cs_a = -1; rs_b = cs_b = 0; } - else if ( sc == BLIS_CCR ) { rs_c = cs_c = 0; rs_a = cs_a = 0; rs_b = cs_b = -1; } - else if ( sc == BLIS_CCC ) { rs_c = cs_c = 0; rs_a = cs_a = 0; rs_b = cs_b = 0; } - else { bli_abort(); } - - f77_int cbla_storage; - - if ( sc == BLIS_RRR ) cbla_storage = CblasRowMajor; - else if ( sc == BLIS_CCC ) cbla_storage = CblasColMajor; - else cbla_storage = -1; - - ( void )cbla_storage; - - - char dt_ch; - - // Choose the char corresponding to the requested datatype. 
- if ( bli_is_float( dt ) ) dt_ch = 's'; - else if ( bli_is_double( dt ) ) dt_ch = 'd'; - else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; - else dt_ch = 'z'; - - f77_char f77_transa; - f77_char f77_transb; - char transal, transbl; - - bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); - bli_param_map_blis_to_netlib_trans( transb, &f77_transb ); - - transal = tolower( f77_transa ); - transbl = tolower( f77_transb ); - - f77_int cbla_transa = ( transal == 'n' ? CblasNoTrans : CblasTrans ); - f77_int cbla_transb = ( transbl == 'n' ? CblasNoTrans : CblasTrans ); - - ( void )cbla_transa; - ( void )cbla_transb; - - dim_t p; - - // Begin with initializing the last entry to zero so that - // matlab allocates space for the entire array once up-front. - for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ; - - printf( "data_%s_%cgemm_%c%c_%s", THR_STR, dt_ch, - transal, transbl, STR ); - printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n", - ( unsigned long )(p - p_begin)/p_inc + 1, - ( unsigned long )0, - ( unsigned long )0, - ( unsigned long )0, 0.0 ); - - - //for ( p = p_begin; p <= p_max; p += p_inc ) - for ( p = p_max; p_begin <= p; p -= p_inc ) - { - obj_t a, b, c; - obj_t c_save; - obj_t alpha, beta; - dim_t m, n, k; - - if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); - else m = ( dim_t ) m_input; - if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); - else n = ( dim_t ) n_input; - if ( k_input < 0 ) k = p / ( dim_t )abs(k_input); - else k = ( dim_t ) k_input; - - bli_obj_create( dt, 1, 1, 0, 0, &alpha ); - bli_obj_create( dt, 1, 1, 0, 0, &beta ); - - bli_obj_create( dt, m, n, rs_c, cs_c, &c ); - bli_obj_create( dt, m, n, rs_c, cs_c, &c_save ); - - if ( bli_does_notrans( transa ) ) - bli_obj_create( dt, m, k, rs_a, cs_a, &a ); - else - bli_obj_create( dt, k, m, rs_a, cs_a, &a ); - - if ( bli_does_notrans( transb ) ) - bli_obj_create( dt, k, n, rs_b, cs_b, &b ); - else - bli_obj_create( dt, n, k, rs_b, cs_b, &b ); - - bli_randm( &a ); - bli_randm( &b ); - 
bli_randm( &c ); - - bli_obj_set_conjtrans( transa, &a ); - bli_obj_set_conjtrans( transb, &b ); - - bli_setsc( (1.0/1.0), 0.0, &alpha ); - bli_setsc( (1.0/1.0), 0.0, &beta ); - - bli_copym( &c, &c_save ); - -#ifdef EIGEN - double alpha_r, alpha_i; - - bli_getsc( &alpha, &alpha_r, &alpha_i ); - - void* ap = bli_obj_buffer_at_off( &a ); - void* bp = bli_obj_buffer_at_off( &b ); - void* cp = bli_obj_buffer_at_off( &c ); - - const int os_a = ( bli_obj_is_col_stored( &a ) ? bli_obj_col_stride( &a ) - : bli_obj_row_stride( &a ) ); - const int os_b = ( bli_obj_is_col_stored( &b ) ? bli_obj_col_stride( &b ) - : bli_obj_row_stride( &b ) ); - const int os_c = ( bli_obj_is_col_stored( &c ) ? bli_obj_col_stride( &c ) - : bli_obj_row_stride( &c ) ); - - Stride stride_a( os_a, 1 ); - Stride stride_b( os_b, 1 ); - Stride stride_c( os_c, 1 ); - - #if defined(IS_FLOAT) - #elif defined (IS_DOUBLE) - #ifdef A_STOR_R - typedef Matrix MatrixXd_A; - #else - typedef Matrix MatrixXd_A; - #endif - #ifdef B_STOR_R - typedef Matrix MatrixXd_B; - #else - typedef Matrix MatrixXd_B; - #endif - #ifdef C_STOR_R - typedef Matrix MatrixXd_C; - #else - typedef Matrix MatrixXd_C; - #endif - - #ifdef A_NOTRANS // A is not transposed - Map > A( ( double* )ap, m, k, stride_a ); - #else // A is transposed - Map > A( ( double* )ap, k, m, stride_a ); - #endif - - #ifdef B_NOTRANS // B is not transposed - Map > B( ( double* )bp, k, n, stride_b ); - #else // B is transposed - Map > B( ( double* )bp, n, k, stride_b ); - #endif - - Map > C( ( double* )cp, m, n, stride_c ); - #endif -#endif - - - double dtime_save = DBL_MAX; - - for ( dim_t r = 0; r < n_trials; ++r ) - { - bli_copym( &c_save, &c ); - - - double dtime = bli_clock(); - - -#ifdef EIGEN - - #ifdef A_NOTRANS - #ifdef B_NOTRANS - C.noalias() += alpha_r * A * B; - #else // B_TRANS - C.noalias() += alpha_r * A * B.transpose(); - #endif - #else // A_TRANS - #ifdef B_NOTRANS - C.noalias() += alpha_r * A.transpose() * B; - #else // B_TRANS - C.noalias() 
+= alpha_r * A.transpose() * B.transpose(); - #endif - #endif - -#endif -#ifdef BLIS - #ifdef SUP - // Allow sup. - bli_gemm( &alpha, - &a, - &b, - &beta, - &c ); - #else - // Disable sup and use the expert interface. - //rntm_t rntm = BLIS_RNTM_INITIALIZER; - rntm_t rntm = rntm_g; - bli_rntm_disable_l3_sup( &rntm ); - - bli_gemm_ex( &alpha, - &a, - &b, - &beta, - &c, NULL, &rntm ); - #endif -#endif -#ifdef BLAS - if ( bli_is_float( dt ) ) - { - f77_int mm = bli_obj_length( &c ); - f77_int kk = bli_obj_width_after_trans( &a ); - f77_int nn = bli_obj_width( &c ); - f77_int lda = bli_obj_col_stride( &a ); - f77_int ldb = bli_obj_col_stride( &b ); - f77_int ldc = bli_obj_col_stride( &c ); - float* alphap = ( float* )bli_obj_buffer( &alpha ); - float* ap = ( float* )bli_obj_buffer( &a ); - float* bp = ( float* )bli_obj_buffer( &b ); - float* betap = ( float* )bli_obj_buffer( &beta ); - float* cp = ( float* )bli_obj_buffer( &c ); - - #ifdef XSMM - libxsmm_sgemm( &f77_transa, - #else - sgemm_( &f77_transa, - #endif - &f77_transb, - &mm, - &nn, - &kk, - alphap, - ap, &lda, - bp, &ldb, - betap, - cp, &ldc ); - } - else if ( bli_is_double( dt ) ) - { - f77_int mm = bli_obj_length( &c ); - f77_int kk = bli_obj_width_after_trans( &a ); - f77_int nn = bli_obj_width( &c ); - f77_int lda = bli_obj_col_stride( &a ); - f77_int ldb = bli_obj_col_stride( &b ); - f77_int ldc = bli_obj_col_stride( &c ); - double* alphap = ( double* )bli_obj_buffer( &alpha ); - double* ap = ( double* )bli_obj_buffer( &a ); - double* bp = ( double* )bli_obj_buffer( &b ); - double* betap = ( double* )bli_obj_buffer( &beta ); - double* cp = ( double* )bli_obj_buffer( &c ); - - #ifdef XSMM - libxsmm_dgemm( &f77_transa, - #else - dgemm_( &f77_transa, - #endif - &f77_transb, - &mm, - &nn, - &kk, - alphap, - ap, &lda, - bp, &ldb, - betap, - cp, &ldc ); - } - else if ( bli_is_scomplex( dt ) ) - { - f77_int mm = bli_obj_length( &c ); - f77_int kk = bli_obj_width_after_trans( &a ); - f77_int nn = bli_obj_width( 
&c ); - f77_int lda = bli_obj_col_stride( &a ); - f77_int ldb = bli_obj_col_stride( &b ); - f77_int ldc = bli_obj_col_stride( &c ); - scomplex* alphap = ( scomplex* )bli_obj_buffer( &alpha ); - scomplex* ap = ( scomplex* )bli_obj_buffer( &a ); - scomplex* bp = ( scomplex* )bli_obj_buffer( &b ); - scomplex* betap = ( scomplex* )bli_obj_buffer( &beta ); - scomplex* cp = ( scomplex* )bli_obj_buffer( &c ); - - #ifdef XSMM - libxsmm_cgemm( &f77_transa, - #else - cgemm_( &f77_transa, - #endif - &f77_transb, - &mm, - &nn, - &kk, - alphap, - ap, &lda, - bp, &ldb, - betap, - cp, &ldc ); - } - else if ( bli_is_dcomplex( dt ) ) - { - f77_int mm = bli_obj_length( &c ); - f77_int kk = bli_obj_width_after_trans( &a ); - f77_int nn = bli_obj_width( &c ); - f77_int lda = bli_obj_col_stride( &a ); - f77_int ldb = bli_obj_col_stride( &b ); - f77_int ldc = bli_obj_col_stride( &c ); - dcomplex* alphap = ( dcomplex* )bli_obj_buffer( &alpha ); - dcomplex* ap = ( dcomplex* )bli_obj_buffer( &a ); - dcomplex* bp = ( dcomplex* )bli_obj_buffer( &b ); - dcomplex* betap = ( dcomplex* )bli_obj_buffer( &beta ); - dcomplex* cp = ( dcomplex* )bli_obj_buffer( &c ); - - #ifdef XSMM - libxsmm_zgemm( &f77_transa, - #else - zgemm_( &f77_transa, - #endif - &f77_transb, - &mm, - &nn, - &kk, - alphap, - ap, &lda, - bp, &ldb, - betap, - cp, &ldc ); - } -#endif -#ifdef CBLAS - if ( bli_is_float( dt ) ) - { - f77_int mm = bli_obj_length( &c ); - f77_int kk = bli_obj_width_after_trans( &a ); - f77_int nn = bli_obj_width( &c ); - #ifdef C_STOR_R - f77_int lda = bli_obj_row_stride( &a ); - f77_int ldb = bli_obj_row_stride( &b ); - f77_int ldc = bli_obj_row_stride( &c ); - #else - f77_int lda = bli_obj_col_stride( &a ); - f77_int ldb = bli_obj_col_stride( &b ); - f77_int ldc = bli_obj_col_stride( &c ); - #endif - float* alphap = bli_obj_buffer( &alpha ); - float* ap = bli_obj_buffer( &a ); - float* bp = bli_obj_buffer( &b ); - float* betap = bli_obj_buffer( &beta ); - float* cp = bli_obj_buffer( &c ); - - 
cblas_sgemm( cbla_storage, - cbla_transa, - cbla_transb, - mm, - nn, - kk, - *alphap, - ap, lda, - bp, ldb, - *betap, - cp, ldc ); - } - else if ( bli_is_double( dt ) ) - { - f77_int mm = bli_obj_length( &c ); - f77_int kk = bli_obj_width_after_trans( &a ); - f77_int nn = bli_obj_width( &c ); - #ifdef C_STOR_R - f77_int lda = bli_obj_row_stride( &a ); - f77_int ldb = bli_obj_row_stride( &b ); - f77_int ldc = bli_obj_row_stride( &c ); - #else - f77_int lda = bli_obj_col_stride( &a ); - f77_int ldb = bli_obj_col_stride( &b ); - f77_int ldc = bli_obj_col_stride( &c ); - #endif - double* alphap = bli_obj_buffer( &alpha ); - double* ap = bli_obj_buffer( &a ); - double* bp = bli_obj_buffer( &b ); - double* betap = bli_obj_buffer( &beta ); - double* cp = bli_obj_buffer( &c ); - - cblas_dgemm( cbla_storage, - cbla_transa, - cbla_transb, - mm, - nn, - kk, - *alphap, - ap, lda, - bp, ldb, - *betap, - cp, ldc ); - } - else if ( bli_is_scomplex( dt ) ) - { - f77_int mm = bli_obj_length( &c ); - f77_int kk = bli_obj_width_after_trans( &a ); - f77_int nn = bli_obj_width( &c ); - #ifdef C_STOR_R - f77_int lda = bli_obj_row_stride( &a ); - f77_int ldb = bli_obj_row_stride( &b ); - f77_int ldc = bli_obj_row_stride( &c ); - #else - f77_int lda = bli_obj_col_stride( &a ); - f77_int ldb = bli_obj_col_stride( &b ); - f77_int ldc = bli_obj_col_stride( &c ); - #endif - scomplex* alphap = bli_obj_buffer( &alpha ); - scomplex* ap = bli_obj_buffer( &a ); - scomplex* bp = bli_obj_buffer( &b ); - scomplex* betap = bli_obj_buffer( &beta ); - scomplex* cp = bli_obj_buffer( &c ); - - cblas_cgemm( cbla_storage, - cbla_transa, - cbla_transb, - mm, - nn, - kk, - alphap, - ap, lda, - bp, ldb, - betap, - cp, ldc ); - } - else if ( bli_is_dcomplex( dt ) ) - { - f77_int mm = bli_obj_length( &c ); - f77_int kk = bli_obj_width_after_trans( &a ); - f77_int nn = bli_obj_width( &c ); - #ifdef C_STOR_R - f77_int lda = bli_obj_row_stride( &a ); - f77_int ldb = bli_obj_row_stride( &b ); - f77_int ldc = 
bli_obj_row_stride( &c ); - #else - f77_int lda = bli_obj_col_stride( &a ); - f77_int ldb = bli_obj_col_stride( &b ); - f77_int ldc = bli_obj_col_stride( &c ); - #endif - dcomplex* alphap = bli_obj_buffer( &alpha ); - dcomplex* ap = bli_obj_buffer( &a ); - dcomplex* bp = bli_obj_buffer( &b ); - dcomplex* betap = bli_obj_buffer( &beta ); - dcomplex* cp = bli_obj_buffer( &c ); - - cblas_zgemm( cbla_storage, - cbla_transa, - cbla_transb, - mm, - nn, - kk, - alphap, - ap, lda, - bp, ldb, - betap, - cp, ldc ); - } -#endif - - dtime_save = bli_clock_min_diff( dtime_save, dtime ); - } - - double gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 ); - - if ( bli_is_complex( dt ) ) gflops *= 4.0; - - printf( "data_%s_%cgemm_%c%c_%s", THR_STR, dt_ch, - transal, transbl, STR ); - printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n", - ( unsigned long )(p - p_begin)/p_inc + 1, - ( unsigned long )m, - ( unsigned long )n, - ( unsigned long )k, gflops ); - - bli_obj_free( &alpha ); - bli_obj_free( &beta ); - - bli_obj_free( &a ); - bli_obj_free( &b ); - bli_obj_free( &c ); - bli_obj_free( &c_save ); - } - - //bli_finalize(); - - return 0; -} -