Merged test/sup, test/supmt into test/sup.

Details:
- Updated the Makefile, test_gemm.c, and runme.sh in test/sup to be able
  to compile and run both single-threaded and multithreaded experiments.
  This should help with maintenance going forward.
- Created a test/sup/octave_st directory of scripts (based on the
  previous test/sup/octave scripts) as well as a test/sup/octave_mt
  directory (based on the previous test/supmt/octave scripts). The
  octave scripts are slightly different and not easily mergeable, and
  thus for now I'll maintain them separately.
- Preserved the previous test/sup directory as test/sup/old/supst and
  the previous test/supmt directory as test/sup/old/supmt.

Change-Id: Ia230fc65185fd9a34eec714721004aa9e0bd40ed
This commit is contained in:
Field G. Van Zee
2020-03-10 13:55:29 -05:00
committed by dzambare
parent 01e1a41c95
commit 26cd966af7
19 changed files with 260 additions and 1539 deletions

View File

@@ -505,6 +505,7 @@ eigen-mt: check-env $(EIGEN_MT_BINS)
openblas-mt: check-env $(OPENBLAS_MT_BINS)
vendor-mt: check-env $(VENDOR_MT_BINS)
# -- Multithreaded --
# --- Object file rules --------------------------------------------------------
@@ -540,6 +541,29 @@ $(eval $(call make-st-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(ld),$(imp
# -- Multithreaded BLAS --
# -- Multithreaded BLAS --
# Define the function that will be used to instantiate compilation rules
# for the various multithreaded implementations.
#
# Positional arguments, as supplied by $(call make-mt-rule,...):
#   $(1)  datatype character; prefixed onto "gemm" in the object name and
#         passed to get-dt-cpp
#   $(2)  transposition string; passed to get-tra-defs
#   $(3)  storage string; passed to get-sto-defs
#   $(4)  shape string; passed to get-pdefs and get-shape-defs
#   $(5)  \
#   $(6)   > values forwarded with the shape to get-shape-dim-str and
#   $(7)  /  get-shape-defs
#   $(8)  implementation name; passed to get-imp-defs
#
# The generated object carries an "_mt" suffix and is compiled with $(STR_MT).
# "Makefile" is listed as a prerequisite so that editing this file rebuilds
# the objects.
#
# NOTE: The recipe line below must begin with a hard TAB; without it the text
# handed to $(eval) is not a valid rule. The automatic variables are written
# as $$< and $$@ so that they survive the $(call) expansion and are only
# resolved when the recipe actually runs.
define make-mt-rule
test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_mt.o: test_gemm.c Makefile
	$(CC) $(CFLAGS) $(ERRCHK) $(N_TRIALS) $(call get-pdefs,$(4)) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_MT) -c $$< -o $$@
endef
# Instantiate the rule function make-mt-rule() for each BLIS/BLAS/CBLAS
# implementation.
# One rule is generated for every combination of the words in DTS, TRANS,
# STORS, SHAPES, SMS_MT, SNS_MT, SKS_MT, and BIMPLS_MT. Each expansion of
# make-mt-rule is handed to $(eval) so that it is parsed as makefile syntax.
$(foreach dt,$(DTS), \
$(foreach tr,$(TRANS), \
$(foreach st,$(STORS), \
$(foreach sh,$(SHAPES), \
$(foreach sm,$(SMS_MT), \
$(foreach sn,$(SNS_MT), \
$(foreach sk,$(SKS_MT), \
$(foreach impl,$(BIMPLS_MT), \
$(eval $(call make-mt-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl)))))))))))
# -- Single-threaded Eigen --
# Define the function that will be used to instantiate compilation rules
# for the various multithreaded implementations.
define make-mt-rule
@@ -602,6 +626,26 @@ $(foreach ld,$(LDIMS), \
$(foreach impl,$(EIMPLS), \
$(eval $(call make-eigmt-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(ld),$(impl))))))))))))
# -- Multithreaded Eigen --
# Define the function that will be used to instantiate compilation rules
# for the multithreaded Eigen implementation.
#
# Positional arguments, as supplied by $(call make-eigmt-rule,...):
#   $(1)  datatype character; prefixed onto "gemm" in the object name and
#         passed to get-dt-cpp
#   $(2)  transposition string; passed to get-tra-defs
#   $(3)  storage string; passed to get-sto-defs
#   $(4)  shape string; passed to get-pdefs and get-shape-defs
#   $(5)  \
#   $(6)   > values forwarded with the shape to get-shape-dim-str and
#   $(7)  /  get-shape-defs
#   $(8)  implementation name; passed to get-imp-defs
#
# Unlike the C rules, this one compiles test_gemm.c with $(CXX) and
# $(CXXFLAGS_MT). The generated object carries an "_mt" suffix and is built
# with $(STR_MT).
#
# NOTE: The recipe line below must begin with a hard TAB; without it the text
# handed to $(eval) is not a valid rule. The automatic variables are written
# as $$< and $$@ so that they survive the $(call) expansion and are only
# resolved when the recipe actually runs.
define make-eigmt-rule
test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_mt.o: test_gemm.c Makefile
	$(CXX) $(CXXFLAGS_MT) $(ERRCHK) $(N_TRIALS) $(call get-pdefs,$(4)) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_MT) -c $$< -o $$@
endef
# Instantiate the rule function make-eigmt-rule() for each Eigen
# implementation.
# One rule is generated for every combination of the words in DTS, TRANS,
# STORS, SHAPES, SMS_MT, SNS_MT, SKS_MT, and EIMPLS. Each expansion of
# make-eigmt-rule is handed to $(eval) so that it is parsed as makefile syntax.
$(foreach dt,$(DTS), \
$(foreach tr,$(TRANS), \
$(foreach st,$(STORS), \
$(foreach sh,$(SHAPES), \
$(foreach sm,$(SMS_MT), \
$(foreach sn,$(SNS_MT), \
$(foreach sk,$(SKS_MT), \
$(foreach impl,$(EIMPLS), \
$(eval $(call make-eigmt-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl)))))))))))
# --- Executable file rules ----------------------------------------------------

View File

@@ -1,8 +0,0 @@
% Driver script: draws one panel of plots per call to plot_panel_trxsh(),
% then closes the figure and clears the workspace before the next call.
% NOTE(review): plot_panel_trxsh() is defined elsewhere. The leading numeric
% arguments are presumably clock frequency, flops per cycle, and thread
% count, and the bracketed vector presumably holds the small m/n/k
% dimensions -- confirm against that function's signature.

% kabylake
plot_panel_trxsh(3.8,16,1,'st','d','rrr',[ 6 8 4 ],'../results/kabylake/20190619/4_800_4_mt201','kbl','MKL','matlab'); close; clear all;
plot_panel_trxsh(3.8,16,1,'st','d','ccc',[ 6 8 4 ],'../results/kabylake/20190619/4_800_4_mt201','kbl','MKL','matlab'); close; clear all;
% epyc
plot_panel_trxsh(3.0,8,1,'st','d','rrr',[ 6 8 4 ],'../results/epyc/20190619/4_800_4_mt256','epyc','MKL','matlab'); close; clear all;
plot_panel_trxsh(3.0,8,1,'st','d','ccc',[ 6 8 4 ],'../results/epyc/20190619/4_800_4_mt256','epyc','MKL','matlab'); close; clear all;

View File

@@ -9,6 +9,7 @@ function r_val = plot_l3sup_perf( opname, ...
cfreq, ...
dfps, ...
theid, impl )
%if ... %mod(theid-1,cols) == 2 || ...
% ... %mod(theid-1,cols) == 3 || ...
% ... %mod(theid-1,cols) == 4 || ...
@@ -19,11 +20,11 @@ function r_val = plot_l3sup_perf( opname, ...
%end
%legend_plot_id = 11;
legend_plot_id = 1*cols + 1*5;
legend_plot_id = 0*cols + 1*6;
if 1
ax1 = subplot( rows, cols, theid );
hold( ax1, 'on' );
ax1 = subplot( rows, cols, theid );
hold( ax1, 'on' );
end
% Set line properties.
@@ -77,9 +78,9 @@ end
flopscol = size( data_blissup, 2 );
msize = 5;
if 1
fontsize = 11;
fontsize = 12;
else
fontsize = 16;
fontsize = 16;
end
linesize = 0.5;
legend_loc = 'southeast';
@@ -95,13 +96,14 @@ for psize_col = 1:3
end
x_axis( :, 1 ) = data_blissup( :, psize_col );
% Compute the number of data points we have in the x-axis. Note that
% we only use quarter the data points for the m = n = k column of graphs.
if mod(theid-1,cols) == 6
np = size( data_blissup, 1 ) / 4;
else
np = size( data_blissup, 1 );
end
% Compute the number of data points we have in the x-axis. Note that we
% only use half the data points for the m = n = k column of graphs.
%if mod(theid-1,cols) == 6
% np = size( data_blissup, 1 ) / 2;
%else
% np = size( data_blissup, 1 );
%end
np = size( data_blissup, 1 );
% Grab the last x-axis value.
x_end = data_blissup( np, psize_col );
@@ -125,8 +127,7 @@ open_ln = line( x_axis( 1:np, 1 ), data_open( 1:np, flopscol ) / nth, ...
vend_ln = line( x_axis( 1:np, 1 ), data_vend( 1:np, flopscol ) / nth, ...
'Color',color_vend, 'LineStyle',lines_vend, ...
'LineWidth',linesize );
else
if theid == legend_plot_id
elseif theid == legend_plot_id
blissup_ln = line( nan, nan, ...
'Color',color_blissup, 'LineStyle',lines_blissup, ...
'LineWidth',linesize );
@@ -143,15 +144,28 @@ vend_ln = line( nan, nan, ...
'Color',color_vend, 'LineStyle',lines_vend, ...
'LineWidth',linesize );
end
end
xlim( ax1, [x_begin x_end] );
ylim( ax1, [y_begin y_end] );
if 6000 <= x_end && x_end < 10000
if mod(theid-1,cols) == 3 || mod(theid-1,cols) == 4 || mod(theid-1,cols) == 5
if nth == 12
ylim( ax1, [y_begin y_end/2] );
elseif nth > 12
ylim( ax1, [y_begin y_end/6] );
end
end
if 10000 <= x_end && x_end < 15000
x_tick2 = x_end - 2000;
x_tick1 = x_tick2/2;
%xticks( ax1, [ x_tick1 x_tick2 ] );
xticks( ax1, [ 4000 8000 12000 ] );
elseif 6000 <= x_end && x_end < 10000
x_tick2 = x_end - 2000;
x_tick1 = x_tick2/2;
%xticks( ax1, [ x_tick1 x_tick2 ] );
xticks( ax1, [ x_tick1 x_tick2 ] );
elseif 4000 <= x_end && x_end < 6000
x_tick2 = x_end - 1000;
@@ -188,11 +202,12 @@ if show_plot == 1 || theid == legend_plot_id
set( leg,'Color','none' );
set( leg,'Units','inches' );
if impl == 'octave'
set( leg,'FontSize',fontsize );
set( leg,'Position',[12.50 10.35 1.5 0.9 ] ); % (1,4tl)
set( leg,'FontSize',fontsize );
%set( leg,'Position',[12.40 10.60 1.9 0.95 ] ); % (1,4tl)
set( leg,'Position',[18.80 10.60 1.9 0.95 ] ); % (1,4tl)
else
set( leg,'FontSize',fontsize-1 );
set( leg,'Position',[18.24 10.15 1.15 0.7 ] ); % (1,4tl)
set( leg,'FontSize',fontsize-1 );
set( leg,'Position',[18.24 10.15 1.15 0.7 ] ); % (1,4tl)
end
set( leg,'Box','off' );
set( leg,'Color','none' );
@@ -209,17 +224,31 @@ box( ax1, 'on' );
titl = title( titlename );
set( titl, 'FontWeight', 'normal' ); % default font style is now 'bold'.
% The default is to align the plot title across whole figure, not the box.
% This is a hack to nudge the title back to the center of the box.
if impl == 'octave'
tpos = get( titl, 'Position' ); % default is to align across whole figure, not box.
tpos(1) = tpos(1) + -40;
set( titl, 'Position', tpos ); % here we nudge it back to centered with box.
tpos = get( titl, 'Position' );
% For some reason, the titles in the graphs in the last column start
% off in a different relative position than the graphs in the other
% columns. Here, we manually account for that.
if mod(theid-1,cols) == 6
tpos(1) = tpos(1) + -10;
else
tpos(1) = tpos(1) + -40;
end
set( titl, 'Position', tpos );
set( titl, 'FontSize', fontsize );
else % impl == 'matlab'
tpos = get( titl, 'Position' );
tpos(1) = tpos(1) + 90;
set( titl, 'Position', tpos );
end
if theid > (rows-1)*cols
%xlab = xlabel( ax1,xaxisname );
%tpos = get( xlab, 'Position' )
%tpos(2) = tpos(2) + 10;
%set( xlab, 'Position', tpos );
%xlab = xlabel( ax1,xaxisname );
%tpos = get( xlab, 'Position' )
%tpos(2) = tpos(2) + 10;
%set( xlab, 'Position', tpos );
if theid == rows*cols - 6
xlab = xlabel( ax1, 'm = 6; n = k' );
elseif theid == rows*cols - 5
@@ -238,13 +267,8 @@ if theid > (rows-1)*cols
end
if mod(theid-1,cols) == 0
ylab = ylabel( ax1,yaxisname );
ylab = ylabel( ax1,yaxisname );
end
%export_fig( filename, colorflag, '-pdf', '-m2', '-painters', '-transparent' );
%saveas( fig, filename_png );
%hold( ax1, 'off' );
r_val = 0;

View File

@@ -0,0 +1,8 @@
% Driver script for the multithreaded ('mt') plots: one call to
% plot_panel_trxsh() per machine, each followed by closing the figure and
% clearing the workspace.
% NOTE(review): plot_panel_trxsh() is defined elsewhere. The leading numeric
% arguments are presumably clock frequency, flops per cycle, and thread
% count, and the bracketed vector presumably holds the small m/n/k
% dimensions -- confirm against that function's signature.

% kabylake
plot_panel_trxsh(3.80,16,4,'mt','d','rrr',[ 6 8 10 ],'../results/kabylake/20200302/mnkt100000_mt4','kbl','MKL','octave'); close; clear all;
% haswell
plot_panel_trxsh(3.1,16,12,'mt','d','rrr',[ 6 8 10 ],'../results/haswell/20200302/mnkt100000_mt12','has','MKL','octave'); close; clear all;
% epyc
plot_panel_trxsh(2.55,8,32,'mt','d','rrr',[ 6 8 10 ],'../results/epyc/20200302/mnkt100000_mt32','epyc','MKL','octave'); close; clear all;

View File

@@ -1,50 +1,29 @@
function r_val = plot_l3sup_perf( opname, ...
smalldims, ...
data_blissup, ...
data_blisconv, ...
data_blislpab, ...
data_eigen, ...
data_open, ...
data_bfeo, ...
data_vend, vend_str, ...
data_bfeo, ...
data_xsmm, ...
data_vend, vend_str, ...
nth, ...
rows, cols, ...
cfreq, ...
dfps, ...
theid, impl )
% Define the column in which the performance rates are found.
flopscol = size( data_blissup, 2 );
% Check if blasfeo data is available.
has_bfeo = 1;
if data_bfeo( 1, flopscol ) == 0.0
has_bfeo = 0;
end
% Check if libxsmm data is available.
has_xsmm = 1;
if data_xsmm( 1, flopscol ) == 0.0
has_xsmm = 0;
end
% Define which plot id will have the legend.
% NOTE: We can draw the legend on any graph as long as it has already been
% rendered. Since the coordinates are global, we can simply always wait until
% the final graph to draw the legend.
%if nth == 1
% if has_xsmm == 1
% legend_plot_id = 2*cols + 1*5;
% else
% legend_plot_id = 1*cols + 1*5;
% end
%if ... %mod(theid-1,cols) == 2 || ...
% ... %mod(theid-1,cols) == 3 || ...
% ... %mod(theid-1,cols) == 4 || ...
% 0 == 1 ... %theid >= 19
% show_plot = 0;
%else
% legend_plot_id = 0*cols + 1*6;
show_plot = 1;
%end
legend_plot_id = cols*rows;
% Hold the axes.
%legend_plot_id = 11;
legend_plot_id = 2*cols + 1*5;
if 1
ax1 = subplot( rows, cols, theid );
hold( ax1, 'on' );
@@ -52,13 +31,12 @@ end
% Set line properties.
color_blissup = 'k'; lines_blissup = '-'; markr_blissup = '';
color_blisconv = 'k'; lines_blisconv = ':'; markr_blisconv = '';
color_blislpab = 'k'; lines_blislpab = ':'; markr_blislpab = '';
color_eigen = 'm'; lines_eigen = '-.'; markr_eigen = 'o';
color_open = 'r'; lines_open = '--'; markr_open = 'o';
color_bfeo = 'c'; lines_bfeo = '-'; markr_bfeo = 'o';
color_vend = 'b'; lines_vend = '-.'; markr_vend = '.';
color_bfeo = 'c'; lines_bfeo = '-'; markr_bfeo = 'o';
color_xsmm = 'g'; lines_xsmm = '-'; markr_xsmm = 'o';
color_vend = 'b'; lines_vend = '-.'; markr_vend = '.';
% Compute the peak performance in terms of the number of double flops
% executable per cycle and the clock rate.
@@ -77,13 +55,15 @@ titlename = '%s';
titlename = sprintf( titlename, title_opname );
% Set the legend strings.
blissup_lg = sprintf( 'BLIS sup' );
blisconv_lg = sprintf( 'BLIS conv' );
eigen_lg = sprintf( 'Eigen' );
open_lg = sprintf( 'OpenBLAS' );
vend_lg = vend_str;
bfeo_lg = sprintf( 'BLASFEO' );
xsmm_lg = sprintf( 'libxsmm' );
blissup_legend = sprintf( 'BLIS sup' );
blislpab_legend = sprintf( 'BLIS conv' );
eigen_legend = sprintf( 'Eigen' );
open_legend = sprintf( 'OpenBLAS' );
bfeo_legend = sprintf( 'BLASFEO' );
xsmm_legend = sprintf( 'libxsmm' );
%vend_legend = sprintf( 'MKL' );
%vend_legend = sprintf( 'ARMPL' );
vend_legend = vend_str;
% Set axes range values.
y_scale = 1.00;
@@ -101,6 +81,7 @@ end
%flopscol = 4;
flopscol = size( data_blissup, 2 );
msize = 5;
if 1
fontsize = 12;
@@ -130,44 +111,67 @@ x_axis( :, 1 ) = data_blissup( :, psize_col );
%end
np = size( data_blissup, 1 );
has_xsmm = 1;
if data_xsmm( 1, flopscol ) == 0.0
has_xsmm = 0;
end
% Grab the last x-axis value.
x_end = data_blissup( np, psize_col );
%data_peak( 1, 1:2 ) = [ 0 max_perf_core ];
%data_peak( 2, 1:2 ) = [ x_end max_perf_core ];
if show_plot == 1
blissup_ln = line( x_axis( 1:np, 1 ), data_blissup( 1:np, flopscol ) / nth, ...
'Color',color_blissup, 'LineStyle',lines_blissup, ...
'LineWidth',linesize );
blisconv_ln = line( x_axis( 1:np, 1 ), data_blisconv( 1:np, flopscol ) / nth, ...
'Color',color_blisconv, 'LineStyle',lines_blisconv, ...
'LineWidth',linesize );
'Color',color_blissup, 'LineStyle',lines_blissup, ...
'LineWidth',linesize );
blislpab_ln = line( x_axis( 1:np, 1 ), data_blislpab( 1:np, flopscol ) / nth, ...
'Color',color_blislpab, 'LineStyle',lines_blislpab, ...
'LineWidth',linesize );
eigen_ln = line( x_axis( 1:np, 1 ), data_eigen( 1:np, flopscol ) / nth, ...
'Color',color_eigen, 'LineStyle',lines_eigen, ...
'LineWidth',linesize );
'Color',color_eigen, 'LineStyle',lines_eigen, ...
'LineWidth',linesize );
open_ln = line( x_axis( 1:np, 1 ), data_open( 1:np, flopscol ) / nth, ...
'Color',color_open, 'LineStyle',lines_open, ...
'LineWidth',linesize );
vend_ln = line( x_axis( 1:np, 1 ), data_vend( 1:np, flopscol ) / nth, ...
'Color',color_vend, 'LineStyle',lines_vend, ...
'LineWidth',linesize );
if has_bfeo == 1
bfeo_ln = line( x_axis( 1:np, 1 ), data_bfeo( 1:np, flopscol ) / nth, ...
'Color',color_bfeo, 'LineStyle',lines_bfeo, ...
'LineWidth',linesize );
else
bfeo_ln = line( nan, nan, ...
'Color',color_bfeo, 'LineStyle',lines_bfeo, ...
'LineWidth',linesize );
end
'Color',color_open, 'LineStyle',lines_open, ...
'LineWidth',linesize );
bfeo_ln = line( x_axis( 1:np, 1 ), data_bfeo( 1:np, flopscol ) / nth, ...
'Color',color_bfeo, 'LineStyle',lines_bfeo, ...
'LineWidth',linesize );
if has_xsmm == 1
xsmm_ln = line( x_axis( 1:np, 1 ), data_xsmm( 1:np, flopscol ) / nth, ...
'Color',color_xsmm, 'LineStyle',lines_xsmm, ...
'LineWidth',linesize );
xsmm_ln = line( x_axis( 1:np, 1 ), data_xsmm( 1:np, flopscol ) / nth, ...
'Color',color_xsmm, 'LineStyle',lines_xsmm, ...
'LineWidth',linesize );
else
xsmm_ln = line( nan, nan, ...
'Color',color_xsmm, 'LineStyle',lines_xsmm, ...
'LineWidth',linesize );
xsmm_ln = line( nan, nan, ...
'Color',color_xsmm, 'LineStyle',lines_xsmm, ...
'LineWidth',linesize );
end
vend_ln = line( x_axis( 1:np, 1 ), data_vend( 1:np, flopscol ) / nth, ...
'Color',color_vend, 'LineStyle',lines_vend, ...
'LineWidth',linesize );
elseif theid == legend_plot_id
blissup_ln = line( nan, nan, ...
'Color',color_blissup, 'LineStyle',lines_blissup, ...
'LineWidth',linesize );
blislpab_ln = line( nan, nan, ...
'Color',color_blislpab, 'LineStyle',lines_blislpab, ...
'LineWidth',linesize );
eigen_ln = line( nan, nan, ...
'Color',color_eigen, 'LineStyle',lines_eigen, ...
'LineWidth',linesize );
open_ln = line( nan, nan, ...
'Color',color_open, 'LineStyle',lines_open, ...
'LineWidth',linesize );
bfeo_ln = line( nan, nan, ...
'Color',color_bfeo, 'LineStyle',lines_bfeo, ...
'LineWidth',linesize );
xsmm_ln = line( nan, nan, ...
'Color',color_xsmm, 'LineStyle',lines_xsmm, ...
'LineWidth',linesize );
vend_ln = line( nan, nan, ...
'Color',color_vend, 'LineStyle',lines_vend, ...
'LineWidth',linesize );
end
@@ -199,51 +203,71 @@ elseif 500 <= x_end && x_end < 1000
xticks( ax1, [ x_tick1 x_tick2 x_tick3 ] );
end
% xpos ypos
%set( leg,'Position',[11.32 6.36 1.15 0.7 ] ); % (1,4tl)
if nth == 1 && theid == legend_plot_id
if has_xsmm == 1
% single-threaded, with libxsmm (ccc)
leg = legend( ...
[ blissup_ln blisconv_ln eigen_ln open_ln vend_ln bfeo_ln xsmm_ln ], ...
blissup_lg, blisconv_lg, eigen_lg, open_lg, vend_lg, bfeo_lg, xsmm_lg, ...
'Location', legend_loc );
set( leg,'Box','off','Color','none','Units','inches' );
if impl == 'octave'
set( leg,'FontSize',fontsize );
set( leg,'Position',[15.35 4.62 1.9 1.20] ); % (1,4tl)
if show_plot == 1 || theid == legend_plot_id
if nth == 1 && theid == legend_plot_id
if has_xsmm == 1
leg = legend( ...
[ ...
blissup_ln ...
blislpab_ln ...
eigen_ln ...
open_ln ...
bfeo_ln ...
xsmm_ln ...
vend_ln ...
], ...
blissup_legend, ...
blislpab_legend, ...
eigen_legend, ...
open_legend, ...
bfeo_legend, ...
xsmm_legend, ...
vend_legend, ...
'Location', legend_loc );
set( leg,'Box','off' );
set( leg,'Color','none' );
set( leg,'Units','inches' );
if impl == 'octave'
set( leg,'FontSize',fontsize );
set( leg,'Position',[15.40 4.75 1.9 1.20] ); % (1,4tl)
else
set( leg,'FontSize',fontsize-3 );
set( leg,'Position',[18.20 10.20 1.15 0.7 ] ); % (1,4tl)
end
else
set( leg,'FontSize',fontsize-3 );
set( leg,'Position',[18.20 10.20 1.15 0.7 ] ); % (1,4tl)
leg = legend( ...
[ ...
blissup_ln ...
blislpab_ln ...
eigen_ln ...
open_ln ...
bfeo_ln ...
vend_ln ...
], ...
blissup_legend, ...
blislpab_legend, ...
eigen_legend, ...
open_legend, ...
bfeo_legend, ...
vend_legend, ...
'Location', legend_loc );
set( leg,'Box','off' );
set( leg,'Color','none' );
set( leg,'Units','inches' );
if impl == 'octave'
set( leg,'FontSize',fontsize );
set( leg,'Position',[15.40 7.65 1.9 1.10] ); % (1,4tl)
else
set( leg,'FontSize',fontsize-1 );
set( leg,'Position',[18.24 10.15 1.15 0.7] ); % (1,4tl)
end
end
else
% single-threaded, without libxsmm (rrr, or other)
leg = legend( ...
[ blissup_ln blisconv_ln eigen_ln open_ln vend_ln bfeo_ln ], ...
blissup_lg, blisconv_lg, eigen_lg, open_lg, vend_lg, bfeo_lg, ...
'Location', legend_loc );
set( leg,'Box','off','Color','none','Units','inches' );
if impl == 'octave'
set( leg,'FontSize',fontsize );
set( leg,'Position',[15.35 7.40 1.9 1.10] ); % (1,4tl)
else
set( leg,'FontSize',fontsize-1 );
set( leg,'Position',[18.24 10.15 1.15 0.7] ); % (1,4tl)
end
end
elseif nth > 1 && theid == legend_plot_id
% multithreaded
leg = legend( ...
[ blissup_ln blisconv_ln eigen_ln open_ln vend_ln ], ...
blissup_lg, blisconv_lg, eigen_lg, open_lg, vend_lg, ...
'Location', legend_loc );
set( leg,'Box','off','Color','none','Units','inches' );
if impl == 'octave'
set( leg,'FontSize',fontsize );
set( leg,'Position',[18.20 10.30 1.9 0.95] ); % (1,4tl)
else
set( leg,'FontSize',fontsize-1 );
set( leg,'Position',[18.24 10.15 1.15 0.7] ); % (1,4tl)
set( leg,'Box','off' );
set( leg,'Color','none' );
set( leg,'Units','inches' );
% xpos ypos
%set( leg,'Position',[11.32 6.36 1.15 0.7 ] ); % (1,4tl)
elseif nth > 1 && theid == legend_plot_id
end
end
@@ -274,38 +298,28 @@ else % impl == 'matlab'
set( titl, 'Position', tpos );
end
sll_str = sprintf( 'm = %u; n = k', smalldims(1) );
lsl_str = sprintf( 'n = %u; m = k', smalldims(2) );
lls_str = sprintf( 'k = %u; m = n', smalldims(3) );
lss_str = sprintf( 'm; n = %u, k = %u', smalldims(2), smalldims(3) );
sls_str = sprintf( 'n; m = %u, k = %u', smalldims(1), smalldims(3) );
ssl_str = sprintf( 'k; m = %u, n = %u', smalldims(1), smalldims(2) );
lll_str = sprintf( 'm = n = k' );
% Place labels on the bottom row of graphs.
if theid > (rows-1)*cols
%xlab = xlabel( ax1,xaxisname );
%tpos = get( xlab, 'Position' )
%tpos(2) = tpos(2) + 10;
%set( xlab, 'Position', tpos );
if theid == rows*cols - 6
xlab = xlabel( ax1, sll_str );
xlab = xlabel( ax1, 'm = 6; n = k' );
elseif theid == rows*cols - 5
xlab = xlabel( ax1, lsl_str );
xlab = xlabel( ax1, 'n = 8; m = k' );
elseif theid == rows*cols - 4
xlab = xlabel( ax1, lls_str );
xlab = xlabel( ax1, 'k = 4; m = n' );
elseif theid == rows*cols - 3
xlab = xlabel( ax1, lss_str );
xlab = xlabel( ax1, 'm; n = 8, k = 4' );
elseif theid == rows*cols - 2
xlab = xlabel( ax1, sls_str );
xlab = xlabel( ax1, 'n; m = 6, k = 4' );
elseif theid == rows*cols - 1
xlab = xlabel( ax1, ssl_str );
xlab = xlabel( ax1, 'k; m = 6, n = 8' );
elseif theid == rows*cols - 0
xlab = xlabel( ax1, lll_str );
xlab = xlabel( ax1, 'm = n = k' );
end
end
% Place labels on the left-hand column of graphs.
if mod(theid-1,cols) == 0
ylab = ylabel( ax1,yaxisname );
end

View File

@@ -0,0 +1,8 @@
% Driver script for the single-threaded ('st') plots: one call to
% plot_panel_trxsh() per machine, each followed by closing the figure and
% clearing the workspace.
% NOTE(review): plot_panel_trxsh() is defined elsewhere. The leading numeric
% arguments are presumably clock frequency, flops per cycle, and thread
% count, and the bracketed vector presumably holds the small m/n/k
% dimensions -- confirm against that function's signature.

% kabylake
plot_panel_trxsh(3.80,16,1,'st','d','rrr',[ 6 8 4 ],'../results/kabylake/20200302/mnkt100000_st','kbl','MKL','octave'); close; clear all;
% haswell
plot_panel_trxsh(3.5,16,1,'st','d','rrr',[ 6 8 4 ],'../results/haswell/20200302/mnkt100000_st','has','MKL','octave'); close; clear all;
% epyc
plot_panel_trxsh(3.00, 8,1,'st','d','rrr',[ 6 8 4 ],'../results/epyc/20200302/mnkt100000_st','epyc','MKL','octave'); close; clear all;

View File

@@ -1,580 +0,0 @@
#!/bin/bash
#
# BLIS
# An object-based framework for developing high-performance BLAS-like
# libraries.
#
# Copyright (C) 2014, The University of Texas at Austin
# Copyright (C) 2019, Advanced Micro Devices, Inc.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# - Neither the name(s) of the copyright holder(s) nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
#
# Makefile
#
# Field G. Van Zee
#
# Makefile for standalone BLIS test drivers.
#
#
# --- Makefile PHONY target definitions ----------------------------------------
#
# These targets name actions rather than files, so make must never consider
# them up to date because of a same-named file on disk.
.PHONY: all all-st all-mt blis blis-st blis-mt clean cleanx
#
# --- Determine makefile fragment location -------------------------------------
#
# Comments:
# - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given.
# - We must use recursively expanded assignment for LIB_PATH and INC_PATH in
#   the second case because CONFIG_NAME is not yet set.
# - BLIS_INSTALL_PATH is stripped before the comparison so that a value made
#   up only of whitespace is treated the same as an unset variable.
# - In the first case all three paths point into the installation prefix; in
#   the second case they point two directories up from this Makefile.
ifneq ($(strip $(BLIS_INSTALL_PATH)),)
LIB_PATH := $(BLIS_INSTALL_PATH)/lib
INC_PATH := $(BLIS_INSTALL_PATH)/include/blis
SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis
else
DIST_PATH := ../..
LIB_PATH = ../../lib/$(CONFIG_NAME)
INC_PATH = ../../include/$(CONFIG_NAME)
SHARE_PATH := ../..
endif
#
# --- Include common makefile definitions --------------------------------------
#
# Include the common makefile fragment. The leading '-' makes the include
# non-fatal if the file cannot be found.
# NOTE(review): names used later in this file (e.g. CONFIG_NAME,
# get-user-cflags-for, LIBBLIS_L) are not defined here and presumably come
# from common.mk -- confirm; if so, a missing fragment fails silently.
-include $(SHARE_PATH)/common.mk
#
# --- BLAS and LAPACK implementations ------------------------------------------
#
# Location of the BLIS library and headers, i.e. wherever BLIS was installed.
# These definitions are currently disabled.
#BLIS_LIB_PATH := $(INSTALL_PREFIX)/lib
#BLIS_INC_PATH := $(INSTALL_PREFIX)/include/blis
#BLIS_LIB := $(BLIS_LIB_PATH)/libblis.a

# Directories in which the BLAS libraries reside.
HOME_LIB_PATH := $(HOME)/flame/lib
MKL_LIB_PATH  := $(HOME)/intel/mkl/lib/intel64

# Static archives located under HOME_LIB_PATH: netlib BLAS, OpenBLAS (two
# archives), and BLASFEO.
NETLIB_LIB    := $(HOME_LIB_PATH)/libblas.a
OPENBLAS_LIB  := $(HOME_LIB_PATH)/libopenblas.a
OPENBLASP_LIB := $(HOME_LIB_PATH)/libopenblasp.a
BLASFEO_LIB   := $(HOME_LIB_PATH)/libblasfeo.a

# libxsmm, linked along with libdl, the netlib BLAS archive, and libgfortran.
LIBXSMM_LIB := $(HOME_LIB_PATH)/libxsmm.a -ldl $(NETLIB_LIB) -lgfortran

# ATLAS
ATLAS_LIB := $(HOME_LIB_PATH)/libf77blas.a $(HOME_LIB_PATH)/libatlas.a

# Eigen: header directory plus the static BLAS archive. EIGENP_LIB reuses the
# same archive as EIGEN_LIB.
EIGEN_INC  := $(HOME)/flame/eigen/include/eigen3
EIGEN_LIB  := $(HOME_LIB_PATH)/libeigen_blas_static.a
EIGENP_LIB := $(EIGEN_LIB)

# MKL, linked against the sequential threading layer.
MKL_LIB := -L$(MKL_LIB_PATH)
MKL_LIB += -lmkl_intel_lp64 -lmkl_core -lmkl_sequential
MKL_LIB += -lpthread -lm -ldl

# MKL, linked against the GNU threading layer with OpenMP enabled.
MKLP_LIB := -L$(MKL_LIB_PATH)
MKLP_LIB += -lmkl_intel_lp64 -lmkl_core -lmkl_gnu_thread
MKLP_LIB += -lpthread -lm -ldl -fopenmp
# Disabled alternative link flags:
#-L$(ICC_LIB_PATH) -lgomp

# The "vendor" library is MKL.
VENDOR_LIB  := $(MKL_LIB)
VENDORP_LIB := $(MKLP_LIB)
#
# --- Problem size definitions -------------------------------------------------
#
# Single core
# These three values become -DP_BEGIN, -DP_MAX, and -DP_INC in PDEF_ST.
PS_BEGIN := 4
PS_MAX := 800
PS_INC := 4
# Multicore
# These three values become -DP_BEGIN, -DP_MAX, and -DP_INC in PDEF_MT.
P1_BEGIN := 8
P1_MAX := 1600
P1_INC := 8
#
# --- General build definitions ------------------------------------------------
#
# Test sources and objects both live in the current directory.
TEST_SRC_PATH := .
TEST_OBJ_PATH := .
# Gather all local object files.
# Every .c file found in TEST_SRC_PATH is mapped to a .o name in
# TEST_OBJ_PATH; $(sort) also removes duplicates.
TEST_OBJS := $(sort $(patsubst $(TEST_SRC_PATH)/%.c, \
$(TEST_OBJ_PATH)/%.o, \
$(wildcard $(TEST_SRC_PATH)/*.c)))
# Override the value of CINCFLAGS so that the value of CFLAGS returned by
# get-frame-cflags-for() is not cluttered up with include paths needed only
# while building BLIS.
# NOTE: this override must precede the CFLAGS assignment below, which is
# expanded immediately.
CINCFLAGS := -I$(INC_PATH)
# Use the "framework" CFLAGS for the configuration family.
# NOTE(review): the comments here mention get-frame-cflags-for(), but the
# function actually called is get-user-cflags-for -- confirm which is meant.
CFLAGS := $(call get-user-cflags-for,$(CONFIG_NAME))
# Add local header paths to CFLAGS.
CFLAGS += -I$(TEST_SRC_PATH)
# Locate the libblis library to which we will link.
LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L)
# Define a set of CFLAGS for use with C++ and Eigen.
# The C flags are reused with -std=c99 swapped for -std=c++11, and the Eigen
# header directory is appended.
CXXFLAGS := $(subst -std=c99,-std=c++11,$(CFLAGS))
CXXFLAGS += -I$(EIGEN_INC)
# Create a copy of CXXFLAGS without -fopenmp in order to disable multithreading.
# Both variants additionally prepend -march=native.
CXXFLAGS_ST := -march=native $(subst -fopenmp,,$(CXXFLAGS))
CXXFLAGS_MT := -march=native $(CXXFLAGS)
# Single or multithreaded string
# THR_STR is defined as a quoted string literal; the backslashes keep the
# double quotes intact when the flag passes through the shell.
STR_ST := -DTHR_STR=\"st\"
STR_MT := -DTHR_STR=\"mt\"
# Number of trials per problem size.
N_TRIALS := -DN_TRIALS=3
# Problem size specification
# PDEF_ST is built from the PS_* values and PDEF_MT from the P1_* values.
PDEF_ST := -DP_BEGIN=$(PS_BEGIN) \
-DP_MAX=$(PS_MAX) \
-DP_INC=$(PS_INC)
PDEF_MT := -DP_BEGIN=$(P1_BEGIN) \
-DP_MAX=$(P1_MAX) \
-DP_INC=$(P1_INC)
# Select whether the test drivers are compiled with error checking.
# Running "make E=1" defines ERROR_CHECK; any other value of E (including
# leaving it unset) defines NO_ERROR_CHECK.
# E is stripped before the comparison, matching how BLIS_INSTALL_PATH is
# tested earlier in this file, so that stray whitespace (e.g. E="1 ") does
# not silently disable error checking.
ifeq ($(strip $(E)),1)
ERRCHK := -DERROR_CHECK
else
ERRCHK := -DNO_ERROR_CHECK
endif
# Datatypes (and computation precisions) for which drivers are built.
# The full set would be:
#DTS := s d c z
DTS := d

# Transposition combinations.
TRANS := n_n n_t t_n t_t

# While BLIS supports all combinations of row and column storage for matrices
# C, A, and B, the alternatives mostly only support CBLAS APIs, which inherently
# support only "all row-storage" or "all column-storage". Thus, we disable the
# building of those other drivers so that compilation/linking completes sooner.
# The full set would be:
#STORS := r_r_r r_r_c r_c_r r_c_c c_r_r c_r_c c_c_r c_c_c
STORS := r_r_r c_c_c

# Shape combinations.
SHAPES := l_l_s l_s_l s_l_l s_s_l s_l_s l_s_s l_l_l

# Values for the SMS, SNS, and SKS lists.
SMS := 6
SNS := 8
SKS := 10
#
# --- Function definitions -----------------------------------------------------
#
# Remove every underscore from the given string(s), e.g. n_t -> nt.
stripu = $(subst _,,$(1))
# Return one underscore-delimited field of a string.
# arguments: 1: field index (1-based)  2: string (w/ underscores)
get-nth-field = $(word $(1),$(subst _, ,$(2)))
# Field accessors for two-field strings (e.g. transposes such as n_t) and
# three-field strings (e.g. storages such as r_c_r, shapes such as l_s_l).
# These are used to build the datatype/parameter combinations and then extract
# the strings and C preprocessor define flags needed for each driver.
get-1of2 = $(call get-nth-field,1,$(1))
get-2of2 = $(call get-nth-field,2,$(1))
get-1of3 = $(call get-nth-field,1,$(1))
get-2of3 = $(call get-nth-field,2,$(1))
get-3of3 = $(call get-nth-field,3,$(1))
# Datatype defs.
# Map a datatype character to the suffix shared by its BLIS num_t constant and
# its IS_* macro. Anything other than s, d, or c is treated as dcomplex.
# arguments: 1: datatype char (s, d, c, or z)
get-dt-name = $(strip \
              $(if $(findstring s,$(1)),FLOAT,\
              $(if $(findstring d,$(1)),DOUBLE,\
              $(if $(findstring c,$(1)),SCOMPLEX,\
                                        DCOMPLEX))))
# Return the -DDT= and -DIS_* preprocessor flags for a datatype character.
# arguments: 1: datatype char (s, d, c, or z)
get-dt-cpp = -DDT=BLIS_$(call get-dt-name,$(1)) -DIS_$(call get-dt-name,$(1))
# Transpose defs.
# Translate one transposition character into the flags for one operand.
# The 't' substitution is applied first; its replacement text contains no
# lowercase 'n', so the subsequent 'n' substitution cannot corrupt it.
# arguments: 1: transposition char (n or t)  2: operand letter (A or B)
tra-char-to-defs = $(strip $(subst n,-DTRANS$(2)=BLIS_NO_TRANSPOSE -D$(2)_NOTRANS,$(subst t,-DTRANS$(2)=BLIS_TRANSPOSE -D$(2)_TRANS,$(1))))
# arguments: 1: transposition combination (w/ underscore), e.g. n_t
get-tra-defs-a = $(call tra-char-to-defs,$(call get-1of2,$(1)),A)
get-tra-defs-b = $(call tra-char-to-defs,$(call get-2of2,$(1)),B)
# Flags for matrix A followed by the flags for matrix B.
get-tra-defs = $(foreach op,a b,$(call get-tra-defs-$(op),$(1)))
# Storage defs.
# The storage combination lists the operands in the order C, A, B: the first
# letter is the storage of C, the second of A, and the third of B. This matches
# how test_gemm.c interprets STOR3 (e.g. BLIS_RRC gives row-stored C and A and
# a column-stored B). The per-operand macros must therefore be derived from
# the matching position: C from field 1, A from field 2, B from field 3.
# For the all-row (r_r_r) and all-column (c_c_c) combinations every field is
# the same letter, so the result is identical regardless of position.
#
# Uppercase a single storage character (r -> R, c -> C).
get-sto-uch = $(strip $(subst r,R,$(subst c,C,$(1))))
# arguments: 1: storage combination (w/ underscores), e.g. r_c_c
get-sto-uch-c = $(call get-sto-uch,$(call get-1of3,$(1)))
get-sto-uch-a = $(call get-sto-uch,$(call get-2of3,$(1)))
get-sto-uch-b = $(call get-sto-uch,$(call get-3of3,$(1)))
# Return -DSTOR3=BLIS_<C><A><B> plus one -D<operand>_STOR_<R|C> flag per matrix.
# arguments: 1: storage combination (w/ underscores), e.g. r_c_c
get-sto-defs = $(strip \
               -DSTOR3=BLIS_$(call get-sto-uch-c,$(1))$(call get-sto-uch-a,$(1))$(call get-sto-uch-b,$(1)) \
               -DA_STOR_$(call get-sto-uch-a,$(1)) \
               -DB_STOR_$(call get-sto-uch-b,$(1)) \
               -DC_STOR_$(call get-sto-uch-c,$(1)))
# Dimension defs.
# Value assigned to a dimension macro: -1 when the shape character is 'l'
# (test_gemm.c then derives that dimension from the problem size), otherwise
# the given small size.
# arguments: 1: shape char (l or s)  2: small dimension size
shape-dim-value = $(if $(findstring l,$(1)),-1,$(2))
get-shape-defs-cm = -DM_DIM=$(call shape-dim-value,$(1),$(2))
get-shape-defs-cn = -DN_DIM=$(call shape-dim-value,$(1),$(2))
get-shape-defs-ck = -DK_DIM=$(call shape-dim-value,$(1),$(2))
# arguments: 1: shape (w/ underscores)  2: small size for that dimension
get-shape-defs-m = $(call get-shape-defs-cm,$(call get-1of3,$(1)),$(2))
get-shape-defs-n = $(call get-shape-defs-cn,$(call get-2of3,$(1)),$(2))
get-shape-defs-k = $(call get-shape-defs-ck,$(call get-3of3,$(1)),$(2))
# arguments: 1: shape (w/ underscores) 2: smallm 3: smalln 4: smallk
get-shape-defs = $(strip \
                 $(call get-shape-defs-m,$(1),$(2)) \
                 $(call get-shape-defs-n,$(1),$(3)) \
                 $(call get-shape-defs-k,$(1),$(4)))
#$(error l_l_s 6 8 4 = $(call get-shape-defs,l_l_s,6,8,4))
# Shape-dimension string.
# Label for one dimension: 'p' when the shape character is 'l', otherwise the
# given small size.
# arguments: 1: shape char (l or s)  2: small dimension size
get-shape-str-ch = $(or $(if $(findstring l,$(1)),p),$(2))
# Label for the dimension found at a given position of the shape string.
# arguments: 1: field index (1=m, 2=n, 3=k)  2: shape (w/ underscores)  3: small size
get-shape-str-at = $(call get-shape-str-ch,$(word $(1),$(subst _, ,$(2))),$(3))
# arguments: 1: shape (w/ underscores)  2: small size for that dimension
get-shape-str-m = $(call get-shape-str-at,1,$(1),$(2))
get-shape-str-n = $(call get-shape-str-at,2,$(1),$(2))
get-shape-str-k = $(call get-shape-str-at,3,$(1),$(2))
# Build the m<..>n<..>k<..> substring used in object/binary names, e.g.
# l_l_s with small sizes 6,8,10 yields mpnpk10.
# arguments: 1: shape (w/ underscores) 2: smallm 3: smalln 4: smallk
get-shape-dim-str = m$(call get-shape-str-m,$(1),$(2))n$(call get-shape-str-n,$(1),$(3))k$(call get-shape-str-k,$(1),$(4))
# Implementation defs.
# Define a function to return the appropriate -DSTR= and -D[BLIS|BLAS] flags.
# The implementation name is passed through a chain of $(subst ...) calls, one
# per known implementation; whichever name matches is replaced by
# -DSTR=\"<name>\" plus the macro(s) selecting the code path in test_gemm.c:
#   blissup  -> -DBLIS -DSUP      blislpab -> -DBLIS
#   eigen    -> -DEIGEN           libxsmm  -> -DBLAS -DXSMM
#   openblas, blasfeo, vendor -> -DCBLAS
# A name that matches none of the above is returned unchanged.
# arguments: 1: implementation name
get-imp-defs = $(strip $(subst blissup,-DSTR=\"$(1)\" -DBLIS -DSUP, \
$(subst blislpab,-DSTR=\"$(1)\" -DBLIS, \
$(subst eigen,-DSTR=\"$(1)\" -DEIGEN, \
$(subst openblas,-DSTR=\"$(1)\" -DCBLAS, \
$(subst blasfeo,-DSTR=\"$(1)\" -DCBLAS, \
$(subst libxsmm,-DSTR=\"$(1)\" -DBLAS -DXSMM, \
$(subst vendor,-DSTR=\"$(1)\" -DCBLAS,$(1)))))))))
# Underscore-free versions of TRANS and STORS (e.g. n_t -> nt, r_r_r -> rrr),
# which is the form used in object and binary file names.
TRANS0 = $(call stripu,$(TRANS))
STORS0 = $(call stripu,$(STORS))
# Limit BLAS and Eigen to only using all row-stored, or all column-stored matrices.
# Also, limit libxsmm to using all column-stored matrices since it does not offer
# CBLAS interfaces.
BSTORS0 = rrr ccc
ESTORS0 = rrr ccc
XSTORS0 = ccc
#
# --- Object and binary file definitions ---------------------------------------
#
# Return the names of all single-threaded object files for one implementation,
# i.e. one name per combination of the given parameter lists.
# arguments: 1: datatypes  2: transposes (w/o underscores)
#            3: storages (w/o underscores)  4: shapes (w/ underscores)
#            5: small m sizes  6: small n sizes  7: small k sizes
#            8: implementation name
get-st-objs = $(strip \
              $(foreach dt,$(1), \
              $(foreach tr,$(2), \
              $(foreach st,$(3), \
              $(foreach sh,$(4), \
              $(foreach sm,$(5), \
              $(foreach sn,$(6), \
              $(foreach sk,$(7), \
              test_$(dt)gemm_$(tr)_$(st)_$(call get-shape-dim-str,$(sh),$(sm),$(sn),$(sk))_$(8)_st.o))))))))

# For each single-threaded implementation, list its object files (via
# get-st-objs() above) and the binaries that are linked from them. BLIS is
# built for every storage combination in STORS0; the other implementations use
# their own restricted storage lists.
BLISSUP_ST_OBJS  := $(call get-st-objs,$(DTS),$(TRANS0),$(STORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blissup)
BLISSUP_ST_BINS  := $(BLISSUP_ST_OBJS:.o=.x)

BLISLPAB_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(STORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blislpab)
BLISLPAB_ST_BINS := $(BLISLPAB_ST_OBJS:.o=.x)

EIGEN_ST_OBJS    := $(call get-st-objs,$(DTS),$(TRANS0),$(ESTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),eigen)
EIGEN_ST_BINS    := $(EIGEN_ST_OBJS:.o=.x)

OPENBLAS_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),openblas)
OPENBLAS_ST_BINS := $(OPENBLAS_ST_OBJS:.o=.x)

BLASFEO_ST_OBJS  := $(call get-st-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blasfeo)
BLASFEO_ST_BINS  := $(BLASFEO_ST_OBJS:.o=.x)

LIBXSMM_ST_OBJS  := $(call get-st-objs,$(DTS),$(TRANS0),$(XSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),libxsmm)
LIBXSMM_ST_BINS  := $(LIBXSMM_ST_OBJS:.o=.x)

VENDOR_ST_OBJS   := $(call get-st-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),vendor)
VENDOR_ST_BINS   := $(VENDOR_ST_OBJS:.o=.x)

# Declare the single-threaded object files intermediate: make deletes them on
# its own once the binaries that need them have been linked.
.INTERMEDIATE: $(BLISSUP_ST_OBJS) $(BLISLPAB_ST_OBJS) \
               $(EIGEN_ST_OBJS) \
               $(OPENBLAS_ST_OBJS) $(BLASFEO_ST_OBJS) \
               $(LIBXSMM_ST_OBJS) \
               $(VENDOR_ST_OBJS)
# Return the names of all multithreaded object files for one implementation,
# i.e. one name per combination of the given parameter lists.
# arguments: 1: datatypes  2: transposes (w/o underscores)
#            3: storages (w/o underscores)  4: shapes (w/ underscores)
#            5: small m sizes  6: small n sizes  7: small k sizes
#            8: implementation name
get-mt-objs = $(strip \
              $(foreach dt,$(1), \
              $(foreach tr,$(2), \
              $(foreach st,$(3), \
              $(foreach sh,$(4), \
              $(foreach sm,$(5), \
              $(foreach sn,$(6), \
              $(foreach sk,$(7), \
              test_$(dt)gemm_$(tr)_$(st)_$(call get-shape-dim-str,$(sh),$(sm),$(sn),$(sk))_$(8)_mt.o))))))))

# For each multithreaded implementation, list its object files (via
# get-mt-objs() above) and the binaries that are linked from them.
BLISSUP_MT_OBJS  := $(call get-mt-objs,$(DTS),$(TRANS0),$(STORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blissup)
BLISSUP_MT_BINS  := $(BLISSUP_MT_OBJS:.o=.x)

BLISLPAB_MT_OBJS := $(call get-mt-objs,$(DTS),$(TRANS0),$(STORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blislpab)
BLISLPAB_MT_BINS := $(BLISLPAB_MT_OBJS:.o=.x)

EIGEN_MT_OBJS    := $(call get-mt-objs,$(DTS),$(TRANS0),$(ESTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),eigen)
EIGEN_MT_BINS    := $(EIGEN_MT_OBJS:.o=.x)

OPENBLAS_MT_OBJS := $(call get-mt-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),openblas)
OPENBLAS_MT_BINS := $(OPENBLAS_MT_OBJS:.o=.x)

VENDOR_MT_OBJS   := $(call get-mt-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),vendor)
VENDOR_MT_BINS   := $(VENDOR_MT_OBJS:.o=.x)

#$(error "objs = $(EIGEN_ST_BINS)" )

# Declare the multithreaded object files intermediate: make deletes them on
# its own once the binaries that need them have been linked.
.INTERMEDIATE: $(BLISSUP_MT_OBJS) $(BLISLPAB_MT_OBJS) \
               $(EIGEN_MT_OBJS) \
               $(OPENBLAS_MT_OBJS) \
               $(VENDOR_MT_OBJS)
#
# --- Targets/rules ------------------------------------------------------------
#
# None of the targets below name a file; they only group binaries. Declare
# them phony so that a stray file or directory with one of these names (e.g.
# "eigen" or "vendor") can never be mistaken for an up-to-date target.
.PHONY: all blis blissup blislpab eigen openblas blasfeo libxsmm vendor \
        st blissup-st blislpab-st eigen-st openblas-st blasfeo-st \
        libxsmm-st vendor-st \
        mt blissup-mt blislpab-mt eigen-mt openblas-mt vendor-mt

# The default goal builds every single-threaded driver.
all: st

# Per-implementation shorthands; each one builds the single-threaded drivers.
blis: blissup-st blislpab-st
blissup: blissup-st
blislpab: blislpab-st
eigen: eigen-st
openblas: openblas-st
blasfeo: blasfeo-st
libxsmm: libxsmm-st
vendor: vendor-st

# All single-threaded drivers, followed by one target per implementation.
st: blissup-st blislpab-st \
    eigen-st openblas-st blasfeo-st libxsmm-st vendor-st
blissup-st: $(BLISSUP_ST_BINS)
blislpab-st: $(BLISLPAB_ST_BINS)
eigen-st: $(EIGEN_ST_BINS)
openblas-st: $(OPENBLAS_ST_BINS)
blasfeo-st: $(BLASFEO_ST_BINS)
libxsmm-st: $(LIBXSMM_ST_BINS)
vendor-st: $(VENDOR_ST_BINS)

# All multithreaded drivers, followed by one target per implementation.
# (There are no multithreaded blasfeo or libxsmm drivers.)
mt: blissup-mt blislpab-mt \
    eigen-mt openblas-mt vendor-mt
blissup-mt: $(BLISSUP_MT_BINS)
blislpab-mt: $(BLISLPAB_MT_BINS)
eigen-mt: $(EIGEN_MT_BINS)
openblas-mt: $(OPENBLAS_MT_BINS)
vendor-mt: $(VENDOR_MT_BINS)
# -- Object file rules --
# Define the implementations for which we will instantiate compilation rules.
# BIMPLS_ST and BIMPLS_MT list the implementations compiled with $(CC) via
# make-st-rule() and make-mt-rule(), respectively; EIMPLS lists those compiled
# with $(CXX) via the Eigen-specific rule functions.
BIMPLS_ST := blissup blislpab openblas blasfeo libxsmm vendor
BIMPLS_MT := blissup blislpab openblas vendor
EIMPLS := eigen
# 1 2 3 4 567 8
# test_dgemm_nn_rrr_mpn6kp_blissup_st.x
# Define the function that will be used to instantiate compilation rules
# for the various single-threaded implementations. Each instantiation compiles
# test_gemm.c into one object file whose name encodes its parameters.
# arguments: 1: datatype char  2: transposes (w/ underscore)
#            3: storages (w/ underscores)  4: shape (w/ underscores)
#            5: smallm  6: smalln  7: smallk  8: implementation name
# NOTE: The recipe line must begin with a TAB. $$< and $$@ are escaped so that
# they survive $(call ...) and are expanded only when the recipe runs.
define make-st-rule
test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_st.o: test_gemm.c Makefile
	$(CC) $(CFLAGS) $(ERRCHK) $(N_TRIALS) $(PDEF_ST) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_ST) -c $$< -o $$@
endef

# Instantiate the rule function make-st-rule() for each BLIS/BLAS/CBLAS
# implementation.
$(foreach dt,$(DTS), \
$(foreach tr,$(TRANS), \
$(foreach st,$(STORS), \
$(foreach sh,$(SHAPES), \
$(foreach sm,$(SMS), \
$(foreach sn,$(SNS), \
$(foreach sk,$(SKS), \
$(foreach impl,$(BIMPLS_ST), \
$(eval $(call make-st-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl)))))))))))
# Define the function that will be used to instantiate compilation rules
# for the various multithreaded implementations. Each instantiation compiles
# test_gemm.c into one object file whose name encodes its parameters.
# arguments: 1: datatype char  2: transposes (w/ underscore)
#            3: storages (w/ underscores)  4: shape (w/ underscores)
#            5: smallm  6: smalln  7: smallk  8: implementation name
# NOTE: The recipe line must begin with a TAB. $$< and $$@ are escaped so that
# they survive $(call ...) and are expanded only when the recipe runs.
define make-mt-rule
test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_mt.o: test_gemm.c Makefile
	$(CC) $(CFLAGS) $(ERRCHK) $(N_TRIALS) $(PDEF_MT) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_MT) -c $$< -o $$@
endef

# Instantiate the rule function make-mt-rule() for each BLIS/BLAS/CBLAS
# implementation.
$(foreach dt,$(DTS), \
$(foreach tr,$(TRANS), \
$(foreach st,$(STORS), \
$(foreach sh,$(SHAPES), \
$(foreach sm,$(SMS), \
$(foreach sn,$(SNS), \
$(foreach sk,$(SKS), \
$(foreach impl,$(BIMPLS_MT), \
$(eval $(call make-mt-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl)))))))))))
# Define the function that will be used to instantiate compilation rules
# for the single-threaded Eigen implementation. It differs from make-st-rule()
# only in that test_gemm.c is compiled with $(CXX) and $(CXXFLAGS_ST).
# arguments: 1: datatype char  2: transposes (w/ underscore)
#            3: storages (w/ underscores)  4: shape (w/ underscores)
#            5: smallm  6: smalln  7: smallk  8: implementation name
# NOTE: The recipe line must begin with a TAB. $$< and $$@ are escaped so that
# they survive $(call ...) and are expanded only when the recipe runs.
define make-eigst-rule
test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_st.o: test_gemm.c Makefile
	$(CXX) $(CXXFLAGS_ST) $(ERRCHK) $(N_TRIALS) $(PDEF_ST) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_ST) -c $$< -o $$@
endef

# Instantiate the rule function make-eigst-rule() for each Eigen
# implementation.
$(foreach dt,$(DTS), \
$(foreach tr,$(TRANS), \
$(foreach st,$(STORS), \
$(foreach sh,$(SHAPES), \
$(foreach sm,$(SMS), \
$(foreach sn,$(SNS), \
$(foreach sk,$(SKS), \
$(foreach impl,$(EIMPLS), \
$(eval $(call make-eigst-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl)))))))))))
# Define the function that will be used to instantiate compilation rules
# for the multithreaded Eigen implementation. It differs from make-mt-rule()
# only in that test_gemm.c is compiled with $(CXX) and $(CXXFLAGS_MT).
# arguments: 1: datatype char  2: transposes (w/ underscore)
#            3: storages (w/ underscores)  4: shape (w/ underscores)
#            5: smallm  6: smalln  7: smallk  8: implementation name
# NOTE: The recipe line must begin with a TAB. $$< and $$@ are escaped so that
# they survive $(call ...) and are expanded only when the recipe runs.
define make-eigmt-rule
test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_mt.o: test_gemm.c Makefile
	$(CXX) $(CXXFLAGS_MT) $(ERRCHK) $(N_TRIALS) $(PDEF_MT) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_MT) -c $$< -o $$@
endef

# Instantiate the rule function make-eigmt-rule() for each Eigen
# implementation.
$(foreach dt,$(DTS), \
$(foreach tr,$(TRANS), \
$(foreach st,$(STORS), \
$(foreach sh,$(SHAPES), \
$(foreach sm,$(SMS), \
$(foreach sn,$(SNS), \
$(foreach sk,$(SKS), \
$(foreach impl,$(EIMPLS), \
$(eval $(call make-eigmt-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl)))))))))))
# -- Executable file rules --
# NOTE: For the BLAS test drivers, we place the BLAS libraries before BLIS
# on the link command line in case BLIS was configured with the BLAS
# compatibility layer. This prevents BLIS from inadvertently getting called
# for the BLAS routines we are trying to test with.
#
# Every driver is linked from exactly one object file ($<). The Eigen drivers
# are linked with $(CXX); all others are linked with $(CC). Each recipe line
# must begin with a TAB.

# Single-threaded drivers.
test_%_blissup_st.x: test_%_blissup_st.o $(LIBBLIS_LINK)
	$(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@)

test_%_blislpab_st.x: test_%_blislpab_st.o $(LIBBLIS_LINK)
	$(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@)

test_%_eigen_st.x: test_%_eigen_st.o $(LIBBLIS_LINK)
	$(CXX) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@)

test_%_openblas_st.x: test_%_openblas_st.o $(LIBBLIS_LINK)
	$(CC) $(strip $< $(OPENBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)

test_%_blasfeo_st.x: test_%_blasfeo_st.o $(LIBBLIS_LINK)
	$(CC) $(strip $< $(BLASFEO_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)

test_%_libxsmm_st.x: test_%_libxsmm_st.o $(LIBBLIS_LINK)
	$(CC) $(strip $< $(LIBXSMM_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)

test_%_vendor_st.x: test_%_vendor_st.o $(LIBBLIS_LINK)
	$(CC) $(strip $< $(VENDOR_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)

# Multithreaded drivers. OpenBLAS and the vendor library are linked through
# the separate OPENBLASP_LIB and VENDORP_LIB variables.
test_%_blissup_mt.x: test_%_blissup_mt.o $(LIBBLIS_LINK)
	$(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@)

test_%_blislpab_mt.x: test_%_blislpab_mt.o $(LIBBLIS_LINK)
	$(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@)

test_%_eigen_mt.x: test_%_eigen_mt.o $(LIBBLIS_LINK)
	$(CXX) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@)

test_%_openblas_mt.x: test_%_openblas_mt.o $(LIBBLIS_LINK)
	$(CC) $(strip $< $(OPENBLASP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)

test_%_vendor_mt.x: test_%_vendor_mt.o $(LIBBLIS_LINK)
	$(CC) $(strip $< $(VENDORP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
# -- Clean rules --
# Neither target names a file, so declare both phony; otherwise a file named
# "clean" or "cleanx" would silently disable them.
.PHONY: clean cleanx

clean: cleanx

# Remove all test binaries and object files from the current directory. The
# leading '-' tells make to carry on even if the removal command fails.
cleanx:
	- $(RM_F) *.x *.o

View File

@@ -1,12 +0,0 @@
% Each line below produces the multithreaded ('mt'), double-precision ('d')
% plot panel for one storage scheme ('ccc' or 'rrr') on one machine, then
% closes the figure and clears the workspace before the next call.
% NOTE(review): the meaning of the leading numeric arguments and of the
% [ 6 8 10 ] vector is defined by plot_panel_trxsh.m, which is not shown
% here; the vector presumably holds the small m, n, k sizes -- confirm.
% haswell
plot_panel_trxsh(3.25,16,1,'mt','d','ccc',[ 6 8 10 ],'../results/haswell/20190823/4_800_4_mt201','has','MKL','matlab'); close; clear all;
plot_panel_trxsh(3.25,16,1,'mt','d','rrr',[ 6 8 10 ],'../results/haswell/20190823/4_800_4_mt201','has','MKL','matlab'); close; clear all;
% kabylake
plot_panel_trxsh(3.80,16,1,'mt','d','rrr',[ 6 8 10 ],'..','kbl','MKL','matlab'); close; clear all;
plot_panel_trxsh(3.80,16,1,'mt','d','ccc',[ 6 8 10 ],'..','kbl','MKL','matlab'); close; clear all;
% epyc
plot_panel_trxsh(3.00, 8,1,'mt','d','rrr',[ 6 8 10 ],'../results/epyc/20190826/4_800_4_mt256','epyc','MKL','matlab'); close; clear all;
plot_panel_trxsh(3.00, 8,1,'mt','d','ccc',[ 6 8 10 ],'../results/epyc/20190826/4_800_4_mt256','epyc','MKL','matlab'); close; clear all;

View File

@@ -1,188 +0,0 @@
#!/bin/bash
#
# Runs every multithreaded gemm test driver in the current directory, one per
# combination of datatype, transposition, storage, shape, and implementation,
# and redirects the output of each driver into its own .m file.

# File prefixes.
exec_root="test"
out_root="output"

# The system on which the tests are run. This selects the number of threads
# and the core affinity.
sys="blis"
#sys="lonestar5"
#sys="ul252"
#sys="ul264"

if [ "${sys}" = "blis" ]; then

	export GOMP_CPU_AFFINITY="0-3"
	nt=4

elif [ "${sys}" = "lonestar5" ]; then

	export GOMP_CPU_AFFINITY="0-23"
	nt=24

elif [ "${sys}" = "ul252" ]; then

	export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
	export GOMP_CPU_AFFINITY="0-51"
	nt=52

elif [ "${sys}" = "ul264" ]; then

	export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
	export GOMP_CPU_AFFINITY="0-63"
	nt=64

else

	# Without a recognized system, nt would be left empty and every
	# *_NUM_THREADS variable below would be exported as an empty string.
	echo "Unrecognized system '${sys}'; cannot determine the thread count." >&2
	exit 1

fi

# Delay between test cases.
delay=0.02

# Threadedness to test.
threads="mt"

# Datatypes to test.
#dts="d s"
dts="d"

# Operations to test.
ops="gemm"

# Transpose combinations to test.
trans="nn nt tn tt"

# Storage combinations to test.
#stors="rrr rrc rcr rcc crr crc ccr ccc"
stors="rrr ccc"

# Problem shapes to test.
shapes="sll lsl lls lss sls ssl lll"

# FGVZ: figure out how to probe what's in the directory and
# execute everything that's there?
sms="6"
sns="8"
sks="10"

# Implementations to test.
impls="vendor blissup blislpab openblas eigen"
#impls="vendor"
#impls="blissup"
#impls="blislpab"
#impls="openblas"
#impls="eigen"

# Save a copy of GOMP_CPU_AFFINITY so that if we have to unset it, we can
# restore the value.
GOMP_CPU_AFFINITYsave=${GOMP_CPU_AFFINITY}

# Example: test_dgemm_nn_rrc_m6npkp_blissup_st.x

for th in ${threads}; do

	for dt in ${dts}; do

		for op in ${ops}; do

			for tr in ${trans}; do

				for st in ${stors}; do

					for sh in ${shapes}; do

						for sm in ${sms}; do

							for sn in ${sns}; do

								for sk in ${sks}; do

									for im in ${impls}; do

										# Each implementation reads its thread count
										# from a different environment variable.
										if [ "${im:0:4}" = "blis" ]; then
											unset  OMP_NUM_THREADS
											export BLIS_NUM_THREADS=${nt}
										elif [ "${im}" = "openblas" ]; then
											unset  OMP_NUM_THREADS
											export OPENBLAS_NUM_THREADS=${nt}
										elif [ "${im}" = "eigen" ]; then
											export OMP_NUM_THREADS=${nt}
										elif [ "${im}" = "vendor" ]; then
											unset  OMP_NUM_THREADS
											export MKL_NUM_THREADS=${nt}
										fi

										# Multithreaded OpenBLAS seems to have a problem
										# running properly if GOMP_CPU_AFFINITY is set.
										# So we temporarily unset it here if we are about
										# to execute OpenBLAS, but otherwise restore it.
										if [ "${im}" = "openblas" ]; then
											unset GOMP_CPU_AFFINITY
										else
											export GOMP_CPU_AFFINITY="${GOMP_CPU_AFFINITYsave}"
										fi

										# Limit execution of non-BLIS implementations to
										# rrr/ccc storage cases.
										if [ "${im:0:4}" != "blis" ] && \
										   [ "${st}" != "rrr" ] && \
										   [ "${st}" != "ccc" ]; then
											continue;
										fi

										# Further limit execution of libxsmm to
										# ccc storage cases.
										if [ "${im:0:7}" = "libxsmm" ] && \
										   [ "${st}" != "ccc" ]; then
											continue;
										fi

										# Extract the shape chars for m, n, k.
										chm=${sh:0:1}
										chn=${sh:1:1}
										chk=${sh:2:1}

										# Construct the shape substring (e.g. m6npkp)
										shstr=""

										if [ "${chm}" = "s" ]; then
											shstr="${shstr}m${sm}"
										else
											shstr="${shstr}mp"
										fi

										if [ "${chn}" = "s" ]; then
											shstr="${shstr}n${sn}"
										else
											shstr="${shstr}np"
										fi

										if [ "${chk}" = "s" ]; then
											shstr="${shstr}k${sk}"
										else
											shstr="${shstr}kp"
										fi

										# Ex: test_dgemm_nn_rrc_m6npkp_blissup_st.x

										# Construct the name of the test executable.
										exec_name="${exec_root}_${dt}${op}_${tr}_${st}_${shstr}_${im}_${th}.x"

										# Construct the name of the output file.
										out_file="${out_root}_${th}_${dt}${op}_${tr}_${st}_${shstr}_${im}.m"

										echo "Running (nt = ${nt}) ./${exec_name} > ${out_file}"

										# Run executable.
										"./${exec_name}" > "${out_file}"

										sleep ${delay}

									done
								done
							done
						done
					done
				done
			done
		done
	done
done

View File

@@ -1,589 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <unistd.h>
#ifdef EIGEN
#define BLIS_DISABLE_BLAS_DEFS
#include "blis.h"
#include <Eigen/Core>
//#include <Eigen/src/misc/blas.h>
using namespace Eigen;
#else
#include "blis.h"
#endif
//#define PRINT
// Benchmark driver for gemm. All test parameters (datatype, transposition,
// storage, problem sizes, implementation) are fixed at compile time through
// preprocessor macros. For each problem size the operation is timed n_trials
// times, and the best time is printed as a gflops value in the form of a
// matlab/octave array assignment on stdout.
int main( int argc, char** argv )
{
rntm_t rntm_g;
bli_init();
// Copy the global rntm_t object in case we need it later when disabling
// sup.
bli_rntm_init_from_global( &rntm_g );
#ifndef ERROR_CHECK
bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
#endif
dim_t n_trials = N_TRIALS;
num_t dt = DT;
#if 1
dim_t p_begin = P_BEGIN;
dim_t p_max = P_MAX;
dim_t p_inc = P_INC;
#else
dim_t p_begin = 4;
dim_t p_max = 40;
dim_t p_inc = 4;
#endif
// A negative m/n/k input means that the dimension is bound to the problem
// size p (see the computation of m, n, k below); a non-negative input is
// used as-is.
#if 1
dim_t m_input = M_DIM;
dim_t n_input = N_DIM;
dim_t k_input = K_DIM;
#else
p_begin = p_inc = 32;
dim_t m_input = 6;
dim_t n_input = -1;
dim_t k_input = -1;
#endif
#if 1
trans_t transa = TRANSA;
trans_t transb = TRANSB;
#else
trans_t transa = BLIS_NO_TRANSPOSE;
trans_t transb = BLIS_NO_TRANSPOSE;
#endif
#if 1
stor3_t sc = STOR3;
#else
stor3_t sc = BLIS_RRR;
#endif
inc_t rs_c, cs_c;
inc_t rs_a, cs_a;
inc_t rs_b, cs_b;
// Choose the strides passed to bli_obj_create() for each operand. The three
// letters of the stor3_t value apply to C, A, and B, in that order: an 'R'
// selects strides of -1 and a 'C' selects strides of 0.
// NOTE(review): presumably -1 requests row storage and 0 requests column
// storage from bli_obj_create() -- confirm against the BLIS documentation.
if ( sc == BLIS_RRR ) { rs_c = cs_c = -1; rs_a = cs_a = -1; rs_b = cs_b = -1; }
else if ( sc == BLIS_RRC ) { rs_c = cs_c = -1; rs_a = cs_a = -1; rs_b = cs_b = 0; }
else if ( sc == BLIS_RCR ) { rs_c = cs_c = -1; rs_a = cs_a = 0; rs_b = cs_b = -1; }
else if ( sc == BLIS_RCC ) { rs_c = cs_c = -1; rs_a = cs_a = 0; rs_b = cs_b = 0; }
else if ( sc == BLIS_CRR ) { rs_c = cs_c = 0; rs_a = cs_a = -1; rs_b = cs_b = -1; }
else if ( sc == BLIS_CRC ) { rs_c = cs_c = 0; rs_a = cs_a = -1; rs_b = cs_b = 0; }
else if ( sc == BLIS_CCR ) { rs_c = cs_c = 0; rs_a = cs_a = 0; rs_b = cs_b = -1; }
else if ( sc == BLIS_CCC ) { rs_c = cs_c = 0; rs_a = cs_a = 0; rs_b = cs_b = 0; }
else { bli_abort(); }
// CBLAS can only express all-row or all-column storage; any other
// combination is mapped to -1. The void cast silences unused-variable
// warnings in builds that do not use CBLAS.
f77_int cbla_storage;
if ( sc == BLIS_RRR ) cbla_storage = CblasRowMajor;
else if ( sc == BLIS_CCC ) cbla_storage = CblasColMajor;
else cbla_storage = -1;
( void )cbla_storage;
char dt_ch;
// Choose the char corresponding to the requested datatype.
if ( bli_is_float( dt ) ) dt_ch = 's';
else if ( bli_is_double( dt ) ) dt_ch = 'd';
else if ( bli_is_scomplex( dt ) ) dt_ch = 'c';
else dt_ch = 'z';
f77_char f77_transa;
f77_char f77_transb;
char transal, transbl;
// Derive the BLAS-style transposition chars, their lowercase forms (used in
// the name of the printed array), and the equivalent CBLAS enum values.
bli_param_map_blis_to_netlib_trans( transa, &f77_transa );
bli_param_map_blis_to_netlib_trans( transb, &f77_transb );
transal = tolower( f77_transa );
transbl = tolower( f77_transb );
f77_int cbla_transa = ( transal == 'n' ? CblasNoTrans : CblasTrans );
f77_int cbla_transb = ( transbl == 'n' ? CblasNoTrans : CblasTrans );
( void )cbla_transa;
( void )cbla_transb;
dim_t p;
// Begin with initializing the last entry to zero so that
// matlab allocates space for the entire array once up-front.
// (The empty-bodied loop below only advances p to the last problem size.)
for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ;
printf( "data_%s_%cgemm_%c%c_%s", THR_STR, dt_ch,
transal, transbl, STR );
printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n",
( unsigned long )(p - p_begin)/p_inc + 1,
( unsigned long )0,
( unsigned long )0,
( unsigned long )0, 0.0 );
// Iterate over the problem sizes from largest to smallest.
//for ( p = p_begin; p <= p_max; p += p_inc )
for ( p = p_max; p_begin <= p; p -= p_inc )
{
obj_t a, b, c;
obj_t c_save;
obj_t alpha, beta;
dim_t m, n, k;
// A negative input binds the dimension to p (divided by the magnitude of
// the input); otherwise the dimension is fixed to the input value.
if ( m_input < 0 ) m = p / ( dim_t )abs(m_input);
else m = ( dim_t ) m_input;
if ( n_input < 0 ) n = p / ( dim_t )abs(n_input);
else n = ( dim_t ) n_input;
if ( k_input < 0 ) k = p / ( dim_t )abs(k_input);
else k = ( dim_t ) k_input;
bli_obj_create( dt, 1, 1, 0, 0, &alpha );
bli_obj_create( dt, 1, 1, 0, 0, &beta );
bli_obj_create( dt, m, n, rs_c, cs_c, &c );
bli_obj_create( dt, m, n, rs_c, cs_c, &c_save );
// A and B are created with swapped dimensions when they are to be
// transposed, so that the product is always m x n with inner dimension k.
if ( bli_does_notrans( transa ) )
bli_obj_create( dt, m, k, rs_a, cs_a, &a );
else
bli_obj_create( dt, k, m, rs_a, cs_a, &a );
if ( bli_does_notrans( transb ) )
bli_obj_create( dt, k, n, rs_b, cs_b, &b );
else
bli_obj_create( dt, n, k, rs_b, cs_b, &b );
bli_randm( &a );
bli_randm( &b );
bli_randm( &c );
bli_obj_set_conjtrans( transa, &a );
bli_obj_set_conjtrans( transb, &b );
// Both alpha and beta are set to 1.0.
bli_setsc( (1.0/1.0), 0.0, &alpha );
bli_setsc( (1.0/1.0), 0.0, &beta );
// Save the initial contents of C so they can be restored before each trial.
bli_copym( &c, &c_save );
#ifdef EIGEN
// Wrap the BLIS-allocated buffers in Eigen Map objects so that Eigen
// operates directly on the same memory. Only the double-precision case
// is implemented; the IS_FLOAT branch below is empty.
double alpha_r, alpha_i;
bli_getsc( &alpha, &alpha_r, &alpha_i );
void* ap = bli_obj_buffer_at_off( &a );
void* bp = bli_obj_buffer_at_off( &b );
void* cp = bli_obj_buffer_at_off( &c );
const int os_a = ( bli_obj_is_col_stored( &a ) ? bli_obj_col_stride( &a )
: bli_obj_row_stride( &a ) );
const int os_b = ( bli_obj_is_col_stored( &b ) ? bli_obj_col_stride( &b )
: bli_obj_row_stride( &b ) );
const int os_c = ( bli_obj_is_col_stored( &c ) ? bli_obj_col_stride( &c )
: bli_obj_row_stride( &c ) );
Stride<Dynamic,1> stride_a( os_a, 1 );
Stride<Dynamic,1> stride_b( os_b, 1 );
Stride<Dynamic,1> stride_c( os_c, 1 );
#if defined(IS_FLOAT)
#elif defined (IS_DOUBLE)
#ifdef A_STOR_R
typedef Matrix<double, Dynamic, Dynamic, RowMajor> MatrixXd_A;
#else
typedef Matrix<double, Dynamic, Dynamic, ColMajor> MatrixXd_A;
#endif
#ifdef B_STOR_R
typedef Matrix<double, Dynamic, Dynamic, RowMajor> MatrixXd_B;
#else
typedef Matrix<double, Dynamic, Dynamic, ColMajor> MatrixXd_B;
#endif
#ifdef C_STOR_R
typedef Matrix<double, Dynamic, Dynamic, RowMajor> MatrixXd_C;
#else
typedef Matrix<double, Dynamic, Dynamic, ColMajor> MatrixXd_C;
#endif
#ifdef A_NOTRANS // A is not transposed
Map<MatrixXd_A, 0, Stride<Dynamic,1> > A( ( double* )ap, m, k, stride_a );
#else // A is transposed
Map<MatrixXd_A, 0, Stride<Dynamic,1> > A( ( double* )ap, k, m, stride_a );
#endif
#ifdef B_NOTRANS // B is not transposed
Map<MatrixXd_B, 0, Stride<Dynamic,1> > B( ( double* )bp, k, n, stride_b );
#else // B is transposed
Map<MatrixXd_B, 0, Stride<Dynamic,1> > B( ( double* )bp, n, k, stride_b );
#endif
Map<MatrixXd_C, 0, Stride<Dynamic,1> > C( ( double* )cp, m, n, stride_c );
#endif
#endif
// dtime_save is reduced across trials via bli_clock_min_diff() below.
double dtime_save = DBL_MAX;
for ( dim_t r = 0; r < n_trials; ++r )
{
// Restore C so that every trial starts from the same input.
bli_copym( &c_save, &c );
double dtime = bli_clock();
#ifdef EIGEN
#ifdef A_NOTRANS
#ifdef B_NOTRANS
C.noalias() += alpha_r * A * B;
#else // B_TRANS
C.noalias() += alpha_r * A * B.transpose();
#endif
#else // A_TRANS
#ifdef B_NOTRANS
C.noalias() += alpha_r * A.transpose() * B;
#else // B_TRANS
C.noalias() += alpha_r * A.transpose() * B.transpose();
#endif
#endif
#endif
#ifdef BLIS
#ifdef SUP
// Allow sup.
bli_gemm( &alpha,
&a,
&b,
&beta,
&c );
#else
// Disable sup and use the expert interface.
//rntm_t rntm = BLIS_RNTM_INITIALIZER;
rntm_t rntm = rntm_g;
bli_rntm_disable_l3_sup( &rntm );
bli_gemm_ex( &alpha,
&a,
&b,
&beta,
&c, NULL, &rntm );
#endif
#endif
#ifdef BLAS
// Fortran-style BLAS interface (or libxsmm when XSMM is defined). The
// leading dimensions are always taken from the column strides.
if ( bli_is_float( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int nn = bli_obj_width( &c );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
float* alphap = ( float* )bli_obj_buffer( &alpha );
float* ap = ( float* )bli_obj_buffer( &a );
float* bp = ( float* )bli_obj_buffer( &b );
float* betap = ( float* )bli_obj_buffer( &beta );
float* cp = ( float* )bli_obj_buffer( &c );
#ifdef XSMM
libxsmm_sgemm( &f77_transa,
#else
sgemm_( &f77_transa,
#endif
&f77_transb,
&mm,
&nn,
&kk,
alphap,
ap, &lda,
bp, &ldb,
betap,
cp, &ldc );
}
else if ( bli_is_double( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int nn = bli_obj_width( &c );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
double* alphap = ( double* )bli_obj_buffer( &alpha );
double* ap = ( double* )bli_obj_buffer( &a );
double* bp = ( double* )bli_obj_buffer( &b );
double* betap = ( double* )bli_obj_buffer( &beta );
double* cp = ( double* )bli_obj_buffer( &c );
#ifdef XSMM
libxsmm_dgemm( &f77_transa,
#else
dgemm_( &f77_transa,
#endif
&f77_transb,
&mm,
&nn,
&kk,
alphap,
ap, &lda,
bp, &ldb,
betap,
cp, &ldc );
}
else if ( bli_is_scomplex( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int nn = bli_obj_width( &c );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
scomplex* alphap = ( scomplex* )bli_obj_buffer( &alpha );
scomplex* ap = ( scomplex* )bli_obj_buffer( &a );
scomplex* bp = ( scomplex* )bli_obj_buffer( &b );
scomplex* betap = ( scomplex* )bli_obj_buffer( &beta );
scomplex* cp = ( scomplex* )bli_obj_buffer( &c );
#ifdef XSMM
libxsmm_cgemm( &f77_transa,
#else
cgemm_( &f77_transa,
#endif
&f77_transb,
&mm,
&nn,
&kk,
alphap,
ap, &lda,
bp, &ldb,
betap,
cp, &ldc );
}
else if ( bli_is_dcomplex( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int nn = bli_obj_width( &c );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
dcomplex* alphap = ( dcomplex* )bli_obj_buffer( &alpha );
dcomplex* ap = ( dcomplex* )bli_obj_buffer( &a );
dcomplex* bp = ( dcomplex* )bli_obj_buffer( &b );
dcomplex* betap = ( dcomplex* )bli_obj_buffer( &beta );
dcomplex* cp = ( dcomplex* )bli_obj_buffer( &c );
#ifdef XSMM
libxsmm_zgemm( &f77_transa,
#else
zgemm_( &f77_transa,
#endif
&f77_transb,
&mm,
&nn,
&kk,
alphap,
ap, &lda,
bp, &ldb,
betap,
cp, &ldc );
}
#endif
#ifdef CBLAS
// CBLAS interface. The leading dimensions come from the row strides when
// C_STOR_R is defined and from the column strides otherwise.
if ( bli_is_float( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int nn = bli_obj_width( &c );
#ifdef C_STOR_R
f77_int lda = bli_obj_row_stride( &a );
f77_int ldb = bli_obj_row_stride( &b );
f77_int ldc = bli_obj_row_stride( &c );
#else
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
#endif
float* alphap = bli_obj_buffer( &alpha );
float* ap = bli_obj_buffer( &a );
float* bp = bli_obj_buffer( &b );
float* betap = bli_obj_buffer( &beta );
float* cp = bli_obj_buffer( &c );
cblas_sgemm( cbla_storage,
cbla_transa,
cbla_transb,
mm,
nn,
kk,
*alphap,
ap, lda,
bp, ldb,
*betap,
cp, ldc );
}
else if ( bli_is_double( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int nn = bli_obj_width( &c );
#ifdef C_STOR_R
f77_int lda = bli_obj_row_stride( &a );
f77_int ldb = bli_obj_row_stride( &b );
f77_int ldc = bli_obj_row_stride( &c );
#else
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
#endif
double* alphap = bli_obj_buffer( &alpha );
double* ap = bli_obj_buffer( &a );
double* bp = bli_obj_buffer( &b );
double* betap = bli_obj_buffer( &beta );
double* cp = bli_obj_buffer( &c );
cblas_dgemm( cbla_storage,
cbla_transa,
cbla_transb,
mm,
nn,
kk,
*alphap,
ap, lda,
bp, ldb,
*betap,
cp, ldc );
}
else if ( bli_is_scomplex( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int nn = bli_obj_width( &c );
#ifdef C_STOR_R
f77_int lda = bli_obj_row_stride( &a );
f77_int ldb = bli_obj_row_stride( &b );
f77_int ldc = bli_obj_row_stride( &c );
#else
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
#endif
scomplex* alphap = bli_obj_buffer( &alpha );
scomplex* ap = bli_obj_buffer( &a );
scomplex* bp = bli_obj_buffer( &b );
scomplex* betap = bli_obj_buffer( &beta );
scomplex* cp = bli_obj_buffer( &c );
cblas_cgemm( cbla_storage,
cbla_transa,
cbla_transb,
mm,
nn,
kk,
alphap,
ap, lda,
bp, ldb,
betap,
cp, ldc );
}
else if ( bli_is_dcomplex( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int nn = bli_obj_width( &c );
#ifdef C_STOR_R
f77_int lda = bli_obj_row_stride( &a );
f77_int ldb = bli_obj_row_stride( &b );
f77_int ldc = bli_obj_row_stride( &c );
#else
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
#endif
dcomplex* alphap = bli_obj_buffer( &alpha );
dcomplex* ap = bli_obj_buffer( &a );
dcomplex* bp = bli_obj_buffer( &b );
dcomplex* betap = bli_obj_buffer( &beta );
dcomplex* cp = bli_obj_buffer( &c );
cblas_zgemm( cbla_storage,
cbla_transa,
cbla_transb,
mm,
nn,
kk,
alphap,
ap, lda,
bp, ldb,
betap,
cp, ldc );
}
#endif
dtime_save = bli_clock_min_diff( dtime_save, dtime );
}
// gemm is counted as 2*m*n*k flops, scaled by 4 for complex datatypes.
double gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 );
if ( bli_is_complex( dt ) ) gflops *= 4.0;
// Print one row of the result array: the dimensions and the gflops value.
printf( "data_%s_%cgemm_%c%c_%s", THR_STR, dt_ch,
transal, transbl, STR );
printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n",
( unsigned long )(p - p_begin)/p_inc + 1,
( unsigned long )m,
( unsigned long )n,
( unsigned long )k, gflops );
bli_obj_free( &alpha );
bli_obj_free( &beta );
bli_obj_free( &a );
bli_obj_free( &b );
bli_obj_free( &c );
bli_obj_free( &c_save );
}
//bli_finalize();
return 0;
}