mirror of
https://github.com/amd/blis.git
synced 2026-05-24 10:24:34 +00:00
Merged test/sup, test/supmt into test/sup.
Details:
- Updated the Makefile, test_gemm.c, and runme.sh in test/sup to be able to compile and run both single-threaded and multithreaded experiments. This should help with maintenance going forward.
- Created a test/sup/octave_st directory of scripts (based on the previous test/sup/octave scripts) as well as a test/sup/octave_mt directory (based on the previous test/supmt/octave scripts). The octave scripts are slightly different and not easily mergeable, and thus for now I'll maintain them separately.
- Preserved the previous test/sup directory as test/sup/old/supst and the previous test/supmt directory as test/sup/old/supmt.

Change-Id: Ia230fc65185fd9a34eec714721004aa9e0bd40ed
This commit is contained in:
committed by
dzambare
parent
01e1a41c95
commit
26cd966af7
@@ -505,6 +505,7 @@ eigen-mt: check-env $(EIGEN_MT_BINS)
|
||||
openblas-mt: check-env $(OPENBLAS_MT_BINS)
|
||||
vendor-mt: check-env $(VENDOR_MT_BINS)
|
||||
|
||||
# -- Multithreaded --
|
||||
|
||||
# --- Object file rules --------------------------------------------------------
|
||||
|
||||
@@ -540,6 +541,29 @@ $(eval $(call make-st-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(ld),$(imp
|
||||
|
||||
# -- Multithreaded BLAS --
|
||||
|
||||
# -- Multithreaded BLAS --
|
||||
|
||||
# Define the function that will be used to instantiate compilation rules
|
||||
# for the various multithreaded implementations.
|
||||
define make-mt-rule
|
||||
test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_mt.o: test_gemm.c Makefile
|
||||
$(CC) $(CFLAGS) $(ERRCHK) $(N_TRIALS) $(call get-pdefs,$(4)) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_MT) -c $$< -o $$@
|
||||
endef
|
||||
|
||||
# Instantiate the rule function make-mt-rule() for each BLIS/BLAS/CBLAS
|
||||
# implementation.
|
||||
$(foreach dt,$(DTS), \
|
||||
$(foreach tr,$(TRANS), \
|
||||
$(foreach st,$(STORS), \
|
||||
$(foreach sh,$(SHAPES), \
|
||||
$(foreach sm,$(SMS_MT), \
|
||||
$(foreach sn,$(SNS_MT), \
|
||||
$(foreach sk,$(SKS_MT), \
|
||||
$(foreach impl,$(BIMPLS_MT), \
|
||||
$(eval $(call make-mt-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl)))))))))))
|
||||
|
||||
# -- Single-threaded Eigen --
|
||||
|
||||
# Define the function that will be used to instantiate compilation rules
|
||||
# for the various multithreaded implementations.
|
||||
define make-mt-rule
|
||||
@@ -602,6 +626,26 @@ $(foreach ld,$(LDIMS), \
|
||||
$(foreach impl,$(EIMPLS), \
|
||||
$(eval $(call make-eigmt-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(ld),$(impl))))))))))))
|
||||
|
||||
# -- Multithreaded Eigen --
|
||||
|
||||
# Define the function that will be used to instantiate compilation rules
|
||||
# for the multithreaded Eigen implementation.
|
||||
define make-eigmt-rule
|
||||
test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_mt.o: test_gemm.c Makefile
|
||||
$(CXX) $(CXXFLAGS_MT) $(ERRCHK) $(N_TRIALS) $(call get-pdefs,$(4)) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_MT) -c $$< -o $$@
|
||||
endef
|
||||
|
||||
# Instantiate the rule function make-eigmt-rule() for each Eigen implementation.
|
||||
$(foreach dt,$(DTS), \
|
||||
$(foreach tr,$(TRANS), \
|
||||
$(foreach st,$(STORS), \
|
||||
$(foreach sh,$(SHAPES), \
|
||||
$(foreach sm,$(SMS_MT), \
|
||||
$(foreach sn,$(SNS_MT), \
|
||||
$(foreach sk,$(SKS_MT), \
|
||||
$(foreach impl,$(EIMPLS), \
|
||||
$(eval $(call make-eigmt-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl)))))))))))
|
||||
|
||||
|
||||
# --- Executable file rules ----------------------------------------------------
|
||||
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
|
||||
% kabylake
|
||||
plot_panel_trxsh(3.8,16,1,'st','d','rrr',[ 6 8 4 ],'../results/kabylake/20190619/4_800_4_mt201','kbl','MKL','matlab'); close; clear all;
|
||||
plot_panel_trxsh(3.8,16,1,'st','d','ccc',[ 6 8 4 ],'../results/kabylake/20190619/4_800_4_mt201','kbl','MKL','matlab'); close; clear all;
|
||||
|
||||
% epyc
|
||||
plot_panel_trxsh(3.0,8,1,'st','d','rrr',[ 6 8 4 ],'../results/epyc/20190619/4_800_4_mt256','epyc','MKL','matlab'); close; clear all;
|
||||
plot_panel_trxsh(3.0,8,1,'st','d','ccc',[ 6 8 4 ],'../results/epyc/20190619/4_800_4_mt256','epyc','MKL','matlab'); close; clear all;
|
||||
@@ -9,6 +9,7 @@ function r_val = plot_l3sup_perf( opname, ...
|
||||
cfreq, ...
|
||||
dfps, ...
|
||||
theid, impl )
|
||||
|
||||
%if ... %mod(theid-1,cols) == 2 || ...
|
||||
% ... %mod(theid-1,cols) == 3 || ...
|
||||
% ... %mod(theid-1,cols) == 4 || ...
|
||||
@@ -19,11 +20,11 @@ function r_val = plot_l3sup_perf( opname, ...
|
||||
%end
|
||||
|
||||
%legend_plot_id = 11;
|
||||
legend_plot_id = 1*cols + 1*5;
|
||||
legend_plot_id = 0*cols + 1*6;
|
||||
|
||||
if 1
|
||||
ax1 = subplot( rows, cols, theid );
|
||||
hold( ax1, 'on' );
|
||||
ax1 = subplot( rows, cols, theid );
|
||||
hold( ax1, 'on' );
|
||||
end
|
||||
|
||||
% Set line properties.
|
||||
@@ -77,9 +78,9 @@ end
|
||||
flopscol = size( data_blissup, 2 );
|
||||
msize = 5;
|
||||
if 1
|
||||
fontsize = 11;
|
||||
fontsize = 12;
|
||||
else
|
||||
fontsize = 16;
|
||||
fontsize = 16;
|
||||
end
|
||||
linesize = 0.5;
|
||||
legend_loc = 'southeast';
|
||||
@@ -95,13 +96,14 @@ for psize_col = 1:3
|
||||
end
|
||||
x_axis( :, 1 ) = data_blissup( :, psize_col );
|
||||
|
||||
% Compute the number of data points we have in the x-axis. Note that
|
||||
% we only use quarter the data points for the m = n = k column of graphs.
|
||||
if mod(theid-1,cols) == 6
|
||||
np = size( data_blissup, 1 ) / 4;
|
||||
else
|
||||
np = size( data_blissup, 1 );
|
||||
end
|
||||
% Compute the number of data points we have in the x-axis. All of them are used;
|
||||
% the code that halved the count for the m = n = k column of graphs is disabled.
|
||||
%if mod(theid-1,cols) == 6
|
||||
% np = size( data_blissup, 1 ) / 2;
|
||||
%else
|
||||
% np = size( data_blissup, 1 );
|
||||
%end
|
||||
np = size( data_blissup, 1 );
|
||||
|
||||
% Grab the last x-axis value.
|
||||
x_end = data_blissup( np, psize_col );
|
||||
@@ -125,8 +127,7 @@ open_ln = line( x_axis( 1:np, 1 ), data_open( 1:np, flopscol ) / nth, ...
|
||||
vend_ln = line( x_axis( 1:np, 1 ), data_vend( 1:np, flopscol ) / nth, ...
|
||||
'Color',color_vend, 'LineStyle',lines_vend, ...
|
||||
'LineWidth',linesize );
|
||||
else
|
||||
if theid == legend_plot_id
|
||||
elseif theid == legend_plot_id
|
||||
blissup_ln = line( nan, nan, ...
|
||||
'Color',color_blissup, 'LineStyle',lines_blissup, ...
|
||||
'LineWidth',linesize );
|
||||
@@ -143,15 +144,28 @@ vend_ln = line( nan, nan, ...
|
||||
'Color',color_vend, 'LineStyle',lines_vend, ...
|
||||
'LineWidth',linesize );
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
xlim( ax1, [x_begin x_end] );
|
||||
ylim( ax1, [y_begin y_end] );
|
||||
|
||||
if 6000 <= x_end && x_end < 10000
|
||||
if mod(theid-1,cols) == 3 || mod(theid-1,cols) == 4 || mod(theid-1,cols) == 5
|
||||
if nth == 12
|
||||
ylim( ax1, [y_begin y_end/2] );
|
||||
elseif nth > 12
|
||||
ylim( ax1, [y_begin y_end/6] );
|
||||
end
|
||||
end
|
||||
|
||||
if 10000 <= x_end && x_end < 15000
|
||||
x_tick2 = x_end - 2000;
|
||||
x_tick1 = x_tick2/2;
|
||||
%xticks( ax1, [ x_tick1 x_tick2 ] );
|
||||
xticks( ax1, [ 4000 8000 12000 ] );
|
||||
elseif 6000 <= x_end && x_end < 10000
|
||||
x_tick2 = x_end - 2000;
|
||||
x_tick1 = x_tick2/2;
|
||||
%xticks( ax1, [ x_tick1 x_tick2 ] );
|
||||
xticks( ax1, [ x_tick1 x_tick2 ] );
|
||||
elseif 4000 <= x_end && x_end < 6000
|
||||
x_tick2 = x_end - 1000;
|
||||
@@ -188,11 +202,12 @@ if show_plot == 1 || theid == legend_plot_id
|
||||
set( leg,'Color','none' );
|
||||
set( leg,'Units','inches' );
|
||||
if impl == 'octave'
|
||||
set( leg,'FontSize',fontsize );
|
||||
set( leg,'Position',[12.50 10.35 1.5 0.9 ] ); % (1,4tl)
|
||||
set( leg,'FontSize',fontsize );
|
||||
%set( leg,'Position',[12.40 10.60 1.9 0.95 ] ); % (1,4tl)
|
||||
set( leg,'Position',[18.80 10.60 1.9 0.95 ] ); % (1,4tl)
|
||||
else
|
||||
set( leg,'FontSize',fontsize-1 );
|
||||
set( leg,'Position',[18.24 10.15 1.15 0.7 ] ); % (1,4tl)
|
||||
set( leg,'FontSize',fontsize-1 );
|
||||
set( leg,'Position',[18.24 10.15 1.15 0.7 ] ); % (1,4tl)
|
||||
end
|
||||
set( leg,'Box','off' );
|
||||
set( leg,'Color','none' );
|
||||
@@ -209,17 +224,31 @@ box( ax1, 'on' );
|
||||
titl = title( titlename );
|
||||
set( titl, 'FontWeight', 'normal' ); % default font style is now 'bold'.
|
||||
|
||||
% The default is to align the plot title across the whole figure, not the box.
|
||||
% This is a hack to nudge the title back to the center of the box.
|
||||
if impl == 'octave'
|
||||
tpos = get( titl, 'Position' ); % default is to align across whole figure, not box.
|
||||
tpos(1) = tpos(1) + -40;
|
||||
set( titl, 'Position', tpos ); % here we nudge it back to centered with box.
|
||||
tpos = get( titl, 'Position' );
|
||||
% For some reason, the titles in the graphs in the last column start
|
||||
% off in a different relative position than the graphs in the other
|
||||
% columns. Here, we manually account for that.
|
||||
if mod(theid-1,cols) == 6
|
||||
tpos(1) = tpos(1) + -10;
|
||||
else
|
||||
tpos(1) = tpos(1) + -40;
|
||||
end
|
||||
set( titl, 'Position', tpos );
|
||||
set( titl, 'FontSize', fontsize );
|
||||
else % impl == 'matlab'
|
||||
tpos = get( titl, 'Position' );
|
||||
tpos(1) = tpos(1) + 90;
|
||||
set( titl, 'Position', tpos );
|
||||
end
|
||||
|
||||
if theid > (rows-1)*cols
|
||||
%xlab = xlabel( ax1,xaxisname );
|
||||
%tpos = get( xlab, 'Position' )
|
||||
%tpos(2) = tpos(2) + 10;
|
||||
%set( xlab, 'Position', tpos );
|
||||
%xlab = xlabel( ax1,xaxisname );
|
||||
%tpos = get( xlab, 'Position' )
|
||||
%tpos(2) = tpos(2) + 10;
|
||||
%set( xlab, 'Position', tpos );
|
||||
if theid == rows*cols - 6
|
||||
xlab = xlabel( ax1, 'm = 6; n = k' );
|
||||
elseif theid == rows*cols - 5
|
||||
@@ -238,13 +267,8 @@ if theid > (rows-1)*cols
|
||||
end
|
||||
|
||||
if mod(theid-1,cols) == 0
|
||||
ylab = ylabel( ax1,yaxisname );
|
||||
ylab = ylabel( ax1,yaxisname );
|
||||
end
|
||||
|
||||
%export_fig( filename, colorflag, '-pdf', '-m2', '-painters', '-transparent' );
|
||||
%saveas( fig, filename_png );
|
||||
|
||||
%hold( ax1, 'off' );
|
||||
|
||||
r_val = 0;
|
||||
|
||||
8
test/sup/octave_mt/runthese.m
Normal file
8
test/sup/octave_mt/runthese.m
Normal file
@@ -0,0 +1,8 @@
|
||||
% kabylake
|
||||
plot_panel_trxsh(3.80,16,4,'mt','d','rrr',[ 6 8 10 ],'../results/kabylake/20200302/mnkt100000_mt4','kbl','MKL','octave'); close; clear all;
|
||||
|
||||
% haswell
|
||||
plot_panel_trxsh(3.1,16,12,'mt','d','rrr',[ 6 8 10 ],'../results/haswell/20200302/mnkt100000_mt12','has','MKL','octave'); close; clear all;
|
||||
|
||||
% epyc
|
||||
plot_panel_trxsh(2.55,8,32,'mt','d','rrr',[ 6 8 10 ],'../results/epyc/20200302/mnkt100000_mt32','epyc','MKL','octave'); close; clear all;
|
||||
@@ -1,50 +1,29 @@
|
||||
function r_val = plot_l3sup_perf( opname, ...
|
||||
smalldims, ...
|
||||
data_blissup, ...
|
||||
data_blisconv, ...
|
||||
data_blislpab, ...
|
||||
data_eigen, ...
|
||||
data_open, ...
|
||||
data_bfeo, ...
|
||||
data_vend, vend_str, ...
|
||||
data_bfeo, ...
|
||||
data_xsmm, ...
|
||||
data_vend, vend_str, ...
|
||||
nth, ...
|
||||
rows, cols, ...
|
||||
cfreq, ...
|
||||
dfps, ...
|
||||
theid, impl )
|
||||
|
||||
% Define the column in which the performance rates are found.
|
||||
flopscol = size( data_blissup, 2 );
|
||||
|
||||
% Check if blasfeo data is available.
|
||||
has_bfeo = 1;
|
||||
if data_bfeo( 1, flopscol ) == 0.0
|
||||
has_bfeo = 0;
|
||||
end
|
||||
|
||||
% Check if libxsmm data is available.
|
||||
has_xsmm = 1;
|
||||
if data_xsmm( 1, flopscol ) == 0.0
|
||||
has_xsmm = 0;
|
||||
end
|
||||
|
||||
% Define which plot id will have the legend.
|
||||
% NOTE: We can draw the legend on any graph as long as it has already been
|
||||
% rendered. Since the coordinates are global, we can simply always wait until
|
||||
% the final graph to draw the legend.
|
||||
%if nth == 1
|
||||
% if has_xsmm == 1
|
||||
% legend_plot_id = 2*cols + 1*5;
|
||||
% else
|
||||
% legend_plot_id = 1*cols + 1*5;
|
||||
% end
|
||||
%if ... %mod(theid-1,cols) == 2 || ...
|
||||
% ... %mod(theid-1,cols) == 3 || ...
|
||||
% ... %mod(theid-1,cols) == 4 || ...
|
||||
% 0 == 1 ... %theid >= 19
|
||||
% show_plot = 0;
|
||||
%else
|
||||
% legend_plot_id = 0*cols + 1*6;
|
||||
show_plot = 1;
|
||||
%end
|
||||
legend_plot_id = cols*rows;
|
||||
|
||||
% Hold the axes.
|
||||
%legend_plot_id = 11;
|
||||
legend_plot_id = 2*cols + 1*5;
|
||||
|
||||
if 1
|
||||
ax1 = subplot( rows, cols, theid );
|
||||
hold( ax1, 'on' );
|
||||
@@ -52,13 +31,12 @@ end
|
||||
|
||||
% Set line properties.
|
||||
color_blissup = 'k'; lines_blissup = '-'; markr_blissup = '';
|
||||
color_blisconv = 'k'; lines_blisconv = ':'; markr_blisconv = '';
|
||||
color_blislpab = 'k'; lines_blislpab = ':'; markr_blislpab = '';
|
||||
color_eigen = 'm'; lines_eigen = '-.'; markr_eigen = 'o';
|
||||
color_open = 'r'; lines_open = '--'; markr_open = 'o';
|
||||
color_bfeo = 'c'; lines_bfeo = '-'; markr_bfeo = 'o';
|
||||
color_vend = 'b'; lines_vend = '-.'; markr_vend = '.';
|
||||
color_bfeo = 'c'; lines_bfeo = '-'; markr_bfeo = 'o';
|
||||
color_xsmm = 'g'; lines_xsmm = '-'; markr_xsmm = 'o';
|
||||
color_vend = 'b'; lines_vend = '-.'; markr_vend = '.';
|
||||
|
||||
% Compute the peak performance in terms of the number of double flops
|
||||
% executable per cycle and the clock rate.
|
||||
@@ -77,13 +55,15 @@ titlename = '%s';
|
||||
titlename = sprintf( titlename, title_opname );
|
||||
|
||||
% Set the legend strings.
|
||||
blissup_lg = sprintf( 'BLIS sup' );
|
||||
blisconv_lg = sprintf( 'BLIS conv' );
|
||||
eigen_lg = sprintf( 'Eigen' );
|
||||
open_lg = sprintf( 'OpenBLAS' );
|
||||
vend_lg = vend_str;
|
||||
bfeo_lg = sprintf( 'BLASFEO' );
|
||||
xsmm_lg = sprintf( 'libxsmm' );
|
||||
blissup_legend = sprintf( 'BLIS sup' );
|
||||
blislpab_legend = sprintf( 'BLIS conv' );
|
||||
eigen_legend = sprintf( 'Eigen' );
|
||||
open_legend = sprintf( 'OpenBLAS' );
|
||||
bfeo_legend = sprintf( 'BLASFEO' );
|
||||
xsmm_legend = sprintf( 'libxsmm' );
|
||||
%vend_legend = sprintf( 'MKL' );
|
||||
%vend_legend = sprintf( 'ARMPL' );
|
||||
vend_legend = vend_str;
|
||||
|
||||
% Set axes range values.
|
||||
y_scale = 1.00;
|
||||
@@ -101,6 +81,7 @@ end
|
||||
|
||||
|
||||
%flopscol = 4;
|
||||
flopscol = size( data_blissup, 2 );
|
||||
msize = 5;
|
||||
if 1
|
||||
fontsize = 12;
|
||||
@@ -130,44 +111,67 @@ x_axis( :, 1 ) = data_blissup( :, psize_col );
|
||||
%end
|
||||
np = size( data_blissup, 1 );
|
||||
|
||||
has_xsmm = 1;
|
||||
if data_xsmm( 1, flopscol ) == 0.0
|
||||
has_xsmm = 0;
|
||||
end
|
||||
|
||||
% Grab the last x-axis value.
|
||||
x_end = data_blissup( np, psize_col );
|
||||
|
||||
%data_peak( 1, 1:2 ) = [ 0 max_perf_core ];
|
||||
%data_peak( 2, 1:2 ) = [ x_end max_perf_core ];
|
||||
|
||||
if show_plot == 1
|
||||
blissup_ln = line( x_axis( 1:np, 1 ), data_blissup( 1:np, flopscol ) / nth, ...
|
||||
'Color',color_blissup, 'LineStyle',lines_blissup, ...
|
||||
'LineWidth',linesize );
|
||||
blisconv_ln = line( x_axis( 1:np, 1 ), data_blisconv( 1:np, flopscol ) / nth, ...
|
||||
'Color',color_blisconv, 'LineStyle',lines_blisconv, ...
|
||||
'LineWidth',linesize );
|
||||
'Color',color_blissup, 'LineStyle',lines_blissup, ...
|
||||
'LineWidth',linesize );
|
||||
blislpab_ln = line( x_axis( 1:np, 1 ), data_blislpab( 1:np, flopscol ) / nth, ...
|
||||
'Color',color_blislpab, 'LineStyle',lines_blislpab, ...
|
||||
'LineWidth',linesize );
|
||||
eigen_ln = line( x_axis( 1:np, 1 ), data_eigen( 1:np, flopscol ) / nth, ...
|
||||
'Color',color_eigen, 'LineStyle',lines_eigen, ...
|
||||
'LineWidth',linesize );
|
||||
'Color',color_eigen, 'LineStyle',lines_eigen, ...
|
||||
'LineWidth',linesize );
|
||||
open_ln = line( x_axis( 1:np, 1 ), data_open( 1:np, flopscol ) / nth, ...
|
||||
'Color',color_open, 'LineStyle',lines_open, ...
|
||||
'LineWidth',linesize );
|
||||
vend_ln = line( x_axis( 1:np, 1 ), data_vend( 1:np, flopscol ) / nth, ...
|
||||
'Color',color_vend, 'LineStyle',lines_vend, ...
|
||||
'LineWidth',linesize );
|
||||
if has_bfeo == 1
|
||||
bfeo_ln = line( x_axis( 1:np, 1 ), data_bfeo( 1:np, flopscol ) / nth, ...
|
||||
'Color',color_bfeo, 'LineStyle',lines_bfeo, ...
|
||||
'LineWidth',linesize );
|
||||
else
|
||||
bfeo_ln = line( nan, nan, ...
|
||||
'Color',color_bfeo, 'LineStyle',lines_bfeo, ...
|
||||
'LineWidth',linesize );
|
||||
end
|
||||
'Color',color_open, 'LineStyle',lines_open, ...
|
||||
'LineWidth',linesize );
|
||||
bfeo_ln = line( x_axis( 1:np, 1 ), data_bfeo( 1:np, flopscol ) / nth, ...
|
||||
'Color',color_bfeo, 'LineStyle',lines_bfeo, ...
|
||||
'LineWidth',linesize );
|
||||
if has_xsmm == 1
|
||||
xsmm_ln = line( x_axis( 1:np, 1 ), data_xsmm( 1:np, flopscol ) / nth, ...
|
||||
'Color',color_xsmm, 'LineStyle',lines_xsmm, ...
|
||||
'LineWidth',linesize );
|
||||
xsmm_ln = line( x_axis( 1:np, 1 ), data_xsmm( 1:np, flopscol ) / nth, ...
|
||||
'Color',color_xsmm, 'LineStyle',lines_xsmm, ...
|
||||
'LineWidth',linesize );
|
||||
else
|
||||
xsmm_ln = line( nan, nan, ...
|
||||
'Color',color_xsmm, 'LineStyle',lines_xsmm, ...
|
||||
'LineWidth',linesize );
|
||||
xsmm_ln = line( nan, nan, ...
|
||||
'Color',color_xsmm, 'LineStyle',lines_xsmm, ...
|
||||
'LineWidth',linesize );
|
||||
end
|
||||
vend_ln = line( x_axis( 1:np, 1 ), data_vend( 1:np, flopscol ) / nth, ...
|
||||
'Color',color_vend, 'LineStyle',lines_vend, ...
|
||||
'LineWidth',linesize );
|
||||
elseif theid == legend_plot_id
|
||||
blissup_ln = line( nan, nan, ...
|
||||
'Color',color_blissup, 'LineStyle',lines_blissup, ...
|
||||
'LineWidth',linesize );
|
||||
blislpab_ln = line( nan, nan, ...
|
||||
'Color',color_blislpab, 'LineStyle',lines_blislpab, ...
|
||||
'LineWidth',linesize );
|
||||
eigen_ln = line( nan, nan, ...
|
||||
'Color',color_eigen, 'LineStyle',lines_eigen, ...
|
||||
'LineWidth',linesize );
|
||||
open_ln = line( nan, nan, ...
|
||||
'Color',color_open, 'LineStyle',lines_open, ...
|
||||
'LineWidth',linesize );
|
||||
bfeo_ln = line( nan, nan, ...
|
||||
'Color',color_bfeo, 'LineStyle',lines_bfeo, ...
|
||||
'LineWidth',linesize );
|
||||
xsmm_ln = line( nan, nan, ...
|
||||
'Color',color_xsmm, 'LineStyle',lines_xsmm, ...
|
||||
'LineWidth',linesize );
|
||||
vend_ln = line( nan, nan, ...
|
||||
'Color',color_vend, 'LineStyle',lines_vend, ...
|
||||
'LineWidth',linesize );
|
||||
end
|
||||
|
||||
|
||||
@@ -199,51 +203,71 @@ elseif 500 <= x_end && x_end < 1000
|
||||
xticks( ax1, [ x_tick1 x_tick2 x_tick3 ] );
|
||||
end
|
||||
|
||||
% xpos ypos
|
||||
%set( leg,'Position',[11.32 6.36 1.15 0.7 ] ); % (1,4tl)
|
||||
if nth == 1 && theid == legend_plot_id
|
||||
if has_xsmm == 1
|
||||
% single-threaded, with libxsmm (ccc)
|
||||
leg = legend( ...
|
||||
[ blissup_ln blisconv_ln eigen_ln open_ln vend_ln bfeo_ln xsmm_ln ], ...
|
||||
blissup_lg, blisconv_lg, eigen_lg, open_lg, vend_lg, bfeo_lg, xsmm_lg, ...
|
||||
'Location', legend_loc );
|
||||
set( leg,'Box','off','Color','none','Units','inches' );
|
||||
if impl == 'octave'
|
||||
set( leg,'FontSize',fontsize );
|
||||
set( leg,'Position',[15.35 4.62 1.9 1.20] ); % (1,4tl)
|
||||
if show_plot == 1 || theid == legend_plot_id
|
||||
if nth == 1 && theid == legend_plot_id
|
||||
if has_xsmm == 1
|
||||
leg = legend( ...
|
||||
[ ...
|
||||
blissup_ln ...
|
||||
blislpab_ln ...
|
||||
eigen_ln ...
|
||||
open_ln ...
|
||||
bfeo_ln ...
|
||||
xsmm_ln ...
|
||||
vend_ln ...
|
||||
], ...
|
||||
blissup_legend, ...
|
||||
blislpab_legend, ...
|
||||
eigen_legend, ...
|
||||
open_legend, ...
|
||||
bfeo_legend, ...
|
||||
xsmm_legend, ...
|
||||
vend_legend, ...
|
||||
'Location', legend_loc );
|
||||
set( leg,'Box','off' );
|
||||
set( leg,'Color','none' );
|
||||
set( leg,'Units','inches' );
|
||||
if impl == 'octave'
|
||||
set( leg,'FontSize',fontsize );
|
||||
set( leg,'Position',[15.40 4.75 1.9 1.20] ); % (1,4tl)
|
||||
else
|
||||
set( leg,'FontSize',fontsize-3 );
|
||||
set( leg,'Position',[18.20 10.20 1.15 0.7 ] ); % (1,4tl)
|
||||
end
|
||||
else
|
||||
set( leg,'FontSize',fontsize-3 );
|
||||
set( leg,'Position',[18.20 10.20 1.15 0.7 ] ); % (1,4tl)
|
||||
leg = legend( ...
|
||||
[ ...
|
||||
blissup_ln ...
|
||||
blislpab_ln ...
|
||||
eigen_ln ...
|
||||
open_ln ...
|
||||
bfeo_ln ...
|
||||
vend_ln ...
|
||||
], ...
|
||||
blissup_legend, ...
|
||||
blislpab_legend, ...
|
||||
eigen_legend, ...
|
||||
open_legend, ...
|
||||
bfeo_legend, ...
|
||||
vend_legend, ...
|
||||
'Location', legend_loc );
|
||||
set( leg,'Box','off' );
|
||||
set( leg,'Color','none' );
|
||||
set( leg,'Units','inches' );
|
||||
if impl == 'octave'
|
||||
set( leg,'FontSize',fontsize );
|
||||
set( leg,'Position',[15.40 7.65 1.9 1.10] ); % (1,4tl)
|
||||
else
|
||||
set( leg,'FontSize',fontsize-1 );
|
||||
set( leg,'Position',[18.24 10.15 1.15 0.7] ); % (1,4tl)
|
||||
end
|
||||
end
|
||||
else
|
||||
% single-threaded, without libxsmm (rrr, or other)
|
||||
leg = legend( ...
|
||||
[ blissup_ln blisconv_ln eigen_ln open_ln vend_ln bfeo_ln ], ...
|
||||
blissup_lg, blisconv_lg, eigen_lg, open_lg, vend_lg, bfeo_lg, ...
|
||||
'Location', legend_loc );
|
||||
set( leg,'Box','off','Color','none','Units','inches' );
|
||||
if impl == 'octave'
|
||||
set( leg,'FontSize',fontsize );
|
||||
set( leg,'Position',[15.35 7.40 1.9 1.10] ); % (1,4tl)
|
||||
else
|
||||
set( leg,'FontSize',fontsize-1 );
|
||||
set( leg,'Position',[18.24 10.15 1.15 0.7] ); % (1,4tl)
|
||||
end
|
||||
end
|
||||
elseif nth > 1 && theid == legend_plot_id
|
||||
% multithreaded
|
||||
leg = legend( ...
|
||||
[ blissup_ln blisconv_ln eigen_ln open_ln vend_ln ], ...
|
||||
blissup_lg, blisconv_lg, eigen_lg, open_lg, vend_lg, ...
|
||||
'Location', legend_loc );
|
||||
set( leg,'Box','off','Color','none','Units','inches' );
|
||||
if impl == 'octave'
|
||||
set( leg,'FontSize',fontsize );
|
||||
set( leg,'Position',[18.20 10.30 1.9 0.95] ); % (1,4tl)
|
||||
else
|
||||
set( leg,'FontSize',fontsize-1 );
|
||||
set( leg,'Position',[18.24 10.15 1.15 0.7] ); % (1,4tl)
|
||||
set( leg,'Box','off' );
|
||||
set( leg,'Color','none' );
|
||||
set( leg,'Units','inches' );
|
||||
% xpos ypos
|
||||
%set( leg,'Position',[11.32 6.36 1.15 0.7 ] ); % (1,4tl)
|
||||
elseif nth > 1 && theid == legend_plot_id
|
||||
end
|
||||
end
|
||||
|
||||
@@ -274,38 +298,28 @@ else % impl == 'matlab'
|
||||
set( titl, 'Position', tpos );
|
||||
end
|
||||
|
||||
sll_str = sprintf( 'm = %u; n = k', smalldims(1) );
|
||||
lsl_str = sprintf( 'n = %u; m = k', smalldims(2) );
|
||||
lls_str = sprintf( 'k = %u; m = n', smalldims(3) );
|
||||
lss_str = sprintf( 'm; n = %u, k = %u', smalldims(2), smalldims(3) );
|
||||
sls_str = sprintf( 'n; m = %u, k = %u', smalldims(1), smalldims(3) );
|
||||
ssl_str = sprintf( 'k; m = %u, n = %u', smalldims(1), smalldims(2) );
|
||||
lll_str = sprintf( 'm = n = k' );
|
||||
|
||||
% Place labels on the bottom row of graphs.
|
||||
if theid > (rows-1)*cols
|
||||
%xlab = xlabel( ax1,xaxisname );
|
||||
%tpos = get( xlab, 'Position' )
|
||||
%tpos(2) = tpos(2) + 10;
|
||||
%set( xlab, 'Position', tpos );
|
||||
if theid == rows*cols - 6
|
||||
xlab = xlabel( ax1, sll_str );
|
||||
xlab = xlabel( ax1, 'm = 6; n = k' );
|
||||
elseif theid == rows*cols - 5
|
||||
xlab = xlabel( ax1, lsl_str );
|
||||
xlab = xlabel( ax1, 'n = 8; m = k' );
|
||||
elseif theid == rows*cols - 4
|
||||
xlab = xlabel( ax1, lls_str );
|
||||
xlab = xlabel( ax1, 'k = 4; m = n' );
|
||||
elseif theid == rows*cols - 3
|
||||
xlab = xlabel( ax1, lss_str );
|
||||
xlab = xlabel( ax1, 'm; n = 8, k = 4' );
|
||||
elseif theid == rows*cols - 2
|
||||
xlab = xlabel( ax1, sls_str );
|
||||
xlab = xlabel( ax1, 'n; m = 6, k = 4' );
|
||||
elseif theid == rows*cols - 1
|
||||
xlab = xlabel( ax1, ssl_str );
|
||||
xlab = xlabel( ax1, 'k; m = 6, n = 8' );
|
||||
elseif theid == rows*cols - 0
|
||||
xlab = xlabel( ax1, lll_str );
|
||||
xlab = xlabel( ax1, 'm = n = k' );
|
||||
end
|
||||
end
|
||||
|
||||
% Place labels on the left-hand column of graphs.
|
||||
if mod(theid-1,cols) == 0
|
||||
ylab = ylabel( ax1,yaxisname );
|
||||
end
|
||||
8
test/sup/octave_st/runthese.m
Normal file
8
test/sup/octave_st/runthese.m
Normal file
@@ -0,0 +1,8 @@
|
||||
% kabylake
|
||||
plot_panel_trxsh(3.80,16,1,'st','d','rrr',[ 6 8 4 ],'../results/kabylake/20200302/mnkt100000_st','kbl','MKL','octave'); close; clear all;
|
||||
|
||||
% haswell
|
||||
plot_panel_trxsh(3.5,16,1,'st','d','rrr',[ 6 8 4 ],'../results/haswell/20200302/mnkt100000_st','has','MKL','octave'); close; clear all;
|
||||
|
||||
% epyc
|
||||
plot_panel_trxsh(3.00, 8,1,'st','d','rrr',[ 6 8 4 ],'../results/epyc/20200302/mnkt100000_st','epyc','MKL','octave'); close; clear all;
|
||||
@@ -1,580 +0,0 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# BLIS
|
||||
# An object-based framework for developing high-performance BLAS-like
|
||||
# libraries.
|
||||
#
|
||||
# Copyright (C) 2014, The University of Texas at Austin
|
||||
# Copyright (C) 2019, Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# - Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# - Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# - Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#
|
||||
|
||||
#
|
||||
# Makefile
|
||||
#
|
||||
# Field G. Van Zee
|
||||
#
|
||||
# Makefile for standalone BLIS test drivers.
|
||||
#
|
||||
|
||||
#
|
||||
# --- Makefile PHONY target definitions ----------------------------------------
|
||||
#
|
||||
|
||||
.PHONY: all all-st all-mt \
|
||||
blis blis-st blis-mt \
|
||||
clean cleanx
|
||||
|
||||
|
||||
|
||||
#
|
||||
# --- Determine makefile fragment location -------------------------------------
|
||||
#
|
||||
|
||||
# Comments:
|
||||
# - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given.
|
||||
# - We must use recursively expanded assignment for LIB_PATH and INC_PATH in
|
||||
# the second case because CONFIG_NAME is not yet set.
|
||||
ifneq ($(strip $(BLIS_INSTALL_PATH)),)
|
||||
LIB_PATH := $(BLIS_INSTALL_PATH)/lib
|
||||
INC_PATH := $(BLIS_INSTALL_PATH)/include/blis
|
||||
SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis
|
||||
else
|
||||
DIST_PATH := ../..
|
||||
LIB_PATH = ../../lib/$(CONFIG_NAME)
|
||||
INC_PATH = ../../include/$(CONFIG_NAME)
|
||||
SHARE_PATH := ../..
|
||||
endif
|
||||
|
||||
|
||||
|
||||
#
|
||||
# --- Include common makefile definitions --------------------------------------
|
||||
#
|
||||
|
||||
# Include the common makefile fragment.
|
||||
-include $(SHARE_PATH)/common.mk
|
||||
|
||||
|
||||
|
||||
#
|
||||
# --- BLAS and LAPACK implementations ------------------------------------------
|
||||
#
|
||||
|
||||
# BLIS library and header path. This is simply wherever it was installed.
|
||||
#BLIS_LIB_PATH := $(INSTALL_PREFIX)/lib
|
||||
#BLIS_INC_PATH := $(INSTALL_PREFIX)/include/blis
|
||||
|
||||
# BLIS library.
|
||||
#BLIS_LIB := $(BLIS_LIB_PATH)/libblis.a
|
||||
|
||||
# BLAS library path(s). This is where the BLAS libraries reside.
|
||||
HOME_LIB_PATH := $(HOME)/flame/lib
|
||||
MKL_LIB_PATH := $(HOME)/intel/mkl/lib/intel64
|
||||
|
||||
# netlib BLAS
|
||||
NETLIB_LIB := $(HOME_LIB_PATH)/libblas.a
|
||||
|
||||
# OpenBLAS
|
||||
OPENBLAS_LIB := $(HOME_LIB_PATH)/libopenblas.a
|
||||
OPENBLASP_LIB := $(HOME_LIB_PATH)/libopenblasp.a
|
||||
|
||||
# BLASFEO
|
||||
BLASFEO_LIB := $(HOME_LIB_PATH)/libblasfeo.a
|
||||
|
||||
# libxsmm
|
||||
LIBXSMM_LIB := $(HOME_LIB_PATH)/libxsmm.a -ldl \
|
||||
$(NETLIB_LIB) -lgfortran
|
||||
|
||||
# ATLAS
|
||||
ATLAS_LIB := $(HOME_LIB_PATH)/libf77blas.a \
|
||||
$(HOME_LIB_PATH)/libatlas.a
|
||||
|
||||
# Eigen
|
||||
EIGEN_INC := $(HOME)/flame/eigen/include/eigen3
|
||||
EIGEN_LIB := $(HOME_LIB_PATH)/libeigen_blas_static.a
|
||||
EIGENP_LIB := $(EIGEN_LIB)
|
||||
|
||||
# MKL
|
||||
MKL_LIB := -L$(MKL_LIB_PATH) \
|
||||
-lmkl_intel_lp64 \
|
||||
-lmkl_core \
|
||||
-lmkl_sequential \
|
||||
-lpthread -lm -ldl
|
||||
MKLP_LIB := -L$(MKL_LIB_PATH) \
|
||||
-lmkl_intel_lp64 \
|
||||
-lmkl_core \
|
||||
-lmkl_gnu_thread \
|
||||
-lpthread -lm -ldl -fopenmp
|
||||
#-L$(ICC_LIB_PATH) \
|
||||
#-lgomp
|
||||
|
||||
VENDOR_LIB := $(MKL_LIB)
|
||||
VENDORP_LIB := $(MKLP_LIB)
|
||||
|
||||
|
||||
#
|
||||
# --- Problem size definitions -------------------------------------------------
|
||||
#
|
||||
|
||||
# Single core
|
||||
PS_BEGIN := 4
|
||||
PS_MAX := 800
|
||||
PS_INC := 4
|
||||
|
||||
# Multicore
|
||||
P1_BEGIN := 8
|
||||
P1_MAX := 1600
|
||||
P1_INC := 8
|
||||
|
||||
|
||||
#
|
||||
# --- General build definitions ------------------------------------------------
|
||||
#
|
||||
|
||||
TEST_SRC_PATH := .
|
||||
TEST_OBJ_PATH := .
|
||||
|
||||
# Gather all local object files.
|
||||
TEST_OBJS := $(sort $(patsubst $(TEST_SRC_PATH)/%.c, \
|
||||
$(TEST_OBJ_PATH)/%.o, \
|
||||
$(wildcard $(TEST_SRC_PATH)/*.c)))
|
||||
|
||||
# Override the value of CINCFLAGS so that the value of CFLAGS returned by
|
||||
# get-user-cflags-for() is not cluttered up with include paths needed only
|
||||
# while building BLIS.
|
||||
CINCFLAGS := -I$(INC_PATH)
|
||||
|
||||
# Use the CFLAGS returned by get-user-cflags-for() for the configuration family.
|
||||
CFLAGS := $(call get-user-cflags-for,$(CONFIG_NAME))
|
||||
|
||||
# Add local header paths to CFLAGS.
|
||||
CFLAGS += -I$(TEST_SRC_PATH)
|
||||
|
||||
# Locate the libblis library to which we will link.
|
||||
LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L)
|
||||
|
||||
# Define a set of CFLAGS for use with C++ and Eigen.
|
||||
CXXFLAGS := $(subst -std=c99,-std=c++11,$(CFLAGS))
|
||||
CXXFLAGS += -I$(EIGEN_INC)
|
||||
|
||||
# Create a copy of CXXFLAGS without -fopenmp in order to disable multithreading.
|
||||
CXXFLAGS_ST := -march=native $(subst -fopenmp,,$(CXXFLAGS))
|
||||
CXXFLAGS_MT := -march=native $(CXXFLAGS)
|
||||
|
||||
# Single or multithreaded string
|
||||
STR_ST := -DTHR_STR=\"st\"
|
||||
STR_MT := -DTHR_STR=\"mt\"
|
||||
|
||||
# Number of trials per problem size.
|
||||
N_TRIALS := -DN_TRIALS=3
|
||||
|
||||
# Problem size specification
|
||||
PDEF_ST := -DP_BEGIN=$(PS_BEGIN) \
|
||||
-DP_MAX=$(PS_MAX) \
|
||||
-DP_INC=$(PS_INC)
|
||||
|
||||
PDEF_MT := -DP_BEGIN=$(P1_BEGIN) \
|
||||
-DP_MAX=$(P1_MAX) \
|
||||
-DP_INC=$(P1_INC)
|
||||
|
||||
ifeq ($(E),1)
|
||||
ERRCHK := -DERROR_CHECK
|
||||
else
|
||||
ERRCHK := -DNO_ERROR_CHECK
|
||||
endif
|
||||
|
||||
# Enumerate possible datatypes and computation precisions.
|
||||
#DTS := s d c z
|
||||
DTS := d
|
||||
|
||||
TRANS := n_n \
|
||||
n_t \
|
||||
t_n \
|
||||
t_t
|
||||
|
||||
# While BLIS supports all combinations of row and column storage for matrices
|
||||
# C, A, and B, the alternatives mostly only support CBLAS APIs, which inherently
|
||||
# support only "all row-storage" or "all column-storage". Thus, we disable the
|
||||
# building of those other drivers so that compilation/linking completes sooner.
|
||||
#STORS := r_r_r \
|
||||
# r_r_c \
|
||||
# r_c_r \
|
||||
# r_c_c \
|
||||
# c_r_r \
|
||||
# c_r_c \
|
||||
# c_c_r \
|
||||
# c_c_c
|
||||
STORS := r_r_r \
|
||||
c_c_c
|
||||
|
||||
|
||||
SHAPES := l_l_s \
|
||||
l_s_l \
|
||||
s_l_l \
|
||||
s_s_l \
|
||||
s_l_s \
|
||||
l_s_s \
|
||||
l_l_l
|
||||
|
||||
SMS := 6
|
||||
SNS := 8
|
||||
SKS := 10
|
||||
|
||||
|
||||
#
|
||||
# --- Function definitions -----------------------------------------------------
|
||||
#
|
||||
|
||||
# A function to strip the underscores from a list of strings.
|
||||
stripu = $(subst _,,$(1))
|
||||
|
||||
# Various functions that help us construct the datatype combinations and then
|
||||
# extract the needed datatype strings and C preprocessor define flags.
|
||||
get-1of2 = $(word 1,$(subst _, ,$(1)))
|
||||
get-2of2 = $(word 2,$(subst _, ,$(1)))
|
||||
|
||||
get-1of3 = $(word 1,$(subst _, ,$(1)))
|
||||
get-2of3 = $(word 2,$(subst _, ,$(1)))
|
||||
get-3of3 = $(word 3,$(subst _, ,$(1)))
|
||||
|
||||
# Datatype defs.
|
||||
# get-dt-cpp: map a datatype character to its C preprocessor flags.
# argument: 1: datatype char (s, d, c, or z)
# The argument is tested for 's', then 'd', then 'c' (in that order) using
# findstring; anything that matches none of them falls through to the
# dcomplex flags. Each case emits both a -DDT=BLIS_<type> definition and a
# matching -DIS_<type> macro.
get-dt-cpp = $(strip \
|
||||
$(if $(findstring s,$(1)),-DDT=BLIS_FLOAT -DIS_FLOAT,\
|
||||
$(if $(findstring d,$(1)),-DDT=BLIS_DOUBLE -DIS_DOUBLE,\
|
||||
$(if $(findstring c,$(1)),-DDT=BLIS_SCOMPLEX -DIS_SCOMPLEX,\
|
||||
-DDT=BLIS_DCOMPLEX -DIS_DCOMPLEX))))
|
||||
|
||||
# Transpose defs.
|
||||
get-tra-defs-a = $(strip $(subst n,-DTRANSA=BLIS_NO_TRANSPOSE -DA_NOTRANS, \
|
||||
$(subst t,-DTRANSA=BLIS_TRANSPOSE -DA_TRANS,$(call get-1of2,$(1)))))
|
||||
get-tra-defs-b = $(strip $(subst n,-DTRANSB=BLIS_NO_TRANSPOSE -DB_NOTRANS, \
|
||||
$(subst t,-DTRANSB=BLIS_TRANSPOSE -DB_TRANS,$(call get-2of2,$(1)))))
|
||||
get-tra-defs = $(call get-tra-defs-a,$(1)) $(call get-tra-defs-b,$(1))
|
||||
|
||||
# Storage defs.
|
||||
get-sto-uch-a = $(strip $(subst r,R, \
|
||||
$(subst c,C,$(call get-1of3,$(1)))))
|
||||
get-sto-uch-b = $(strip $(subst r,R, \
|
||||
$(subst c,C,$(call get-2of3,$(1)))))
|
||||
get-sto-uch-c = $(strip $(subst r,R, \
|
||||
$(subst c,C,$(call get-3of3,$(1)))))
|
||||
get-sto-defs = $(strip \
|
||||
-DSTOR3=BLIS_$(call get-sto-uch-a,$(1))$(call get-sto-uch-b,$(1))$(call get-sto-uch-c,$(1)) \
|
||||
-DA_STOR_$(call get-sto-uch-a,$(1)) \
|
||||
-DB_STOR_$(call get-sto-uch-b,$(1)) \
|
||||
-DC_STOR_$(call get-sto-uch-c,$(1)))
|
||||
|
||||
# Dimension defs.
|
||||
get-shape-defs-cm = $(if $(findstring l,$(1)),-DM_DIM=-1,-DM_DIM=$(2))
|
||||
get-shape-defs-cn = $(if $(findstring l,$(1)),-DN_DIM=-1,-DN_DIM=$(2))
|
||||
get-shape-defs-ck = $(if $(findstring l,$(1)),-DK_DIM=-1,-DK_DIM=$(2))
|
||||
get-shape-defs-m = $(call get-shape-defs-cm,$(call get-1of3,$(1)),$(2))
|
||||
get-shape-defs-n = $(call get-shape-defs-cn,$(call get-2of3,$(1)),$(2))
|
||||
get-shape-defs-k = $(call get-shape-defs-ck,$(call get-3of3,$(1)),$(2))
|
||||
|
||||
# arguments: 1: shape (w/ underscores) 2: smallm 3: smalln 4: smallk
|
||||
get-shape-defs = $(strip $(call get-shape-defs-m,$(1),$(2)) \
|
||||
$(call get-shape-defs-n,$(1),$(3)) \
|
||||
$(call get-shape-defs-k,$(1),$(4)))
|
||||
|
||||
#$(error l_l_s 6 8 4 = $(call get-shape-defs,l_l_s,6,8,4))
|
||||
|
||||
# Shape-dimension string.
|
||||
get-shape-str-ch = $(if $(findstring l,$(1)),p,$(2))
|
||||
get-shape-str-m = $(call get-shape-str-ch,$(call get-1of3,$(1)),$(2))
|
||||
get-shape-str-n = $(call get-shape-str-ch,$(call get-2of3,$(1)),$(2))
|
||||
get-shape-str-k = $(call get-shape-str-ch,$(call get-3of3,$(1)),$(2))
|
||||
|
||||
# arguments: 1: shape (w/ underscores) 2: smallm 3: smalln 4: smallk
|
||||
get-shape-dim-str = m$(call get-shape-str-m,$(1),$(2))n$(call get-shape-str-n,$(1),$(3))k$(call get-shape-str-k,$(1),$(4))
|
||||
|
||||
# Implementation defs.
|
||||
# Define a function to return the appropriate -DSTR= and -D[BLIS|BLAS] flags.
|
||||
# get-imp-defs: map an implementation name to its preprocessor flags.
# argument: 1: implementation name (blissup, blislpab, eigen, openblas,
#              blasfeo, libxsmm, or vendor)
# Each recognized name is replaced (via nested subst calls) by
# -DSTR="<name>" plus the macro(s) selecting the code path for that
# implementation: -DBLIS -DSUP, -DBLIS, -DEIGEN, -DCBLAS, or -DBLAS -DXSMM.
# A name that matches none of the substitutions is returned unchanged.
get-imp-defs = $(strip $(subst blissup,-DSTR=\"$(1)\" -DBLIS -DSUP, \
|
||||
$(subst blislpab,-DSTR=\"$(1)\" -DBLIS, \
|
||||
$(subst eigen,-DSTR=\"$(1)\" -DEIGEN, \
|
||||
$(subst openblas,-DSTR=\"$(1)\" -DCBLAS, \
|
||||
$(subst blasfeo,-DSTR=\"$(1)\" -DCBLAS, \
|
||||
$(subst libxsmm,-DSTR=\"$(1)\" -DBLAS -DXSMM, \
|
||||
$(subst vendor,-DSTR=\"$(1)\" -DCBLAS,$(1)))))))))
|
||||
|
||||
TRANS0 = $(call stripu,$(TRANS))
|
||||
STORS0 = $(call stripu,$(STORS))
|
||||
|
||||
# Limit BLAS and Eigen to only using all row-stored, or all column-stored matrices.
|
||||
# Also, limit libxsmm to using all column-stored matrices since it does not offer
|
||||
# CBLAS interfaces.
|
||||
BSTORS0 = rrr ccc
|
||||
ESTORS0 = rrr ccc
|
||||
XSTORS0 = ccc
|
||||
|
||||
|
||||
#
|
||||
# --- Object and binary file definitions ---------------------------------------
|
||||
#
|
||||
|
||||
# get-st-objs: build the list of single-threaded object file names, one for
# every combination of the given lists.
# arguments: 1: datatypes  2: transposes (no underscores)
#            3: storages (no underscores)  4: shapes (w/ underscores)
#            5: smallm values  6: smalln values  7: smallk values
#            8: implementation name
# Each name has the form
#   test_<dt>gemm_<tr>_<st>_<shape-dim-str>_<impl>_st.o
get-st-objs = $(foreach dt,$(1),$(foreach tr,$(2),$(foreach st,$(3),$(foreach sh,$(4),$(foreach sm,$(5),$(foreach sn,$(6),$(foreach sk,$(7),test_$(dt)gemm_$(tr)_$(st)_$(call get-shape-dim-str,$(sh),$(sm),$(sn),$(sk))_$(8)_st.o)))))))
|
||||
|
||||
# Build a list of object files and binaries for each single-threaded
|
||||
# implementation using the get-st-objs() function defined above.
|
||||
BLISSUP_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(STORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blissup)
|
||||
BLISSUP_ST_BINS := $(patsubst %.o,%.x,$(BLISSUP_ST_OBJS))
|
||||
|
||||
BLISLPAB_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(STORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blislpab)
|
||||
BLISLPAB_ST_BINS := $(patsubst %.o,%.x,$(BLISLPAB_ST_OBJS))
|
||||
|
||||
EIGEN_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(ESTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),eigen)
|
||||
EIGEN_ST_BINS := $(patsubst %.o,%.x,$(EIGEN_ST_OBJS))
|
||||
|
||||
OPENBLAS_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),openblas)
|
||||
OPENBLAS_ST_BINS := $(patsubst %.o,%.x,$(OPENBLAS_ST_OBJS))
|
||||
|
||||
BLASFEO_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blasfeo)
|
||||
BLASFEO_ST_BINS := $(patsubst %.o,%.x,$(BLASFEO_ST_OBJS))
|
||||
|
||||
LIBXSMM_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(XSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),libxsmm)
|
||||
LIBXSMM_ST_BINS := $(patsubst %.o,%.x,$(LIBXSMM_ST_OBJS))
|
||||
|
||||
VENDOR_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),vendor)
|
||||
VENDOR_ST_BINS := $(patsubst %.o,%.x,$(VENDOR_ST_OBJS))
|
||||
|
||||
# Mark the object files as intermediate so that make will remove them
|
||||
# automatically after building the binaries on which they depend.
|
||||
.INTERMEDIATE: $(BLISSUP_ST_OBJS) \
|
||||
$(BLISLPAB_ST_OBJS) \
|
||||
$(EIGEN_ST_OBJS) \
|
||||
$(OPENBLAS_ST_OBJS) \
|
||||
$(BLASFEO_ST_OBJS) \
|
||||
$(LIBXSMM_ST_OBJS) \
|
||||
$(VENDOR_ST_OBJS)
|
||||
|
||||
# get-mt-objs: build the list of multithreaded object file names, one for
# every combination of the given lists.
# arguments: 1: datatypes  2: transposes (no underscores)
#            3: storages (no underscores)  4: shapes (w/ underscores)
#            5: smallm values  6: smalln values  7: smallk values
#            8: implementation name
# Each name has the form
#   test_<dt>gemm_<tr>_<st>_<shape-dim-str>_<impl>_mt.o
get-mt-objs = $(foreach dt,$(1),$(foreach tr,$(2),$(foreach st,$(3),$(foreach sh,$(4),$(foreach sm,$(5),$(foreach sn,$(6),$(foreach sk,$(7),test_$(dt)gemm_$(tr)_$(st)_$(call get-shape-dim-str,$(sh),$(sm),$(sn),$(sk))_$(8)_mt.o)))))))
|
||||
|
||||
# Build a list of object files and binaries for each multithreaded
|
||||
# implementation using the get-mt-objs() function defined above.
|
||||
BLISSUP_MT_OBJS := $(call get-mt-objs,$(DTS),$(TRANS0),$(STORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blissup)
|
||||
BLISSUP_MT_BINS := $(patsubst %.o,%.x,$(BLISSUP_MT_OBJS))
|
||||
|
||||
BLISLPAB_MT_OBJS := $(call get-mt-objs,$(DTS),$(TRANS0),$(STORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blislpab)
|
||||
BLISLPAB_MT_BINS := $(patsubst %.o,%.x,$(BLISLPAB_MT_OBJS))
|
||||
|
||||
EIGEN_MT_OBJS := $(call get-mt-objs,$(DTS),$(TRANS0),$(ESTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),eigen)
|
||||
EIGEN_MT_BINS := $(patsubst %.o,%.x,$(EIGEN_MT_OBJS))
|
||||
|
||||
OPENBLAS_MT_OBJS := $(call get-mt-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),openblas)
|
||||
OPENBLAS_MT_BINS := $(patsubst %.o,%.x,$(OPENBLAS_MT_OBJS))
|
||||
|
||||
VENDOR_MT_OBJS := $(call get-mt-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),vendor)
|
||||
VENDOR_MT_BINS := $(patsubst %.o,%.x,$(VENDOR_MT_OBJS))
|
||||
|
||||
#$(error "objs = $(EIGEN_ST_BINS)" )
|
||||
|
||||
# Mark the object files as intermediate so that make will remove them
|
||||
# automatically after building the binaries on which they depend.
|
||||
.INTERMEDIATE: $(BLISSUP_MT_OBJS) \
|
||||
$(BLISLPAB_MT_OBJS) \
|
||||
$(EIGEN_MT_OBJS) \
|
||||
$(OPENBLAS_MT_OBJS) \
|
||||
$(VENDOR_MT_OBJS)
|
||||
|
||||
|
||||
#
|
||||
# --- Targets/rules ------------------------------------------------------------
|
||||
#
|
||||
|
||||
all: st
|
||||
|
||||
blis: blissup-st blislpab-st
|
||||
|
||||
blissup: blissup-st
|
||||
blislpab: blislpab-st
|
||||
eigen: eigen-st
|
||||
openblas: openblas-st
|
||||
blasfeo: blasfeo-st
|
||||
libxsmm: libxsmm-st
|
||||
vendor: vendor-st
|
||||
|
||||
st: blissup-st blislpab-st \
|
||||
eigen-st openblas-st blasfeo-st libxsmm-st vendor-st
|
||||
|
||||
blissup-st: $(BLISSUP_ST_BINS)
|
||||
blislpab-st: $(BLISLPAB_ST_BINS)
|
||||
eigen-st: $(EIGEN_ST_BINS)
|
||||
openblas-st: $(OPENBLAS_ST_BINS)
|
||||
blasfeo-st: $(BLASFEO_ST_BINS)
|
||||
libxsmm-st: $(LIBXSMM_ST_BINS)
|
||||
vendor-st: $(VENDOR_ST_BINS)
|
||||
|
||||
mt: blissup-mt blislpab-mt \
|
||||
eigen-mt openblas-mt vendor-mt
|
||||
|
||||
blissup-mt: $(BLISSUP_MT_BINS)
|
||||
blislpab-mt: $(BLISLPAB_MT_BINS)
|
||||
eigen-mt: $(EIGEN_MT_BINS)
|
||||
openblas-mt: $(OPENBLAS_MT_BINS)
|
||||
vendor-mt: $(VENDOR_MT_BINS)
|
||||
|
||||
|
||||
|
||||
# -- Object file rules --
|
||||
|
||||
# Define the implementations for which we will instantiate compilation rules.
|
||||
BIMPLS_ST := blissup blislpab openblas blasfeo libxsmm vendor
|
||||
BIMPLS_MT := blissup blislpab openblas vendor
|
||||
EIMPLS := eigen
|
||||
|
||||
# 1 2 3 4 567 8
|
||||
# test_dgemm_nn_rrr_mpn6kp_blissup_st.x
|
||||
|
||||
# Define the function that will be used to instantiate compilation rules
|
||||
# for the various single-threaded implementations.
|
||||
# arguments: 1: datatype char  2: transposes (w/ underscores)
#            3: storage (w/ underscores)  4: shape (w/ underscores)
#            5: smallm  6: smalln  7: smallk  8: implementation name
# The generated rule compiles test_gemm.c into a uniquely named _st.o file
# using the single-threaded problem sizes (PDEF_ST) and thread string
# (STR_ST). The automatic variables are written as $$< and $$@ so that they
# are still unexpanded when the text produced by $(call ...) is handed to
# $(eval ...).
define make-st-rule
|
||||
test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_st.o: test_gemm.c Makefile
|
||||
$(CC) $(CFLAGS) $(ERRCHK) $(N_TRIALS) $(PDEF_ST) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_ST) -c $$< -o $$@
|
||||
endef
|
||||
|
||||
# Instantiate the rule function make-st-rule() for each BLIS/BLAS/CBLAS
|
||||
# implementation.
|
||||
$(foreach dt,$(DTS), \
|
||||
$(foreach tr,$(TRANS), \
|
||||
$(foreach st,$(STORS), \
|
||||
$(foreach sh,$(SHAPES), \
|
||||
$(foreach sm,$(SMS), \
|
||||
$(foreach sn,$(SNS), \
|
||||
$(foreach sk,$(SKS), \
|
||||
$(foreach impl,$(BIMPLS_ST), \
|
||||
$(eval $(call make-st-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl)))))))))))
|
||||
|
||||
# Define the function that will be used to instantiate compilation rules
|
||||
# for the various multithreaded implementations.
|
||||
define make-mt-rule
|
||||
test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_mt.o: test_gemm.c Makefile
|
||||
$(CC) $(CFLAGS) $(ERRCHK) $(N_TRIALS) $(PDEF_MT) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_MT) -c $$< -o $$@
|
||||
endef
|
||||
|
||||
# Instantiate the rule function make-mt-rule() for each BLIS/BLAS/CBLAS
|
||||
# implementation.
|
||||
$(foreach dt,$(DTS), \
|
||||
$(foreach tr,$(TRANS), \
|
||||
$(foreach st,$(STORS), \
|
||||
$(foreach sh,$(SHAPES), \
|
||||
$(foreach sm,$(SMS), \
|
||||
$(foreach sn,$(SNS), \
|
||||
$(foreach sk,$(SKS), \
|
||||
$(foreach impl,$(BIMPLS_MT), \
|
||||
$(eval $(call make-mt-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl)))))))))))
|
||||
|
||||
# Define the function that will be used to instantiate compilation rules
|
||||
# for the single-threaded Eigen implementation.
|
||||
define make-eigst-rule
|
||||
test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_st.o: test_gemm.c Makefile
|
||||
$(CXX) $(CXXFLAGS_ST) $(ERRCHK) $(N_TRIALS) $(PDEF_ST) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_ST) -c $$< -o $$@
|
||||
endef
|
||||
|
||||
# Instantiate the rule function make-eigst-rule() for each Eigen implementation.
|
||||
$(foreach dt,$(DTS), \
|
||||
$(foreach tr,$(TRANS), \
|
||||
$(foreach st,$(STORS), \
|
||||
$(foreach sh,$(SHAPES), \
|
||||
$(foreach sm,$(SMS), \
|
||||
$(foreach sn,$(SNS), \
|
||||
$(foreach sk,$(SKS), \
|
||||
$(foreach impl,$(EIMPLS), \
|
||||
$(eval $(call make-eigst-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl)))))))))))
|
||||
|
||||
# Define the function that will be used to instantiate compilation rules
|
||||
# for the multithreaded Eigen implementation.
|
||||
define make-eigmt-rule
|
||||
test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_mt.o: test_gemm.c Makefile
|
||||
$(CXX) $(CXXFLAGS_MT) $(ERRCHK) $(N_TRIALS) $(PDEF_MT) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_MT) -c $$< -o $$@
|
||||
endef
|
||||
|
||||
# Instantiate the rule function make-eigmt-rule() for each Eigen implementation.
|
||||
$(foreach dt,$(DTS), \
|
||||
$(foreach tr,$(TRANS), \
|
||||
$(foreach st,$(STORS), \
|
||||
$(foreach sh,$(SHAPES), \
|
||||
$(foreach sm,$(SMS), \
|
||||
$(foreach sn,$(SNS), \
|
||||
$(foreach sk,$(SKS), \
|
||||
$(foreach impl,$(EIMPLS), \
|
||||
$(eval $(call make-eigmt-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl)))))))))))
|
||||
|
||||
|
||||
# -- Executable file rules --
|
||||
|
||||
# NOTE: For the BLAS test drivers, we place the BLAS libraries before BLIS
|
||||
# on the link command line in case BLIS was configured with the BLAS
|
||||
# compatibility layer. This prevents BLIS from inadvertently getting called
|
||||
# for the BLAS routines we are trying to test with.
|
||||
|
||||
test_%_blissup_st.x: test_%_blissup_st.o $(LIBBLIS_LINK)
|
||||
$(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
test_%_blislpab_st.x: test_%_blislpab_st.o $(LIBBLIS_LINK)
|
||||
$(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
test_%_eigen_st.x: test_%_eigen_st.o $(LIBBLIS_LINK)
|
||||
$(CXX) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
test_%_openblas_st.x: test_%_openblas_st.o $(LIBBLIS_LINK)
|
||||
$(CC) $(strip $< $(OPENBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
test_%_blasfeo_st.x: test_%_blasfeo_st.o $(LIBBLIS_LINK)
|
||||
$(CC) $(strip $< $(BLASFEO_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
test_%_libxsmm_st.x: test_%_libxsmm_st.o $(LIBBLIS_LINK)
|
||||
$(CC) $(strip $< $(LIBXSMM_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
test_%_vendor_st.x: test_%_vendor_st.o $(LIBBLIS_LINK)
|
||||
$(CC) $(strip $< $(VENDOR_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
|
||||
test_%_blissup_mt.x: test_%_blissup_mt.o $(LIBBLIS_LINK)
|
||||
$(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
test_%_blislpab_mt.x: test_%_blislpab_mt.o $(LIBBLIS_LINK)
|
||||
$(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
test_%_eigen_mt.x: test_%_eigen_mt.o $(LIBBLIS_LINK)
|
||||
$(CXX) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
test_%_openblas_mt.x: test_%_openblas_mt.o $(LIBBLIS_LINK)
|
||||
$(CC) $(strip $< $(OPENBLASP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
test_%_vendor_mt.x: test_%_vendor_mt.o $(LIBBLIS_LINK)
|
||||
$(CC) $(strip $< $(VENDORP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)
|
||||
|
||||
|
||||
# -- Clean rules --
|
||||
|
||||
clean: cleanx
|
||||
|
||||
cleanx:
|
||||
- $(RM_F) *.x *.o
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
|
||||
% haswell
|
||||
plot_panel_trxsh(3.25,16,1,'mt','d','ccc',[ 6 8 10 ],'../results/haswell/20190823/4_800_4_mt201','has','MKL','matlab'); close; clear all;
|
||||
plot_panel_trxsh(3.25,16,1,'mt','d','rrr',[ 6 8 10 ],'../results/haswell/20190823/4_800_4_mt201','has','MKL','matlab'); close; clear all;
|
||||
|
||||
% kabylake
|
||||
plot_panel_trxsh(3.80,16,1,'mt','d','rrr',[ 6 8 10 ],'..','kbl','MKL','matlab'); close; clear all;
|
||||
plot_panel_trxsh(3.80,16,1,'mt','d','ccc',[ 6 8 10 ],'..','kbl','MKL','matlab'); close; clear all;
|
||||
|
||||
% epyc
|
||||
plot_panel_trxsh(3.00, 8,1,'mt','d','rrr',[ 6 8 10 ],'../results/epyc/20190826/4_800_4_mt256','epyc','MKL','matlab'); close; clear all;
|
||||
plot_panel_trxsh(3.00, 8,1,'mt','d','ccc',[ 6 8 10 ],'../results/epyc/20190826/4_800_4_mt256','epyc','MKL','matlab'); close; clear all;
|
||||
@@ -1,188 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# File prefixes.
|
||||
exec_root="test"
|
||||
out_root="output"
|
||||
|
||||
sys="blis"
|
||||
#sys="lonestar5"
|
||||
#sys="ul252"
|
||||
#sys="ul264"
|
||||
|
||||
if [ ${sys} = "blis" ]; then
|
||||
|
||||
export GOMP_CPU_AFFINITY="0-3"
|
||||
nt=4
|
||||
|
||||
elif [ ${sys} = "lonestar5" ]; then
|
||||
|
||||
export GOMP_CPU_AFFINITY="0-23"
|
||||
nt=24
|
||||
|
||||
elif [ ${sys} = "ul252" ]; then
|
||||
|
||||
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
|
||||
export GOMP_CPU_AFFINITY="0-51"
|
||||
nt=52
|
||||
|
||||
elif [ ${sys} = "ul264" ]; then
|
||||
|
||||
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64"
|
||||
export GOMP_CPU_AFFINITY="0-63"
|
||||
nt=64
|
||||
|
||||
fi
|
||||
|
||||
# Delay between test cases.
|
||||
delay=0.02
|
||||
|
||||
# Threadedness to test.
|
||||
threads="mt"
|
||||
|
||||
# Datatypes to test.
|
||||
#dts="d s"
|
||||
dts="d"
|
||||
|
||||
# Operations to test.
|
||||
ops="gemm"
|
||||
|
||||
# Transpose combinations to test.
|
||||
trans="nn nt tn tt"
|
||||
|
||||
# Storage combinations to test.
|
||||
#stors="rrr rrc rcr rcc crr crc ccr ccc"
|
||||
stors="rrr ccc"
|
||||
|
||||
# Problem shapes to test.
|
||||
shapes="sll lsl lls lss sls ssl lll"
|
||||
|
||||
# FGVZ: figure out how to probe what's in the directory and
|
||||
# execute everything that's there?
|
||||
sms="6"
|
||||
sns="8"
|
||||
sks="10"
|
||||
|
||||
# Implementations to test.
|
||||
impls="vendor blissup blislpab openblas eigen"
|
||||
#impls="vendor"
|
||||
#impls="blissup"
|
||||
#impls="blislpab"
|
||||
#impls="openblas"
|
||||
#impls="eigen"
|
||||
|
||||
# Save a copy of GOMP_CPU_AFFINITY so that if we have to unset it, we can
|
||||
# restore the value.
|
||||
GOMP_CPU_AFFINITYsave=${GOMP_CPU_AFFINITY}
|
||||
|
||||
# Example: test_dgemm_nn_rrc_m6npkp_blissup_st.x
|
||||
|
||||
for th in ${threads}; do
|
||||
|
||||
for dt in ${dts}; do
|
||||
|
||||
for op in ${ops}; do
|
||||
|
||||
for tr in ${trans}; do
|
||||
|
||||
for st in ${stors}; do
|
||||
|
||||
for sh in ${shapes}; do
|
||||
|
||||
for sm in ${sms}; do
|
||||
|
||||
for sn in ${sns}; do
|
||||
|
||||
for sk in ${sks}; do
|
||||
|
||||
for im in ${impls}; do
|
||||
|
||||
if [ "${im:0:4}" = "blis" ]; then
|
||||
unset OMP_NUM_THREADS
|
||||
export BLIS_NUM_THREADS=${nt}
|
||||
elif [ "${im}" = "openblas" ]; then
|
||||
unset OMP_NUM_THREADS
|
||||
export OPENBLAS_NUM_THREADS=${nt}
|
||||
elif [ "${im}" = "eigen" ]; then
|
||||
export OMP_NUM_THREADS=${nt}
|
||||
elif [ "${im}" = "vendor" ]; then
|
||||
unset OMP_NUM_THREADS
|
||||
export MKL_NUM_THREADS=${nt}
|
||||
fi
|
||||
|
||||
# Multithreaded OpenBLAS seems to have a problem
|
||||
# running properly if GOMP_CPU_AFFINITY is set.
|
||||
# So we temporarily unset it here if we are about
|
||||
# to execute OpenBLAS, but otherwise restore it.
|
||||
if [ ${im} = "openblas" ]; then
|
||||
unset GOMP_CPU_AFFINITY
|
||||
else
|
||||
export GOMP_CPU_AFFINITY="${GOMP_CPU_AFFINITYsave}"
|
||||
fi
|
||||
|
||||
# Limit execution of non-BLIS implementations to
|
||||
# rrr/ccc storage cases.
|
||||
if [ "${im:0:4}" != "blis" ] && \
|
||||
[ "${st}" != "rrr" ] && \
|
||||
[ "${st}" != "ccc" ]; then
|
||||
continue;
|
||||
fi
|
||||
|
||||
# Further limit execution of libxsmm to
|
||||
# ccc storage cases.
|
||||
if [ "${im:0:7}" = "libxsmm" ] && \
|
||||
[ "${st}" != "ccc" ]; then
|
||||
continue;
|
||||
fi
|
||||
|
||||
# Extract the shape chars for m, n, k.
|
||||
chm=${sh:0:1}
|
||||
chn=${sh:1:1}
|
||||
chk=${sh:2:1}
|
||||
|
||||
# Construct the shape substring (e.g. m6npkp)
|
||||
shstr=""
|
||||
|
||||
if [ ${chm} = "s" ]; then
|
||||
shstr="${shstr}m${sm}"
|
||||
else
|
||||
shstr="${shstr}mp"
|
||||
fi
|
||||
|
||||
if [ ${chn} = "s" ]; then
|
||||
shstr="${shstr}n${sn}"
|
||||
else
|
||||
shstr="${shstr}np"
|
||||
fi
|
||||
|
||||
if [ ${chk} = "s" ]; then
|
||||
shstr="${shstr}k${sk}"
|
||||
else
|
||||
shstr="${shstr}kp"
|
||||
fi
|
||||
|
||||
# Ex: test_dgemm_nn_rrc_m6npkp_blissup_st.x
|
||||
|
||||
# Construct the name of the test executable.
|
||||
exec_name="${exec_root}_${dt}${op}_${tr}_${st}_${shstr}_${im}_${th}.x"
|
||||
|
||||
# Construct the name of the output file.
|
||||
out_file="${out_root}_${th}_${dt}${op}_${tr}_${st}_${shstr}_${im}.m"
|
||||
|
||||
echo "Running (nt = ${nt}) ./${exec_name} > ${out_file}"
|
||||
|
||||
# Run executable.
|
||||
./${exec_name} > ${out_file}
|
||||
|
||||
sleep ${delay}
|
||||
|
||||
done
|
||||
done
|
||||
done
|
||||
done
|
||||
done
|
||||
done
|
||||
done
|
||||
done
|
||||
done
|
||||
done
|
||||
|
||||
@@ -1,589 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#ifdef EIGEN
|
||||
#define BLIS_DISABLE_BLAS_DEFS
|
||||
#include "blis.h"
|
||||
#include <Eigen/Core>
|
||||
//#include <Eigen/src/misc/blas.h>
|
||||
using namespace Eigen;
|
||||
#else
|
||||
#include "blis.h"
|
||||
#endif
|
||||
|
||||
//#define PRINT
|
||||
|
||||
int main( int argc, char** argv )
|
||||
{
|
||||
rntm_t rntm_g;
|
||||
|
||||
bli_init();
|
||||
|
||||
// Copy the global rntm_t object in case we need it later when disabling
|
||||
// sup.
|
||||
bli_rntm_init_from_global( &rntm_g );
|
||||
|
||||
#ifndef ERROR_CHECK
|
||||
bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
|
||||
#endif
|
||||
|
||||
|
||||
dim_t n_trials = N_TRIALS;
|
||||
|
||||
num_t dt = DT;
|
||||
|
||||
#if 1
|
||||
dim_t p_begin = P_BEGIN;
|
||||
dim_t p_max = P_MAX;
|
||||
dim_t p_inc = P_INC;
|
||||
#else
|
||||
dim_t p_begin = 4;
|
||||
dim_t p_max = 40;
|
||||
dim_t p_inc = 4;
|
||||
#endif
|
||||
|
||||
#if 1
|
||||
dim_t m_input = M_DIM;
|
||||
dim_t n_input = N_DIM;
|
||||
dim_t k_input = K_DIM;
|
||||
#else
|
||||
p_begin = p_inc = 32;
|
||||
dim_t m_input = 6;
|
||||
dim_t n_input = -1;
|
||||
dim_t k_input = -1;
|
||||
#endif
|
||||
|
||||
#if 1
|
||||
trans_t transa = TRANSA;
|
||||
trans_t transb = TRANSB;
|
||||
#else
|
||||
trans_t transa = BLIS_NO_TRANSPOSE;
|
||||
trans_t transb = BLIS_NO_TRANSPOSE;
|
||||
#endif
|
||||
|
||||
#if 1
|
||||
stor3_t sc = STOR3;
|
||||
#else
|
||||
stor3_t sc = BLIS_RRR;
|
||||
#endif
|
||||
|
||||
|
||||
inc_t rs_c, cs_c;
|
||||
inc_t rs_a, cs_a;
|
||||
inc_t rs_b, cs_b;
|
||||
|
||||
if ( sc == BLIS_RRR ) { rs_c = cs_c = -1; rs_a = cs_a = -1; rs_b = cs_b = -1; }
|
||||
else if ( sc == BLIS_RRC ) { rs_c = cs_c = -1; rs_a = cs_a = -1; rs_b = cs_b = 0; }
|
||||
else if ( sc == BLIS_RCR ) { rs_c = cs_c = -1; rs_a = cs_a = 0; rs_b = cs_b = -1; }
|
||||
else if ( sc == BLIS_RCC ) { rs_c = cs_c = -1; rs_a = cs_a = 0; rs_b = cs_b = 0; }
|
||||
else if ( sc == BLIS_CRR ) { rs_c = cs_c = 0; rs_a = cs_a = -1; rs_b = cs_b = -1; }
|
||||
else if ( sc == BLIS_CRC ) { rs_c = cs_c = 0; rs_a = cs_a = -1; rs_b = cs_b = 0; }
|
||||
else if ( sc == BLIS_CCR ) { rs_c = cs_c = 0; rs_a = cs_a = 0; rs_b = cs_b = -1; }
|
||||
else if ( sc == BLIS_CCC ) { rs_c = cs_c = 0; rs_a = cs_a = 0; rs_b = cs_b = 0; }
|
||||
else { bli_abort(); }
|
||||
|
||||
f77_int cbla_storage;
|
||||
|
||||
if ( sc == BLIS_RRR ) cbla_storage = CblasRowMajor;
|
||||
else if ( sc == BLIS_CCC ) cbla_storage = CblasColMajor;
|
||||
else cbla_storage = -1;
|
||||
|
||||
( void )cbla_storage;
|
||||
|
||||
|
||||
char dt_ch;
|
||||
|
||||
// Choose the char corresponding to the requested datatype.
|
||||
if ( bli_is_float( dt ) ) dt_ch = 's';
|
||||
else if ( bli_is_double( dt ) ) dt_ch = 'd';
|
||||
else if ( bli_is_scomplex( dt ) ) dt_ch = 'c';
|
||||
else dt_ch = 'z';
|
||||
|
||||
f77_char f77_transa;
|
||||
f77_char f77_transb;
|
||||
char transal, transbl;
|
||||
|
||||
bli_param_map_blis_to_netlib_trans( transa, &f77_transa );
|
||||
bli_param_map_blis_to_netlib_trans( transb, &f77_transb );
|
||||
|
||||
transal = tolower( f77_transa );
|
||||
transbl = tolower( f77_transb );
|
||||
|
||||
f77_int cbla_transa = ( transal == 'n' ? CblasNoTrans : CblasTrans );
|
||||
f77_int cbla_transb = ( transbl == 'n' ? CblasNoTrans : CblasTrans );
|
||||
|
||||
( void )cbla_transa;
|
||||
( void )cbla_transb;
|
||||
|
||||
dim_t p;
|
||||
|
||||
// Begin with initializing the last entry to zero so that
|
||||
// matlab allocates space for the entire array once up-front.
|
||||
for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ;
|
||||
|
||||
printf( "data_%s_%cgemm_%c%c_%s", THR_STR, dt_ch,
|
||||
transal, transbl, STR );
|
||||
printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n",
|
||||
( unsigned long )(p - p_begin)/p_inc + 1,
|
||||
( unsigned long )0,
|
||||
( unsigned long )0,
|
||||
( unsigned long )0, 0.0 );
|
||||
|
||||
|
||||
//for ( p = p_begin; p <= p_max; p += p_inc )
|
||||
for ( p = p_max; p_begin <= p; p -= p_inc )
|
||||
{
|
||||
obj_t a, b, c;
|
||||
obj_t c_save;
|
||||
obj_t alpha, beta;
|
||||
dim_t m, n, k;
|
||||
|
||||
if ( m_input < 0 ) m = p / ( dim_t )abs(m_input);
|
||||
else m = ( dim_t ) m_input;
|
||||
if ( n_input < 0 ) n = p / ( dim_t )abs(n_input);
|
||||
else n = ( dim_t ) n_input;
|
||||
if ( k_input < 0 ) k = p / ( dim_t )abs(k_input);
|
||||
else k = ( dim_t ) k_input;
|
||||
|
||||
bli_obj_create( dt, 1, 1, 0, 0, &alpha );
|
||||
bli_obj_create( dt, 1, 1, 0, 0, &beta );
|
||||
|
||||
bli_obj_create( dt, m, n, rs_c, cs_c, &c );
|
||||
bli_obj_create( dt, m, n, rs_c, cs_c, &c_save );
|
||||
|
||||
if ( bli_does_notrans( transa ) )
|
||||
bli_obj_create( dt, m, k, rs_a, cs_a, &a );
|
||||
else
|
||||
bli_obj_create( dt, k, m, rs_a, cs_a, &a );
|
||||
|
||||
if ( bli_does_notrans( transb ) )
|
||||
bli_obj_create( dt, k, n, rs_b, cs_b, &b );
|
||||
else
|
||||
bli_obj_create( dt, n, k, rs_b, cs_b, &b );
|
||||
|
||||
bli_randm( &a );
|
||||
bli_randm( &b );
|
||||
bli_randm( &c );
|
||||
|
||||
bli_obj_set_conjtrans( transa, &a );
|
||||
bli_obj_set_conjtrans( transb, &b );
|
||||
|
||||
bli_setsc( (1.0/1.0), 0.0, &alpha );
|
||||
bli_setsc( (1.0/1.0), 0.0, &beta );
|
||||
|
||||
bli_copym( &c, &c_save );
|
||||
|
||||
#ifdef EIGEN
|
||||
double alpha_r, alpha_i;
|
||||
|
||||
bli_getsc( &alpha, &alpha_r, &alpha_i );
|
||||
|
||||
void* ap = bli_obj_buffer_at_off( &a );
|
||||
void* bp = bli_obj_buffer_at_off( &b );
|
||||
void* cp = bli_obj_buffer_at_off( &c );
|
||||
|
||||
const int os_a = ( bli_obj_is_col_stored( &a ) ? bli_obj_col_stride( &a )
|
||||
: bli_obj_row_stride( &a ) );
|
||||
const int os_b = ( bli_obj_is_col_stored( &b ) ? bli_obj_col_stride( &b )
|
||||
: bli_obj_row_stride( &b ) );
|
||||
const int os_c = ( bli_obj_is_col_stored( &c ) ? bli_obj_col_stride( &c )
|
||||
: bli_obj_row_stride( &c ) );
|
||||
|
||||
Stride<Dynamic,1> stride_a( os_a, 1 );
|
||||
Stride<Dynamic,1> stride_b( os_b, 1 );
|
||||
Stride<Dynamic,1> stride_c( os_c, 1 );
|
||||
|
||||
#if defined(IS_FLOAT)
|
||||
#elif defined (IS_DOUBLE)
|
||||
#ifdef A_STOR_R
|
||||
typedef Matrix<double, Dynamic, Dynamic, RowMajor> MatrixXd_A;
|
||||
#else
|
||||
typedef Matrix<double, Dynamic, Dynamic, ColMajor> MatrixXd_A;
|
||||
#endif
|
||||
#ifdef B_STOR_R
|
||||
typedef Matrix<double, Dynamic, Dynamic, RowMajor> MatrixXd_B;
|
||||
#else
|
||||
typedef Matrix<double, Dynamic, Dynamic, ColMajor> MatrixXd_B;
|
||||
#endif
|
||||
#ifdef C_STOR_R
|
||||
typedef Matrix<double, Dynamic, Dynamic, RowMajor> MatrixXd_C;
|
||||
#else
|
||||
typedef Matrix<double, Dynamic, Dynamic, ColMajor> MatrixXd_C;
|
||||
#endif
|
||||
|
||||
#ifdef A_NOTRANS // A is not transposed
|
||||
Map<MatrixXd_A, 0, Stride<Dynamic,1> > A( ( double* )ap, m, k, stride_a );
|
||||
#else // A is transposed
|
||||
Map<MatrixXd_A, 0, Stride<Dynamic,1> > A( ( double* )ap, k, m, stride_a );
|
||||
#endif
|
||||
|
||||
#ifdef B_NOTRANS // B is not transposed
|
||||
Map<MatrixXd_B, 0, Stride<Dynamic,1> > B( ( double* )bp, k, n, stride_b );
|
||||
#else // B is transposed
|
||||
Map<MatrixXd_B, 0, Stride<Dynamic,1> > B( ( double* )bp, n, k, stride_b );
|
||||
#endif
|
||||
|
||||
Map<MatrixXd_C, 0, Stride<Dynamic,1> > C( ( double* )cp, m, n, stride_c );
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
double dtime_save = DBL_MAX;
|
||||
|
||||
for ( dim_t r = 0; r < n_trials; ++r )
|
||||
{
|
||||
bli_copym( &c_save, &c );
|
||||
|
||||
|
||||
double dtime = bli_clock();
|
||||
|
||||
|
||||
#ifdef EIGEN
|
||||
|
||||
#ifdef A_NOTRANS
|
||||
#ifdef B_NOTRANS
|
||||
C.noalias() += alpha_r * A * B;
|
||||
#else // B_TRANS
|
||||
C.noalias() += alpha_r * A * B.transpose();
|
||||
#endif
|
||||
#else // A_TRANS
|
||||
#ifdef B_NOTRANS
|
||||
C.noalias() += alpha_r * A.transpose() * B;
|
||||
#else // B_TRANS
|
||||
C.noalias() += alpha_r * A.transpose() * B.transpose();
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#ifdef BLIS
|
||||
#ifdef SUP
|
||||
// Allow sup.
|
||||
bli_gemm( &alpha,
|
||||
&a,
|
||||
&b,
|
||||
&beta,
|
||||
&c );
|
||||
#else
|
||||
// Disable sup and use the expert interface.
|
||||
//rntm_t rntm = BLIS_RNTM_INITIALIZER;
|
||||
rntm_t rntm = rntm_g;
|
||||
bli_rntm_disable_l3_sup( &rntm );
|
||||
|
||||
bli_gemm_ex( &alpha,
|
||||
&a,
|
||||
&b,
|
||||
&beta,
|
||||
&c, NULL, &rntm );
|
||||
#endif
|
||||
#endif
|
||||
#ifdef BLAS
|
||||
if ( bli_is_float( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
float* alphap = ( float* )bli_obj_buffer( &alpha );
|
||||
float* ap = ( float* )bli_obj_buffer( &a );
|
||||
float* bp = ( float* )bli_obj_buffer( &b );
|
||||
float* betap = ( float* )bli_obj_buffer( &beta );
|
||||
float* cp = ( float* )bli_obj_buffer( &c );
|
||||
|
||||
#ifdef XSMM
|
||||
libxsmm_sgemm( &f77_transa,
|
||||
#else
|
||||
sgemm_( &f77_transa,
|
||||
#endif
|
||||
&f77_transb,
|
||||
&mm,
|
||||
&nn,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_double( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
double* alphap = ( double* )bli_obj_buffer( &alpha );
|
||||
double* ap = ( double* )bli_obj_buffer( &a );
|
||||
double* bp = ( double* )bli_obj_buffer( &b );
|
||||
double* betap = ( double* )bli_obj_buffer( &beta );
|
||||
double* cp = ( double* )bli_obj_buffer( &c );
|
||||
|
||||
#ifdef XSMM
|
||||
libxsmm_dgemm( &f77_transa,
|
||||
#else
|
||||
dgemm_( &f77_transa,
|
||||
#endif
|
||||
&f77_transb,
|
||||
&mm,
|
||||
&nn,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_scomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
scomplex* alphap = ( scomplex* )bli_obj_buffer( &alpha );
|
||||
scomplex* ap = ( scomplex* )bli_obj_buffer( &a );
|
||||
scomplex* bp = ( scomplex* )bli_obj_buffer( &b );
|
||||
scomplex* betap = ( scomplex* )bli_obj_buffer( &beta );
|
||||
scomplex* cp = ( scomplex* )bli_obj_buffer( &c );
|
||||
|
||||
#ifdef XSMM
|
||||
libxsmm_cgemm( &f77_transa,
|
||||
#else
|
||||
cgemm_( &f77_transa,
|
||||
#endif
|
||||
&f77_transb,
|
||||
&mm,
|
||||
&nn,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else if ( bli_is_dcomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
dcomplex* alphap = ( dcomplex* )bli_obj_buffer( &alpha );
|
||||
dcomplex* ap = ( dcomplex* )bli_obj_buffer( &a );
|
||||
dcomplex* bp = ( dcomplex* )bli_obj_buffer( &b );
|
||||
dcomplex* betap = ( dcomplex* )bli_obj_buffer( &beta );
|
||||
dcomplex* cp = ( dcomplex* )bli_obj_buffer( &c );
|
||||
|
||||
#ifdef XSMM
|
||||
libxsmm_zgemm( &f77_transa,
|
||||
#else
|
||||
zgemm_( &f77_transa,
|
||||
#endif
|
||||
&f77_transb,
|
||||
&mm,
|
||||
&nn,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
#endif
|
||||
#ifdef CBLAS
|
||||
if ( bli_is_float( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
#ifdef C_STOR_R
|
||||
f77_int lda = bli_obj_row_stride( &a );
|
||||
f77_int ldb = bli_obj_row_stride( &b );
|
||||
f77_int ldc = bli_obj_row_stride( &c );
|
||||
#else
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
#endif
|
||||
float* alphap = bli_obj_buffer( &alpha );
|
||||
float* ap = bli_obj_buffer( &a );
|
||||
float* bp = bli_obj_buffer( &b );
|
||||
float* betap = bli_obj_buffer( &beta );
|
||||
float* cp = bli_obj_buffer( &c );
|
||||
|
||||
cblas_sgemm( cbla_storage,
|
||||
cbla_transa,
|
||||
cbla_transb,
|
||||
mm,
|
||||
nn,
|
||||
kk,
|
||||
*alphap,
|
||||
ap, lda,
|
||||
bp, ldb,
|
||||
*betap,
|
||||
cp, ldc );
|
||||
}
|
||||
else if ( bli_is_double( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
#ifdef C_STOR_R
|
||||
f77_int lda = bli_obj_row_stride( &a );
|
||||
f77_int ldb = bli_obj_row_stride( &b );
|
||||
f77_int ldc = bli_obj_row_stride( &c );
|
||||
#else
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
#endif
|
||||
double* alphap = bli_obj_buffer( &alpha );
|
||||
double* ap = bli_obj_buffer( &a );
|
||||
double* bp = bli_obj_buffer( &b );
|
||||
double* betap = bli_obj_buffer( &beta );
|
||||
double* cp = bli_obj_buffer( &c );
|
||||
|
||||
cblas_dgemm( cbla_storage,
|
||||
cbla_transa,
|
||||
cbla_transb,
|
||||
mm,
|
||||
nn,
|
||||
kk,
|
||||
*alphap,
|
||||
ap, lda,
|
||||
bp, ldb,
|
||||
*betap,
|
||||
cp, ldc );
|
||||
}
|
||||
else if ( bli_is_scomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
#ifdef C_STOR_R
|
||||
f77_int lda = bli_obj_row_stride( &a );
|
||||
f77_int ldb = bli_obj_row_stride( &b );
|
||||
f77_int ldc = bli_obj_row_stride( &c );
|
||||
#else
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
#endif
|
||||
scomplex* alphap = bli_obj_buffer( &alpha );
|
||||
scomplex* ap = bli_obj_buffer( &a );
|
||||
scomplex* bp = bli_obj_buffer( &b );
|
||||
scomplex* betap = bli_obj_buffer( &beta );
|
||||
scomplex* cp = bli_obj_buffer( &c );
|
||||
|
||||
cblas_cgemm( cbla_storage,
|
||||
cbla_transa,
|
||||
cbla_transb,
|
||||
mm,
|
||||
nn,
|
||||
kk,
|
||||
alphap,
|
||||
ap, lda,
|
||||
bp, ldb,
|
||||
betap,
|
||||
cp, ldc );
|
||||
}
|
||||
else if ( bli_is_dcomplex( dt ) )
|
||||
{
|
||||
f77_int mm = bli_obj_length( &c );
|
||||
f77_int kk = bli_obj_width_after_trans( &a );
|
||||
f77_int nn = bli_obj_width( &c );
|
||||
#ifdef C_STOR_R
|
||||
f77_int lda = bli_obj_row_stride( &a );
|
||||
f77_int ldb = bli_obj_row_stride( &b );
|
||||
f77_int ldc = bli_obj_row_stride( &c );
|
||||
#else
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
f77_int ldb = bli_obj_col_stride( &b );
|
||||
f77_int ldc = bli_obj_col_stride( &c );
|
||||
#endif
|
||||
dcomplex* alphap = bli_obj_buffer( &alpha );
|
||||
dcomplex* ap = bli_obj_buffer( &a );
|
||||
dcomplex* bp = bli_obj_buffer( &b );
|
||||
dcomplex* betap = bli_obj_buffer( &beta );
|
||||
dcomplex* cp = bli_obj_buffer( &c );
|
||||
|
||||
cblas_zgemm( cbla_storage,
|
||||
cbla_transa,
|
||||
cbla_transb,
|
||||
mm,
|
||||
nn,
|
||||
kk,
|
||||
alphap,
|
||||
ap, lda,
|
||||
bp, ldb,
|
||||
betap,
|
||||
cp, ldc );
|
||||
}
|
||||
#endif
|
||||
|
||||
dtime_save = bli_clock_min_diff( dtime_save, dtime );
|
||||
}
|
||||
|
||||
double gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 );
|
||||
|
||||
if ( bli_is_complex( dt ) ) gflops *= 4.0;
|
||||
|
||||
printf( "data_%s_%cgemm_%c%c_%s", THR_STR, dt_ch,
|
||||
transal, transbl, STR );
|
||||
printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n",
|
||||
( unsigned long )(p - p_begin)/p_inc + 1,
|
||||
( unsigned long )m,
|
||||
( unsigned long )n,
|
||||
( unsigned long )k, gflops );
|
||||
|
||||
bli_obj_free( &alpha );
|
||||
bli_obj_free( &beta );
|
||||
|
||||
bli_obj_free( &a );
|
||||
bli_obj_free( &b );
|
||||
bli_obj_free( &c );
|
||||
bli_obj_free( &c_save );
|
||||
}
|
||||
|
||||
//bli_finalize();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user