Fusion Conv+Bias+ReLU(+Add) (#62)

* fix relu

* clean up

* clean up

* adding 1x1 conv

* adding 1x1 conv

* added 1x1 conv

* refactor

* refactor

* refactor

* added profiler for conv+bias+relu+add

* clean up

* adding conv+bias+relu

* adding conv+bias+relu

* added conv+bias+relu

* Update README.md

* update cpu verification

* adding c shuffle

* update static_tensor for dealing with invalid element

* adding c shuffle

* debugging

* fix bug

* convert to fp16 before shuffle

* shuffle more than one M/NRepeat

* clean up

* remove coordinate step hack from GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v3r1

* clean up

* remove coordinate step hack from all gridwise gemm xdl

* clean up coordinate step hack

* clean up coordinate step hack

* ThreadwiseTensorSliceTransfer_v3r2 support pointwise op on both src and dst

* adding output shuffle in conv+bias+relu+add

* update

* added conv+bias+relu+add with c shuffle

* added conv+bias+relu+add with c shuffle

* fix forward_sweep bugs in threadwise copy

* clean up

* refactor

* clean up

* clean up

* added conv_c_shuffle+bias_relu

* clean up

* added conv+bias+relu+atomic_add

* clean up

* clean up

* clean up

* clean up

* clean up

* clean up

* misc fixes; add 1x1 specialization

* clean up

* delete unused device op

* clean up

* add support for odd C value
This commit is contained in:
Chao Liu
2021-12-26 08:43:42 -06:00
committed by GitHub
parent a4f24233e5
commit acbd7bd7c5
90 changed files with 13347 additions and 2639 deletions

View File

@@ -5,22 +5,42 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <half.hpp>
int gemm_profiler(int, char*[]);
int conv_profiler(int, char*[]);
int profile_gemm(int, char*[]);
int profile_conv_fwd(int, char*[]);
int profile_conv_fwd_bias_relu(int, char*[]);
int profile_conv_fwd_bias_relu_add(int, char*[]);
int profile_conv_fwd_bias_relu_atomic_add(int, char*[]);
/* Print the list of supported tensor operations for arg1.
 * Shared by the no-argument and unknown-argument paths. */
static void print_profiler_usage(void)
{
    printf("arg1: tensor operation (gemm: GEMM;\n"
           "                        conv_fwd: ForwardConvolution;\n"
           "                        conv_fwd_bias_relu: ForwardConvolution+Bias+ReLU;\n"
           "                        conv_fwd_bias_relu_add: ForwardConvolution+Bias+ReLU+Add;\n"
           "                        conv_fwd_bias_relu_atomic_add: "
           "ForwardConvolution+Bias+ReLU+AtomicAdd)\n");
}

/* Profiler driver: dispatches argv[1] to the requested operation's
 * profiler and returns that profiler's exit code.
 *
 * argv[1] selects one of: gemm, conv_fwd, conv_fwd_bias_relu,
 * conv_fwd_bias_relu_add, conv_fwd_bias_relu_atomic_add.
 * Remaining arguments are forwarded unmodified to the selected profiler.
 * Prints usage and returns 0 when argv[1] is missing or unrecognized. */
int main(int argc, char* argv[])
{
    /* argv[1] was previously read unconditionally: undefined behavior
     * when the binary is launched with no arguments. */
    if(argc < 2)
    {
        print_profiler_usage();
        return 0;
    }

    if(strcmp(argv[1], "gemm") == 0)
    {
        return profile_gemm(argc, argv);
    }
    else if(strcmp(argv[1], "conv_fwd") == 0)
    {
        return profile_conv_fwd(argc, argv);
    }
    else if(strcmp(argv[1], "conv_fwd_bias_relu") == 0)
    {
        return profile_conv_fwd_bias_relu(argc, argv);
    }
    else if(strcmp(argv[1], "conv_fwd_bias_relu_add") == 0)
    {
        return profile_conv_fwd_bias_relu_add(argc, argv);
    }
    else if(strcmp(argv[1], "conv_fwd_bias_relu_atomic_add") == 0)
    {
        return profile_conv_fwd_bias_relu_atomic_add(argc, argv);
    }
    else
    {
        /* Unknown operation: show the fixed-up usage text (the original
         * string had a stray ')' mid-list and mixed ';'/')' separators). */
        print_profiler_usage();
        return 0;
    }
}