mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
Run CI on MI100 nodes only, run daily QA on MI200 nodes. (#339)
* turn on full qa only on gfx90a, use int initialization
* change script syntax
* update script parsing clinfo, throw exception if 0 devices
* fix syntax
* try using toBoolean for the QA conditions
* run regular CI on MI100 only, use MI200 only for daily QA
* evaluate when conditions before agent
* launch QA on develop branch and update profile_reduce script
* update test script
* update script
* remove false dependency from dockerfile
* try removing rbuild completely
Co-authored-by: Chao Liu <chao.liu2@amd.com>
Co-authored-by: Chao Liu <lc.roy86@gmail.com>
[ROCm/composable_kernel commit: 984b3722bf]
This commit is contained in:
16
Dockerfile
16
Dockerfile
@@ -24,8 +24,8 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
|
||||
cmake-data=3.15.1-0kitware1 \
|
||||
cmake=3.15.1-0kitware1 \
|
||||
curl \
|
||||
g++ \
|
||||
gdb \
|
||||
# g++ \
|
||||
# gdb \
|
||||
git \
|
||||
hip-rocclr \
|
||||
jq \
|
||||
@@ -63,16 +63,16 @@ RUN wget https://github.com/Yelp/dumb-init/releases/download/v1.2.0/dumb-init_1.
|
||||
RUN dpkg -i dumb-init_*.deb && rm dumb-init_*.deb
|
||||
|
||||
# Install cget
|
||||
RUN pip install cget
|
||||
#RUN pip install cget
|
||||
|
||||
# Install rclone
|
||||
RUN pip install https://github.com/pfultz2/rclone/archive/master.tar.gz
|
||||
#RUN pip install https://github.com/pfultz2/rclone/archive/master.tar.gz
|
||||
|
||||
ARG PREFIX=/opt/rocm
|
||||
# Install dependencies
|
||||
RUN cget install pfultz2/rocm-recipes
|
||||
#RUN cget install pfultz2/rocm-recipes
|
||||
# Install rbuild
|
||||
RUN pip3 install https://github.com/RadeonOpenCompute/rbuild/archive/6d78a0553babdaea8d2da5de15cbda7e869594b8.tar.gz
|
||||
#RUN pip3 install https://github.com/RadeonOpenCompute/rbuild/archive/6d78a0553babdaea8d2da5de15cbda7e869594b8.tar.gz
|
||||
# Install packages for processing the performance results
|
||||
RUN pip3 install --upgrade pip
|
||||
RUN pip3 install sqlalchemy
|
||||
@@ -85,9 +85,9 @@ ENV UBSAN_OPTIONS=print_stacktrace=1
|
||||
|
||||
ENV LC_ALL=C.UTF-8
|
||||
ENV LANG=C.UTF-8
|
||||
ADD rbuild.ini /rbuild.ini
|
||||
#ADD rbuild.ini /rbuild.ini
|
||||
ADD dev-requirements.txt dev-requirements.txt
|
||||
RUN rbuild prepare -s develop -d $PREFIX
|
||||
#RUN rbuild prepare -s develop -d $PREFIX
|
||||
RUN groupadd -f render
|
||||
|
||||
# Install the new rocm-cmake version
|
||||
|
||||
57
Jenkinsfile
vendored
57
Jenkinsfile
vendored
@@ -12,8 +12,9 @@ def show_node_info() {
|
||||
}
|
||||
|
||||
def runShell(String command){
|
||||
def responseCode = sh returnStatus: true, script: "${command} &> tmp.txt"
|
||||
def responseCode = sh returnStatus: true, script: "${command} > tmp.txt"
|
||||
def output = readFile(file: "tmp.txt")
|
||||
echo "tmp.txt contents: $output"
|
||||
return (output != "")
|
||||
}
|
||||
|
||||
@@ -121,8 +122,7 @@ def buildHipClangJob(Map conf=[:]){
|
||||
timeout(time: 5, unit: 'MINUTES'){
|
||||
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log'
|
||||
if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
|
||||
echo "GPU not found"
|
||||
throw e
|
||||
throw new Exception ("GPU not found")
|
||||
}
|
||||
else{
|
||||
echo "GPU is OK"
|
||||
@@ -140,8 +140,7 @@ def buildHipClangJob(Map conf=[:]){
|
||||
timeout(time: 5, unit: 'MINUTES'){
|
||||
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo |tee clinfo.log'
|
||||
if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
|
||||
echo "GPU not found"
|
||||
throw e
|
||||
throw new Exception ("GPU not found")
|
||||
}
|
||||
else{
|
||||
echo "GPU is OK"
|
||||
@@ -153,14 +152,6 @@ def buildHipClangJob(Map conf=[:]){
|
||||
withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
|
||||
timeout(time: 5, unit: 'HOURS')
|
||||
{
|
||||
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log'
|
||||
if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
|
||||
echo "GPU not found"
|
||||
throw e
|
||||
}
|
||||
else{
|
||||
echo "GPU is OK"
|
||||
}
|
||||
cmake_build(conf)
|
||||
}
|
||||
}
|
||||
@@ -223,8 +214,7 @@ def runCKProfiler(Map conf=[:]){
|
||||
timeout(time: 5, unit: 'MINUTES'){
|
||||
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log'
|
||||
if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
|
||||
echo "GPU not found"
|
||||
throw e
|
||||
throw new Exception ("GPU not found")
|
||||
}
|
||||
else{
|
||||
echo "GPU is OK"
|
||||
@@ -242,8 +232,7 @@ def runCKProfiler(Map conf=[:]){
|
||||
timeout(time: 5, unit: 'MINUTES'){
|
||||
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log'
|
||||
if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
|
||||
echo "GPU not found"
|
||||
throw e
|
||||
throw new Exception ("GPU not found")
|
||||
}
|
||||
else{
|
||||
echo "GPU is OK"
|
||||
@@ -268,7 +257,7 @@ def runCKProfiler(Map conf=[:]){
|
||||
archiveArtifacts "perf_gemm_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_resnet50_N256_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_resnet50_N4_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_bathced_gemm_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_batched_gemm_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_grouped_gemm_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_fwd_conv_${gpu_arch}.log"
|
||||
archiveArtifacts "perf_bwd_conv_${gpu_arch}.log"
|
||||
@@ -278,7 +267,7 @@ def runCKProfiler(Map conf=[:]){
|
||||
stash name: "perf_gemm_${gpu_arch}.log"
|
||||
stash name: "perf_resnet50_N256_${gpu_arch}.log"
|
||||
stash name: "perf_resnet50_N4_${gpu_arch}.log"
|
||||
stash name: "perf_bathced_gemm_${gpu_arch}.log"
|
||||
stash name: "perf_batched_gemm_${gpu_arch}.log"
|
||||
stash name: "perf_grouped_gemm_${gpu_arch}.log"
|
||||
stash name: "perf_fwd_conv_${gpu_arch}.log"
|
||||
stash name: "perf_bwd_conv_${gpu_arch}.log"
|
||||
@@ -362,7 +351,7 @@ def process_results(Map conf=[:]){
|
||||
unstash "perf_gemm_${gpu_arch}.log"
|
||||
unstash "perf_resnet50_N256_${gpu_arch}.log"
|
||||
unstash "perf_resnet50_N4_${gpu_arch}.log"
|
||||
unstash "perf_bathced_gemm_${gpu_arch}.log"
|
||||
unstash "perf_batched_gemm_${gpu_arch}.log"
|
||||
unstash "perf_grouped_gemm_${gpu_arch}.log"
|
||||
unstash "perf_fwd_conv_${gpu_arch}.log"
|
||||
unstash "perf_bwd_conv_${gpu_arch}.log"
|
||||
@@ -389,13 +378,13 @@ def process_results(Map conf=[:]){
|
||||
}
|
||||
|
||||
//launch develop branch daily at 23:00 in FULL_QA mode
|
||||
//CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;USE_9110=true''' : ""
|
||||
CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;USE_9110=true''' : ""
|
||||
|
||||
pipeline {
|
||||
agent none
|
||||
//triggers {
|
||||
// cron(CRON_SETTINGS)
|
||||
//}
|
||||
triggers {
|
||||
parameterizedCron(CRON_SETTINGS)
|
||||
}
|
||||
options {
|
||||
parallelsAlwaysFailFast()
|
||||
}
|
||||
@@ -467,6 +456,10 @@ pipeline {
|
||||
}
|
||||
stage("Run Tests: gfx90a")
|
||||
{
|
||||
when {
|
||||
beforeAgent true
|
||||
expression { params.RUN_FULL_QA.toBoolean() }
|
||||
}
|
||||
agent{ label rocmnode("gfx90a")}
|
||||
environment{
|
||||
setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx90a -O3 " -DBUILD_DEV=On """
|
||||
@@ -500,6 +493,10 @@ pipeline {
|
||||
{
|
||||
stage("Run ckProfiler: gfx908")
|
||||
{
|
||||
when {
|
||||
beforeAgent true
|
||||
expression { !params.RUN_FULL_QA.toBoolean() }
|
||||
}
|
||||
agent{ label rocmnode("gfx908")}
|
||||
environment{
|
||||
setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " -DBUILD_DEV=On """
|
||||
@@ -510,6 +507,10 @@ pipeline {
|
||||
}
|
||||
stage("Run ckProfiler: gfx90a")
|
||||
{
|
||||
when {
|
||||
beforeAgent true
|
||||
expression { params.RUN_FULL_QA.toBoolean() }
|
||||
}
|
||||
agent{ label rocmnode("gfx90a")}
|
||||
environment{
|
||||
setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx90a -O3 " -DBUILD_DEV=On """
|
||||
@@ -525,12 +526,20 @@ pipeline {
|
||||
parallel
|
||||
{
|
||||
stage("Process results for gfx908"){
|
||||
when {
|
||||
beforeAgent true
|
||||
expression { !params.RUN_FULL_QA.toBoolean() }
|
||||
}
|
||||
agent { label 'mici' }
|
||||
steps{
|
||||
process_results(gpu_arch: "gfx908")
|
||||
}
|
||||
}
|
||||
stage("Process results for gfx90a"){
|
||||
when {
|
||||
beforeAgent true
|
||||
expression { params.RUN_FULL_QA.toBoolean() }
|
||||
}
|
||||
agent { label 'mici' }
|
||||
steps{
|
||||
process_results(gpu_arch: "gfx90a")
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
|
||||
make -j $1
|
||||
|
||||
DRIVER=example/$1
|
||||
VERIFY=$2
|
||||
INIT=$3
|
||||
REPEAT=$4
|
||||
|
||||
# test
|
||||
######## verify init repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads Desired_grid_size__
|
||||
$DRIVER $VERIFY $INIT $REPEAT 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT 128 256 64 1 1 1 1 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT 256 64 3 7 7 230 230 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT 128 512 512 3 3 7 7 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT 256 64 3 7 7 224 224 2 2 1 1 3 3 3 3
|
||||
|
||||
N=$5
|
||||
|
||||
# Resnet50
|
||||
######## verify init repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads Desired_grid_size__
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 128 128 3 3 58 58 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 256 256 3 3 30 30 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 512 256 1 1 56 56 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 512 512 3 3 16 16 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 1024 512 1 1 28 28 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
@@ -1,71 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
|
||||
make -j conv_fwd_driver_offline
|
||||
#make -j conv_bwd_driver_offline
|
||||
#make -j conv_wrw_driver_offline
|
||||
|
||||
DRIVER="./host/driver_offline/conv_fwd_driver_offline"
|
||||
#DRIVER="./host/driver_offline/conv_bwd_driver_offline"
|
||||
#DRIVER="./host/driver_offline/conv_wrw_driver_offline"
|
||||
|
||||
LAYOUT=$1
|
||||
ALGO=$2
|
||||
VERIFY=$3
|
||||
INIT=$4
|
||||
LOG=$5
|
||||
REPEAT=$6
|
||||
|
||||
DESIRED_GRID_SIZE=$7
|
||||
|
||||
######### layout algo verify init log repeat N__ K___ C___ Y X Hi_ Wi__ Strides Dilations LeftPads RightPads Desired_grid_size__
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 192 3 3 71 71 2 2 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 1024 1 7 17 17 1 1 1 1 0 3 0 3 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 14 14 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 128 3 3 14 14 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 512 3 3 7 7 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
$DESIRED_GRID_SIZE
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 192 3 3 35 35 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 30 30 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 512 3 3 16 16 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
$DESIRED_GRID_SIZE
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
$DESIRED_GRID_SIZE
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 14 14 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
$DESIRED_GRID_SIZE
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 32 256 3 3 1 1 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 32 256 1 1 1 1 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
$DESIRED_GRID_SIZE
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 64 1 1 2 2 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 128 1 1 2 2 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
|
||||
# Resnet50
|
||||
######### layout algo verify init log repeat N__ K___ C___ Y X Hi_ Wi__ Strides Dilations LeftPads RightPads Desired_grid_size__
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 128 128 3 3 28 28 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 128 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 128 128 3 3 58 58 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 14 14 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 30 30 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 128 256 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 256 1 1 56 56 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 64 256 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 512 3 3 16 16 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 1024 512 1 1 28 28 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 128 512 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 512 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 512 3 3 7 7 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 64 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 64 64 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 64 64 3 3 56 56 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
@@ -1,20 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=1
|
||||
|
||||
make -j gemm_xdl
|
||||
|
||||
DRIVER="./example/gemm_xdl"
|
||||
|
||||
VERIFY=$1
|
||||
INIT=$2
|
||||
LOG=$3
|
||||
REPEAT=$4
|
||||
|
||||
######### verify init log repeat M___ N___ K___ StrideA StrideB StrideC
|
||||
#$DRIVER $VERIFY $INIT $LOG $REPEAT 960 1024 1024 1024 1024 1024
|
||||
#$DRIVER $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1024 1024 1024
|
||||
#$DRIVER $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 2048 2048 2048
|
||||
$DRIVER $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 4096 4096 4096
|
||||
#$DRIVER $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 8192 8192 8192
|
||||
@@ -1,20 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
|
||||
make -j $1
|
||||
|
||||
DRIVER=example/$1
|
||||
VERIFY=$2
|
||||
INIT=$3
|
||||
REPEAT=$4
|
||||
|
||||
######## verify init repeat M___ N___ K___ StrideA StrideB StrideC StrideC1
|
||||
#$DRIVER $VERIFY $INIT $REPEAT 256 256 256 256 256 256 256
|
||||
#$DRIVER $VERIFY $INIT $REPEAT 960 1024 1024 1024 1024 1024 1024
|
||||
#$DRIVER $VERIFY $INIT $REPEAT 1920 2048 2048 2048 2048 2048 2048
|
||||
$DRIVER $VERIFY $INIT $REPEAT 3840 4096 4096 4096 4096 4096 4096
|
||||
#$DRIVER $VERIFY $INIT $REPEAT 7680 8192 8192 8192 8192 8192 8192
|
||||
#$DRIVER $VERIFY $INIT $REPEAT 1024 1024 1024 1024 1024 1024 1024
|
||||
#$DRIVER $VERIFY $INIT $REPEAT 2048 2048 2048 2048 2048 2048 2048
|
||||
@@ -1,25 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
|
||||
make -j gemm_driver_offline
|
||||
|
||||
DRIVER="./host/driver_offline/gemm_driver_offline"
|
||||
|
||||
LAYOUT=$1
|
||||
ALGO=$2
|
||||
VERIFY=$3
|
||||
INIT=$4
|
||||
LOG=$5
|
||||
REPEAT=$6
|
||||
|
||||
M01=$7
|
||||
N01=$8
|
||||
|
||||
######### layout algo verify init log repeat M___ N___ K___ M01_ N01_
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 960 1024 1024 $M01 $N01
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 $M01 $N01
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 $M01 $N01
|
||||
$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 $M01 $N01
|
||||
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 $M01 $N01
|
||||
@@ -1,46 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
|
||||
make -j $1
|
||||
|
||||
DRIVER=example/$1
|
||||
VERIFY=$2
|
||||
INIT=$3
|
||||
REPEAT=$4
|
||||
|
||||
# test
|
||||
######## verify init repeat N__ C___ Y X Hi__ Wi__ Strides LeftPads RightPads
|
||||
#$DRIVER $VERIFY $INIT $REPEAT 128 192 3 3 71 71 2 2 1 1 1 1
|
||||
#$DRIVER $VERIFY $INIT $REPEAT 128 64 1 1 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT 256 3 7 7 230 230 2 2 0 0 0 0
|
||||
$DRIVER $VERIFY $INIT $REPEAT 256 1024 14 14 14 14 1 1 0 0 0 0
|
||||
|
||||
N=$5
|
||||
|
||||
# Resnet50
|
||||
######## verify init repeat N__ C___ Y X Hi__ Wi__ Strides LeftPads RightPads
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 1024 1 1 14 14 2 2 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 1024 1 1 14 14 1 1 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 1024 1 1 14 14 1 1 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 128 3 3 28 28 1 1 1 1 1 1
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 128 1 1 28 28 1 1 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 128 3 3 58 58 2 2 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 2048 1 1 7 7 1 1 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 256 1 1 14 14 1 1 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 256 3 3 14 14 1 1 1 1 1 1
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 256 3 3 30 30 2 2 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 256 1 1 56 56 1 1 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 256 1 1 56 56 2 2 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 256 1 1 56 56 1 1 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 512 3 3 16 16 2 2 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 512 1 1 28 28 2 2 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 512 1 1 28 28 1 1 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 512 1 1 28 28 1 1 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 512 1 1 7 7 1 1 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 512 3 3 7 7 1 1 1 1 1 1
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 64 1 1 56 56 1 1 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 64 1 1 56 56 1 1 0 0 0 0
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 64 3 3 56 56 1 1 1 1 1 1
|
||||
#$DRIVER $VERIFY $INIT $REPEAT $N 3 7 7 230 230 2 2 0 0 0 0
|
||||
@@ -120,14 +120,14 @@ def parse_logfile(logfile):
|
||||
res = [x for _,x in sorted(zip(tests,tflops))]
|
||||
#sorted_kernels = [x for _,x in sorted(zip(tests,kernels))]
|
||||
test_list=list(range(1,len(tests)+1))
|
||||
#parse fwd_conv performance tests:
|
||||
elif 'fwd_conv' in logfile:
|
||||
#parse conv_fwd performance tests:
|
||||
elif 'conv_fwd' in logfile:
|
||||
for line in open(logfile):
|
||||
if 'tflops:' in line:
|
||||
lst=line.split()
|
||||
res.append(lst[1])
|
||||
#parse all other performance tests:
|
||||
elif 'resnet50' or 'batched_gemm' or 'grouped_gemm' or 'bwd_conv' or 'fusion' or 'reduction' in logfile:
|
||||
elif 'resnet50' or 'batched_gemm' or 'grouped_gemm' or 'conv_bwd_data' or 'gemm_bilinear' or 'reduction' in logfile:
|
||||
for line in open(logfile):
|
||||
if 'Best Perf' in line:
|
||||
lst=line.split()
|
||||
@@ -257,18 +257,18 @@ def main():
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
table_name="ck_grouped_gemm_tflops"
|
||||
if 'fwd_conv' in filename:
|
||||
if 'conv_fwd' in filename:
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
table_name="ck_fwd_conv_tflops"
|
||||
if 'bwd_conv' in filename:
|
||||
table_name="ck_conv_fwd_tflops"
|
||||
if 'conv_bwd_data' in filename:
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
table_name="ck_bwd_conv_tflops"
|
||||
if 'fusion' in filename:
|
||||
table_name="ck_conv_bwd_data_tflops"
|
||||
if 'gemm_bilinear' in filename:
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
table_name="ck_fusion_tflops"
|
||||
table_name="ck_gemm_bilinear_tflops"
|
||||
if 'reduction' in filename:
|
||||
for i in range(1,len(results)+1):
|
||||
testlist.append("Test%i"%i)
|
||||
|
||||
@@ -16,7 +16,7 @@ python3 process_perf_data.py perf_resnet50_N265_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_resnet50_N4_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_batched_gemm_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_grouped_gemm_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_fwd_conv_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_bwd_conv_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_fusion_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_conv_fwd_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_conv_bwd_data_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_gemm_bilinear_"$gpu_arch".log
|
||||
python3 process_perf_data.py perf_reduction_"$gpu_arch".log
|
||||
@@ -9,7 +9,7 @@ LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
REPEAT=$7
|
||||
TIME=$7
|
||||
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
@@ -17,28 +17,28 @@ LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
REPEAT=$7
|
||||
TIME=$7
|
||||
|
||||
######## op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 960 1024 1024 -1 -1 -1 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 -1 -1 -1 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 -1 -1 -1 -1 -1 -1 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 -1 -1 -1 -1 -1 -1 2
|
||||
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 960 1024 1024 -1 -1 -1 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1920 2048 2048 -1 -1 -1 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 3840 4096 4096 -1 -1 -1 -1 -1 -1 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 7680 8192 8192 -1 -1 -1 -1 -1 -1 2
|
||||
|
||||
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1024 1024 1024 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2048 2048 2048 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4096 4096 4096 -1 -1 -1 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8192 8192 8192 -1 -1 -1 2
|
||||
####### op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1024 1024 1024 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2048 2048 2048 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4096 4096 4096 -1 -1 -1 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8192 8192 8192 -1 -1 -1 2
|
||||
|
||||
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1056 1056 1056 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2080 2080 2080 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4128 4128 4128 -1 -1 -1 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8224 8224 8224 -1 -1 -1 2
|
||||
####### op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1056 1056 1056 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2080 2080 2080 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4128 4128 4128 -1 -1 -1 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8224 8224 8224 -1 -1 -1 2
|
||||
|
||||
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1088 1088 1088 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2112 2112 2112 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4160 4160 4160 -1 -1 -1 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8256 8256 8256 -1 -1 -1 2
|
||||
####### op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1088 1088 1088 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2112 2112 2112 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4160 4160 4160 -1 -1 -1 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8256 8256 8256 -1 -1 -1 2
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
IN_LAYOUT=$3
|
||||
WEI_LAYOUT=$4
|
||||
OUT_LAYOUT=$5
|
||||
VERIFY=$6
|
||||
INIT=$7
|
||||
LOG=$8
|
||||
REPEAT=$9
|
||||
N=${10}
|
||||
|
||||
######## op datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
|
||||
|
||||
38
script/profile_conv_bwd_data.sh
Executable file
38
script/profile_conv_bwd_data.sh
Executable file
@@ -0,0 +1,38 @@
|
||||
#!/bin/bash
|
||||
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
TIME=$7
|
||||
|
||||
N=$8
|
||||
|
||||
# Resnet50
|
||||
######## op datatype layout verify init log time conv_dim G__ N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
|
||||
38
script/profile_conv_fwd.sh
Executable file
38
script/profile_conv_fwd.sh
Executable file
@@ -0,0 +1,38 @@
|
||||
#!/bin/bash
|
||||
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
TIME=$7
|
||||
|
||||
N=$8
|
||||
|
||||
# Resnet50
|
||||
######## op datatype layout verify init log time conv_dim G__ N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
|
||||
@@ -2,7 +2,6 @@
|
||||
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
#make -j ckProfiler
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
echo $DRIVER
|
||||
OP=$1
|
||||
@@ -11,43 +10,49 @@ LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
REPEAT=$7
|
||||
TIME=$7
|
||||
|
||||
######## op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC
|
||||
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 256 256 256 256 256 256
|
||||
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 960 1024 1024 1024 1024 1024
|
||||
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 2048 2048 2048
|
||||
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 4096 4096 4096
|
||||
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 8192 8192 8192
|
||||
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1024 1024 1024
|
||||
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2048 2048 2048
|
||||
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 960 1024 1024 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 -1 -1 -1
|
||||
# 120 CU
|
||||
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 960 1024 1024 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 960 2048 2048 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1920 1024 2048 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1920 2048 2048 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 3840 4096 4096 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 7680 8192 8192 -1 -1 -1
|
||||
|
||||
# 104 CU
|
||||
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 832 1024 1024 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 832 2048 2048 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1664 1024 2048 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1664 2048 2048 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 3328 4096 4096 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 6656 8192 8192 -1 -1 -1
|
||||
|
||||
# 110 CU
|
||||
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1280 1408 1024 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1280 2816 2048 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2560 1408 2048 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2560 2816 2048 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 5120 5632 4096 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 7040 8192 8192 -1 -1 -1
|
||||
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1024 1024 1024
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2048 2048 2048
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4096 4096 4096
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8192 8192 8192
|
||||
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1056 1056 1056
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2080 2080 2080
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4128 4128 4128
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8224 8224 8224
|
||||
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1088 1088 1088
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2112 2112 2112
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4160 4160 4160
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8256 8256 8256
|
||||
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 6656 8192 8192 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 3328 4096 4096 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1664 2048 2048 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 832 1024 1024 -1 -1 -1
|
||||
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 7040 8192 8192 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 5120 5632 4096 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2560 2816 2048 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1280 1408 1024 -1 -1 -1
|
||||
# testing different strides
|
||||
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1024 1024 1024
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2048 2048 2048
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4096 4096 4096
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8192 8192 8192
|
||||
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1056 1056 1056
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2080 2080 2080
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4128 4128 4128
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8224 8224 8224
|
||||
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1088 1088 1088
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2112 2112 2112
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4160 4160 4160
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8256 8256 8256
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
REPEAT=$7
|
||||
|
||||
######## op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC StrideC1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 960 1024 1024 -1 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 -1 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 -1 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 -1 -1 -1 -1
|
||||
|
||||
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC StrideC1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1024 1024 1024 1024
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2048 2048 2048 2048
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4096 4096 4096 4096
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8192 8192 8192 8192
|
||||
|
||||
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC StrideC1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1056 1056 1056 1056
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2080 2080 2080 2080
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4128 4128 4128 4128
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8224 8224 8224 8224
|
||||
|
||||
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC StrideC1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1088 1088 1088 1088
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2112 2112 2112 2112
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4160 4160 4160 4160
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8256 8256 8256 8256
|
||||
@@ -9,10 +9,10 @@ LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
REPEAT=$7
|
||||
TIME=$7
|
||||
|
||||
######## op datatype layout verify init log repeat Ms______________ Ns______________ Ks_____________ StrideAs___________ StrideBs__________ StrideCs___________
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 256,512,1024,768 128,256,384,1024 128,192,256,512 1024,1025,1044,1026 1024,1024,1024,1024 1025,1024,1028,1024
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 512,768,2048,128 128,256,384,1024 128,192,256,512 1024,1025,2053,1026 1024,1024,1024,1024 1025,1024,2054,1024
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 256,512,1024,768 512,256,768,1024 128,192,256,512 1024,1045,1034,1026 1024,1024,1024,1024 1025,1063,1028,1024
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 512,768,4096,768 128,768,512,2048 128,192,256,512 1024,1027,4096,2050 1024,1024,1024,2048 1025,1024,4099,2049
|
||||
######## op datatype layout verify init log time Ms______________ Ns______________ Ks_____________ StrideAs___________ StrideBs__________ StrideCs___________
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 256,512,1024,768 128,256,384,1024 128,192,256,512 1024,1025,1044,1026 1024,1024,1024,1024 1025,1024,1028,1024
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 512,768,2048,128 128,256,384,1024 128,192,256,512 1024,1025,2053,1026 1024,1024,1024,1024 1025,1024,2054,1024
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 256,512,1024,768 512,256,768,1024 128,192,256,512 1024,1045,1034,1026 1024,1024,1024,1024 1025,1063,1028,1024
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 512,768,4096,768 128,768,512,2048 128,192,256,512 1024,1027,4096,2050 1024,1024,1024,2048 1025,1024,4099,2049
|
||||
|
||||
@@ -16,10 +16,10 @@ elif [ -n $PRECISION ] && [ "$PRECISION" = "--int8" ]; then
|
||||
fi
|
||||
|
||||
#### 0 - ADD, 5 - AVG, 7 - NORM2
|
||||
Operations="0 5 7"
|
||||
Operations="0 5"
|
||||
|
||||
#### 0 - ADD, 5 - AVG, for int8, no NORM2 supported
|
||||
if [ -n $PRECISION ] && [ "$PRECISION" = "--int8" ]; then
|
||||
if [ -n $PRECISION ] && [ "$PRECISION" = "--int8" -o "$PRECISION" = "--half" ]; then
|
||||
Operations=5
|
||||
fi
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
IN_LAYOUT=$3
|
||||
@@ -11,161 +12,58 @@ OUT_LAYOUT=$5
|
||||
VERIFY=$6
|
||||
INIT=$7
|
||||
LOG=$8
|
||||
REPEAT=$9
|
||||
N=${10}
|
||||
|
||||
# test
|
||||
######## op datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads Desired_grid_size__
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 128 256 256 3 3 30 30 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 128 256 256 3 3 28 28 2 2 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 128 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
|
||||
# Resnet50 (no duplicated layer)
|
||||
######## op datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
|
||||
|
||||
# Resnet50 fusion
|
||||
####### op_________________ datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C_ Y X Hi_ Wi__ Strides Dilations LeftPads RightPads
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
TIME=$9
|
||||
|
||||
N=${10}
|
||||
|
||||
# Resnet50
|
||||
######## op datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads Desired_grid_size__
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 58 58 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 30 30 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 256 1 1 56 56 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 16 16 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 512 1 1 28 28 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 230 230 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
|
||||
|
||||
# SSD
|
||||
######## op datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads Desired_grid_size__
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 3 7 7 300 300 2 2 1 1 3 3 3 3
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 64 3 3 75 75 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 64 3 3 75 75 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 64 3 3 75 75 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 64 3 3 75 75 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 64 3 3 75 75 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 64 64 3 3 75 75 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 64 1 1 75 75 2 2 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 64 3 3 75 75 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 128 1 1 38 38 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 128 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 256 1 1 38 38 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 512 256 3 3 38 38 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 512 1 1 19 19 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 512 256 3 3 19 19 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 512 1 1 10 10 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 128 3 3 10 10 2 2 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 256 1 1 5 5 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 128 3 3 5 5 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 128 256 1 1 3 3 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 256 128 3 3 3 3 1 1 1 1 0 0 0 0
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 340 256 3 3 38 38 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 510 512 3 3 19 19 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 510 512 3 3 10 10 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 510 256 3 3 5 5 1 1 1 1 1 1 1 1
|
||||
#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT 120 340 256 3 3 3 3 1 1 1 1 1 1 1 1
|
||||
######## op____________________ datatype in_layout wei_layout out_layout verify init log time N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $TIME $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
|
||||
@@ -40,82 +40,82 @@ function print_log_header(){
|
||||
#run gemm tests
|
||||
export gemm_log="perf_gemm_${gpu_arch}.log"
|
||||
print_log_header $gemm_log $env_type $branch $host_name
|
||||
./profile_gemm.sh gemm 0 0 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 0 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 0 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 0 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 1 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 1 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 1 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 1 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 2 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 2 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 2 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 2 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 3 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 3 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 3 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 3 $verify 1 0 5 | tee -a $gemm_log
|
||||
|
||||
#run resnet50 tests
|
||||
export resnet256_log="perf_resnet50_N256_${gpu_arch}.log"
|
||||
print_log_header $resnet256_log $env_type $branch $host_name
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 2 0 1 256 | tee -a $resnet256_log
|
||||
export resnet4_log="perf_resnet50_N4_${gpu_arch}.log"
|
||||
print_log_header $resnet4_log $env_type $branch $host_name
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 2 0 1 4 | tee -a $resnet4_log
|
||||
./profile_gemm.sh gemm 0 0 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 0 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 0 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 0 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 1 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 1 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 1 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 1 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 2 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 2 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 2 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 2 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 3 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 3 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 3 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 3 $verify 1 0 1 | tee -a $gemm_log
|
||||
|
||||
#run batched_gemm tests
|
||||
export batched_gemm_log="perf_batched_gemm_${gpu_arch}.log"
|
||||
print_log_header $batched_gemm_log $env_type $branch $host_name
|
||||
./profile_batched_gemm.sh batched_gemm 0 0 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 0 1 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 0 2 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 0 3 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 0 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 1 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 2 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 3 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 0 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 1 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 2 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 3 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 0 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 1 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 2 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 3 $verify 2 0 5 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 0 0 $verify 1 0 1 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 0 1 $verify 1 0 1 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 0 2 $verify 1 0 1 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 0 3 $verify 1 0 1 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 0 $verify 1 0 1 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 1 $verify 1 0 1 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 2 $verify 1 0 1 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 1 3 $verify 1 0 1 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 0 $verify 1 0 1 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 1 $verify 1 0 1 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 2 $verify 1 0 1 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 2 3 $verify 1 0 1 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 0 $verify 1 0 1 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 1 $verify 1 0 1 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 2 $verify 1 0 1 | tee -a $batched_gemm_log
|
||||
./profile_batched_gemm.sh batched_gemm 3 3 $verify 1 0 1 | tee -a $batched_gemm_log
|
||||
|
||||
#run grouped_gemm tests
|
||||
export grouped_gemm_log="perf_grouped_gemm_${gpu_arch}.log"
|
||||
print_log_header $grouped_gemm_log $env_type $branch $host_name
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 0 $verify 2 0 5 | tee -a $grouped_gemm_log
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 1 $verify 2 0 5 | tee -a $grouped_gemm_log
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 2 $verify 2 0 5 | tee -a $grouped_gemm_log
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 3 $verify 2 0 5 | tee -a $grouped_gemm_log
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 0 $verify 1 0 1 | tee -a $grouped_gemm_log
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 1 $verify 1 0 1 | tee -a $grouped_gemm_log
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 2 $verify 1 0 1 | tee -a $grouped_gemm_log
|
||||
./profile_grouped_gemm.sh grouped_gemm 1 3 $verify 1 0 1 | tee -a $grouped_gemm_log
|
||||
|
||||
#run fwd_conv tests
|
||||
export fwd_conv_log="perf_fwd_conv_${gpu_arch}.log"
|
||||
print_log_header $fwd_conv_log $env_type $branch $host_name
|
||||
./profile_conv.sh conv_fwd 0 1 $verify 2 0 5 2 256 | tee -a $fwd_conv_log
|
||||
./profile_conv.sh conv_fwd 1 1 $verify 2 0 5 2 256 | tee -a $fwd_conv_log
|
||||
./profile_conv.sh conv_fwd 2 1 $verify 2 0 5 2 256 | tee -a $fwd_conv_log
|
||||
./profile_conv.sh conv_fwd 3 1 $verify 2 0 5 2 256 | tee -a $fwd_conv_log
|
||||
#run GEMM+Bilinear tests
|
||||
export gemm_bilinear_log="perf_gemm_bilinear_${gpu_arch}.log"
|
||||
print_log_header $gemm_bilinear_log $env_type $branch $host_name
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 0 $verify 1 0 1 | tee -a $gemm_bilinear_log
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 1 $verify 1 0 1 | tee -a $gemm_bilinear_log
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 2 $verify 1 0 1 | tee -a $gemm_bilinear_log
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 3 $verify 1 0 1 | tee -a $gemm_bilinear_log
|
||||
|
||||
#run bwd_conv tests
|
||||
export bwd_conv_log="perf_bwd_conv_${gpu_arch}.log"
|
||||
print_log_header $bwd_conv_log $env_type $branch $host_name
|
||||
./profile_conv.sh conv2d_bwd_data 0 1 1 1 $verify 2 0 5 128 | tee -a $bwd_conv_log
|
||||
./profile_conv.sh conv2d_bwd_data 1 1 1 1 $verify 2 0 5 128 | tee -a $bwd_conv_log
|
||||
./profile_conv.sh conv2d_bwd_data 2 1 1 1 $verify 2 0 5 128 | tee -a $bwd_conv_log
|
||||
./profile_conv.sh conv2d_bwd_data 3 1 1 1 $verify 2 0 5 128 | tee -a $bwd_conv_log
|
||||
#run conv_fwd tests
|
||||
export conv_fwd_log="perf_conv_fwd_${gpu_arch}.log"
|
||||
print_log_header $conv_fwd_log $env_type $branch $host_name
|
||||
./profile_conv_fwd.sh conv_fwd 0 1 $verify 1 0 1 256 | tee -a $conv_fwd_log
|
||||
./profile_conv_fwd.sh conv_fwd 1 1 $verify 1 0 1 256 | tee -a $conv_fwd_log
|
||||
./profile_conv_fwd.sh conv_fwd 2 1 $verify 1 0 1 256 | tee -a $conv_fwd_log
|
||||
./profile_conv_fwd.sh conv_fwd 3 1 $verify 1 0 1 256 | tee -a $conv_fwd_log
|
||||
|
||||
#run fusion tests
|
||||
export fusion_log="perf_fusion_${gpu_arch}.log"
|
||||
print_log_header $fusion_log $env_type $branch $host_name
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 0 $verify 2 0 1 | tee -a $fusion_log
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 1 $verify 2 0 1 | tee -a $fusion_log
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 2 $verify 2 0 1 | tee -a $fusion_log
|
||||
./profile_gemm_bilinear.sh gemm_bilinear 1 3 $verify 2 0 1 | tee -a $fusion_log
|
||||
#run conv_bwd_data tests
|
||||
export conv_bwd_data_log="perf_conv_bwd_data_${gpu_arch}.log"
|
||||
print_log_header $conv_bwd_data_log $env_type $branch $host_name
|
||||
./profile_conv_bwd_data.sh conv_bwd_data 0 1 $verify 1 0 1 256 | tee -a $conv_bwd_data_log
|
||||
./profile_conv_bwd_data.sh conv_bwd_data 1 1 $verify 1 0 1 256 | tee -a $conv_bwd_data_log
|
||||
./profile_conv_bwd_data.sh conv_bwd_data 2 1 $verify 1 0 1 256 | tee -a $conv_bwd_data_log
|
||||
./profile_conv_bwd_data.sh conv_bwd_data 3 1 $verify 1 0 1 256 | tee -a $conv_bwd_data_log
|
||||
|
||||
#run resnet50 tests
|
||||
export resnet256_log="perf_resnet50_N256_${gpu_arch}.log"
|
||||
print_log_header $resnet256_log $env_type $branch $host_name
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 256 | tee -a $resnet256_log
|
||||
export resnet4_log="perf_resnet50_N4_${gpu_arch}.log"
|
||||
print_log_header $resnet4_log $env_type $branch $host_name
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 4 | tee -a $resnet4_log
|
||||
|
||||
#run reduction tests
|
||||
export reduction_log="perf_reduction_${gpu_arch}.log"
|
||||
|
||||
@@ -33,30 +33,31 @@ function print_log_header(){
|
||||
echo 'Environment type: ' $2 >> $1;
|
||||
/opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> $1;
|
||||
}
|
||||
|
||||
#run gemm tests
|
||||
export gemm_log="perf_gemm_${gpu_arch}.log"
|
||||
print_log_header $gemm_log $env_type $branch $host_name
|
||||
./profile_gemm.sh gemm 0 0 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 0 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 0 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 0 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 1 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 1 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 1 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 1 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 2 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 2 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 2 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 2 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 3 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 3 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 3 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 3 $verify 1 0 5 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 0 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 0 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 0 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 0 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 1 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 1 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 1 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 1 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 2 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 2 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 2 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 2 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 0 3 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 1 3 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 2 3 $verify 1 0 1 | tee -a $gemm_log
|
||||
./profile_gemm.sh gemm 3 3 $verify 1 0 1 | tee -a $gemm_log
|
||||
|
||||
#run resnet50 test
|
||||
#run resnet50 tests
|
||||
export resnet256_log="perf_resnet50_N256_${gpu_arch}.log"
|
||||
print_log_header $resnet256_log $env_type $branch $host_name
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 2 0 1 256 | tee -a $resnet256_log
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 256 | tee -a $resnet256_log
|
||||
export resnet4_log="perf_resnet50_N4_${gpu_arch}.log"
|
||||
print_log_header $resnet4_log $env_type $branch $host_name
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 2 0 1 4 | tee -a $resnet4_log
|
||||
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 4 | tee -a $resnet4_log
|
||||
|
||||
Reference in New Issue
Block a user