backward data (#7)

* enabled atomic add in tensor copy
* added gridwise GEMM
* added backward data conv using GEMM + atomic
* added backward data conv using GEMM, no atomic


[ROCm/composable_kernel commit: 8f5f64960e]
This commit is contained in:
Chao Liu
2019-12-03 01:16:12 -06:00
committed by GitHub
parent 809e8239e1
commit 4414e495ed
51 changed files with 3563 additions and 570 deletions

View File

@@ -4,5 +4,5 @@
# Environment exported for the build below. KMDUMPDIR is set to the current
# working directory.
# NOTE(review): presumably these make the compiler dump intermediate
# LLVM/ISA files into KMDUMPDIR — confirm against the toolchain docs.
export KMDUMPLLVM=1
export KMDUMPDIR=$PWD
# NOTE(review): this hunk appears to list both the old and the new form of the
# same build line with the diff +/- markers stripped — confirm which one is
# current. The second form takes the make target from the script's first
# argument.
make -j driver
make -j $1
# Disabled: disassemble the dumped gfx906 ISA binary, with source and line
# numbers, into a .asm file next to it.
#/opt/rocm/hcc/bin/llvm-objdump -mcpu=gfx906 -source -line-numbers driver/dump-gfx906.isabin > driver/dump-gfx906.isabin.asm

3
script/docker-cuda.sh Executable file
View File

@@ -0,0 +1,3 @@
#!/usr/bin/env bash
# Start an interactive bash session in the CUDA 10.1 / cuDNN 7 development
# image, with a host directory bind-mounted at /root/workspace.
#
# Usage: docker-cuda.sh <workspace-dir>
#   <workspace-dir>  host directory to mount into the container
#
# The container is started through sudo and uses the nvidia runtime.

# Abort with a usage message when no directory is given; otherwise docker
# would be handed an empty bind-mount source ("-v :/root/workspace").
WORKSPACE=${1:?"usage: $0 <workspace-dir>"}
echo "workspace: " "$WORKSPACE"
# Quote the path so directories containing spaces or glob characters are
# passed to docker as a single -v argument.
sudo docker run -it -v "$WORKSPACE":/root/workspace --group-add sudo --runtime=nvidia asroy/cuda:10.1-cudnn7-devel-ubuntu18.04-latest /bin/bash

View File

@@ -1,12 +0,0 @@
# Code generator: for every offset 0, 64, 128, ..., 4096 print one
# `if(offset == N) { ... }` snippet whose inline-asm text contains a
# ds_read_b128 instruction with that offset written out literally.
#
# Every output line is passed to printf as a separate %s argument. The
# single-quoted pieces are emitted verbatim, so the `\n` and trailing `\`
# sequences reach the output as literal backslashes.
for ((i = 0; i <= 4096; i += 64)); do
  OFFSET=$i
  printf '%s\n' \
    "if(offset == ${OFFSET})" \
    '{' \
    ' asm volatile("\n \' \
    " ds_read_b128 %0, %1 offset:${OFFSET}"'\n \' \
    ' "' \
    ' : "=v"(r)' \
    ' : "v"(__to_local(lds)));' \
    '}'
done