mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-16 02:54:21 +00:00
backward data (#7)
* enabled atomic add in tensor copy
* added gridwise GEMM
* added backward data conv using GEMM + atomic
* added backward data conv using GEMM, no atomic
[ROCm/composable_kernel commit: 8f5f64960e]
This commit is contained in:
@@ -4,5 +4,5 @@
|
||||
export KMDUMPLLVM=1
|
||||
export KMDUMPDIR=$PWD
|
||||
|
||||
make -j driver
|
||||
make -j $1
|
||||
#/opt/rocm/hcc/bin/llvm-objdump -mcpu=gfx906 -source -line-numbers driver/dump-gfx906.isabin > driver/dump-gfx906.isabin.asm
|
||||
|
||||
3
script/docker-cuda.sh
Executable file
3
script/docker-cuda.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env bash
# Starts an interactive container from the asroy/cuda image (NVIDIA runtime)
# with a host directory bind-mounted at /root/workspace.
#
# Usage: docker-cuda.sh WORKSPACE_DIR
#   WORKSPACE_DIR  host directory to expose inside the container

# Fail early with a usage message when no workspace is given; otherwise
# docker would receive the malformed volume spec ":/root/workspace".
WORKSPACE=${1:?usage: docker-cuda.sh WORKSPACE_DIR}

# Two spaces after the colon reproduce the original `echo "workspace: " $WORKSPACE` output.
printf 'workspace:  %s\n' "$WORKSPACE"

# "$WORKSPACE" is quoted so a path containing spaces stays a single -v argument.
sudo docker run -it \
  -v "$WORKSPACE":/root/workspace \
  --group-add sudo \
  --runtime=nvidia \
  asroy/cuda:10.1-cudnn7-devel-ubuntu18.04-latest \
  /bin/bash
|
||||
@@ -1,12 +0,0 @@
|
||||
#!/usr/bin/env bash
# Code generator: writes to stdout a sequence of `if(offset == N) { ... }`
# blocks, one per offset N = 0, 64, 128, ..., 4096 (inclusive), each wrapping
# an inline-asm `ds_read_b128` statement with that offset baked in.

#######################################
# Print the generated if-blocks.
# Arguments: none
# Outputs:   8 lines of generated source per offset, on stdout
#######################################
emit_ds_read_cases() {
  local offset
  for ((offset = 0; offset <= 4096; offset += 64)); do
    # printf '%s\n' is used instead of echo because the generated text must
    # contain the literal two-character sequence \n and a trailing backslash.
    # echo rewrites those when escape interpretation is enabled (xpg_echo,
    # or bash running as sh on some platforms); printf's %s never does.
    printf '%s\n' \
      "if(offset == ${offset})" \
      '{' \
      ' asm volatile("\n \' \
      " ds_read_b128 %0, %1 offset:${offset}\\n \\" \
      ' "' \
      ' : "=v"(r)' \
      ' : "v"(__to_local(lds)));' \
      '}'
  done
}

emit_ds_read_cases
|
||||
Reference in New Issue
Block a user