mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
* enabled atomic add in tensor copy
* added gridwise GEMM
* added backward data conv using GEMM + atomic
* added backward data conv using GEMM, no atomic
[ROCm/composable_kernel commit: 8f5f64960e]
9 lines
286 B
Bash
Executable File
9 lines
286 B
Bash
Executable File
#!/bin/bash
#
# Build wrapper: exports the KM* environment variables below, then runs
# `make` in the current directory.
#
# Usage: <this-script> [JOBS]
#   JOBS  value handed to `make -j`. When omitted or empty, `make -j` is run
#         without a number (GNU make then does not limit parallel jobs).
#
# The script's exit status is that of `make`.
#
# NOTE(review): the KM* variables are presumably read by the HCC/ROCm kernel
# compiler invoked during the build -- confirm against the toolchain docs.

# Extra backend options; the flag names look like LLVM llc options -- TODO confirm.
export KMOPTLLC="-mattr=+enable-ds128 -amdgpu-enable-global-sgpr-addr"

# Presumably enable dumping of the generated ISA and LLVM IR -- verify.
export KMDUMPISA=1
export KMDUMPLLVM=1

# Dump directory is the directory the script is invoked from.
export KMDUMPDIR="$PWD"

# Quote the job count so it is always passed as a single word. The empty /
# missing case is handled separately because `make -j ""` is an error, whereas
# a bare `make -j` is what an unquoted empty $1 used to produce.
if [[ -n "${1:-}" ]]; then
  make -j "$1"
else
  make -j
fi

#/opt/rocm/hcc/bin/llvm-objdump -mcpu=gfx906 -source -line-numbers driver/dump-gfx906.isabin > driver/dump-gfx906.isabin.asm