mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-05 14:11:29 +00:00
* tuning para, * testing on v100 * add fp16 * remove deprecated tensor descriptor * sync with miopen * update build script Co-authored-by: Jing Zhang <jizhan@amd.com>
9 lines
286 B
Bash
Executable File
9 lines
286 B
Bash
Executable File
#!/bin/bash
#
# Build helper: exports the KM* environment variables below and then runs make.
#
# Usage: <this-script> [JOBS]
#   JOBS  Optional value passed to `make -j`. When omitted (or empty),
#         `make -j` is invoked without a number.
#
# NOTE(review): the KM* variables are presumably read by the compiler
# toolchain that make invokes; their exact semantics are not visible in this
# script -- confirm against the toolchain documentation.

# Extra backend options (presumably forwarded to llc, going by the name).
export KMOPTLLC="-mattr=+enable-ds128 -amdgpu-enable-global-sgpr-addr"

# Presumably enable ISA and LLVM dumps, written to KMDUMPDIR -- TODO confirm.
export KMDUMPISA=1
export KMDUMPLLVM=1
export KMDUMPDIR="$PWD"

# Pass the job count as a single quoted word so it is never word-split or
# glob-expanded. With no argument, fall back to a bare `make -j`, which is
# what the unquoted `make -j $1` expanded to in that case.
if [[ -n "${1:-}" ]]; then
  make -j "$1"
else
  make -j
fi

#/opt/rocm/hcc/bin/llvm-objdump -mcpu=gfx906 -source -line-numbers driver/dump-gfx906.isabin > driver/dump-gfx906.isabin.asm