mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-17 11:30:02 +00:00
* tuning parameters
* testing on v100
* add fp16
* remove deprecated tensor descriptor
* sync with miopen
* update build script
Co-authored-by: Jing Zhang <jizhan@amd.com>
[ROCm/composable_kernel commit: 5c7cec1115]
9 lines
286 B
Bash
Executable File
9 lines
286 B
Bash
Executable File
#!/bin/bash
#
# Build wrapper: exports the KM* environment variables below and then runs
# `make` in the current directory with a bounded number of parallel jobs.
#
# Usage: <this script> [JOBS]
#   JOBS  number of parallel make jobs; defaults to the number of online CPUs.
#
# NOTE(review): the KM* variables are not read anywhere in this script; they
# are presumably consumed by the compiler toolchain that make invokes
# (the commented-out command at the bottom points at /opt/rocm/hcc) -- confirm.

# Extra option string; the value is passed through verbatim.
export KMOPTLLC="-mattr=+enable-ds128 -amdgpu-enable-global-sgpr-addr"

# Dump switches and the dump location (the directory the script is run from).
export KMDUMPISA=1
export KMDUMPLLVM=1
export KMDUMPDIR="$PWD"

# With no argument, an unquoted `make -j $1` collapses to a bare `make -j`,
# which places no limit on the number of simultaneous jobs. Fall back to the
# CPU count instead so the build cannot oversubscribe the machine.
num_jobs="${1:-$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1)}"

make -j "$num_jobs"

# Optional, kept disabled: disassemble the dumped gfx906 ISA binary with
# source and line-number annotations.
#/opt/rocm/hcc/bin/llvm-objdump -mcpu=gfx906 -source -line-numbers driver/dump-gfx906.isabin > driver/dump-gfx906.isabin.asm