mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-11 17:00:18 +00:00
ac0d806650280b770bde1dac952535b34a2d4f5d
* forgot passing BatchedCount in some profiler_batched_gemm * delete default BatchCount
Docker script
docker run \
-it \
--privileged \
--group-add sudo \
-w /root/workspace \
-v ${PATH_TO_LOCAL_WORKSPACE}:/root/workspace \
rocm/tensorflow:rocm4.3.1-tf2.6-dev \
/bin/bash
Build
mkdir build && cd build
# Need to specify target ID, example below is gfx908 and gfx90a
cmake \
-D BUILD_DEV=OFF \
-D CMAKE_BUILD_TYPE=Release \
-D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3 \
-D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \
-D CMAKE_PREFIX_PATH=/opt/rocm \
..
Build and Run Examples
make -j examples
Instructions for running each individual examples are under example/
Tests
make -j tests
make test
Build ckProfiler
make -j ckProfiler
Instructions for running ckProfiler are under profiler/
Languages
C++
93.1%
Python
4.5%
CMake
1.5%
Shell
0.5%
Pawn
0.2%