mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 10:09:41 +00:00
f3eb4639a8dbfc637c5ccd47b4249cc752b4b6cd
* change backward weight name
* start add bwd weight lib and profiler
* change tuning paramter
* change output info
* add bwd weight test
* change test info
* using conv_util
* change wgt to weight
* add }
* add fp32
[ROCm/composable_kernel commit: 781cacd2e6]
Docker script
docker run \
-it \
--privileged \
--group-add sudo \
-w /root/workspace \
-v ${PATH_TO_LOCAL_WORKSPACE}:/root/workspace \
rocm/tensorflow:rocm4.3.1-tf2.6-dev \
/bin/bash
Build
mkdir build && cd build
# Need to specify target ID, example below is gfx908 and gfx90a
cmake \
-D BUILD_DEV=OFF \
-D CMAKE_BUILD_TYPE=Release \
-D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3 \
-D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \
-D CMAKE_PREFIX_PATH=/opt/rocm \
..
Build and Run Examples
make -j examples
Instructions for running each individual examples are under example/
Tests
make -j tests
make test
Build ckProfiler
make -j ckProfiler
Instructions for running ckProfiler are under profiler/
Languages
C++
93.1%
Python
4.5%
CMake
1.5%
Shell
0.5%
Pawn
0.2%