mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 13:11:25 +00:00
* replace buffer_atomic with global_atomic * fixed global_atomic_add * added bf16 atomic_add * format * clang-format-12 * clean * clean * add guards * Update gtest.cmake * enabled splitk_gemm_multi_d * format * add ckProfiler * format * fixed naming * format * clean * clean * add guards * fix clang format * format * add kbatch printout * clean * Add rocm6.2 related gemm optimization * Limit bf16 atomic usage * remove redundant RCR gemm_universal instance * Add RRR fp8 gemm universal instance * Bug fix * Add GPU_TARGET guard to FP8/BF8 target * bug fix * update cmake * remove all fp8/bf8 example if arch not support * Enable fp8 RRR support in ckProfiler * limit greedy-reverse flag to gemm_universal in ckProfiler --------- Co-authored-by: Jing Zhang <jizhan@fb.com> Co-authored-by: Jing Zhang <jizhan@meta.com> Co-authored-by: zjing14 <zhangjing14@gmail.com> Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com> Co-authored-by: illsilin <Illia.Silin@amd.com>
16 lines
726 B
CMake
16 lines
726 B
CMake
if((DTYPES MATCHES "fp16") OR NOT DEFINED DTYPES)
|
|
add_executable(client_conv3d_fwd_fp16 conv3d_fwd_fp16.cpp)
|
|
target_link_libraries(client_conv3d_fwd_fp16 PRIVATE composable_kernel::device_conv_operations)
|
|
|
|
endif()
|
|
|
|
if((DTYPES MATCHES "fp8") OR (NOT DEFINED DTYPES AND GPU_TARGETS MATCHES "gfx94"))
|
|
add_executable(client_conv3d_fwd_fp16_comp_fp8 conv3d_fwd_fp16_comp_fp8.cpp)
|
|
target_link_libraries(client_conv3d_fwd_fp16_comp_fp8 PRIVATE composable_kernel::device_conv_operations)
|
|
endif()
|
|
|
|
if((DTYPES MATCHES "fp32") OR NOT DEFINED DTYPES)
|
|
add_executable(client_conv3d_fwd_fp32 conv3d_fwd_fp32.cpp)
|
|
target_link_libraries(client_conv3d_fwd_fp32 PRIVATE composable_kernel::device_conv_operations)
|
|
endif()
|