mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 05:01:25 +00:00
* init for reduce_threadwise multi_d * add reduce_threadwise_multi_d * add reduce_multi_d * clean * start add an other splitk device op * add reduce template parameter to SplitKBatchOffset * add reduce c matrix * clean up code * change example data type to bf16 * add bf16Ai8B example * remove reduce template parameter * add splitk atomic status to v4 * example add multi d parameters * device op add multi-d parameters * add multi-d to reduce * fix kbach=1 bug * change B layout to col in bf16Ai8B example * remove float adding struct * change multi-d interface * change file and class name * remove multi-d of bf16Ai8B example * change IsReduce function to IsReduceAdd * change example layout to RRR from RCR * according layout to set ds stride * reset parameter layout * add gemm universal reduce instance * add reduce factory * add profile_gemm_universal_reduce * add reduce to profiler * fix reduce instance * fix profiler reduce compiling bug * format * format library instance code * add mem instance for reduce library * fix call instance names * add workspace for reduce in ckProfiler * format * add mnpading to reduce library instance * add fp16 instance to reduce of profiler * change copyright time * restore profiler cmake file * add reduce text to instances * add DsLayout and DsDataType to instances template parameter * fixed gemm_reduce_multi_d * add an example without multi_d * Update common.hpp * Update gtest.cmake * Update gemm_xdl_splitk_reduce_bf16.cpp * clean * Update gtest.cmake * format * fixe api * format * default parameter change to RRR * add vector_len for multi_d * format * Update gtest.cmake * fix bf16A iBB elementwiseop * add ReduceDataType * move ReduceDataType to end position * format * remove googletest git method address * fix copyright time * update init data --------- Co-authored-by: root <jizhan@amd.com> Co-authored-by: letaoqin <letaoqin@amd.com> Co-authored-by: Jing Zhang <jizhan@meta.com> Co-authored-by: zjing14 <zhangjing14@gmail.com>
30 lines
1.6 KiB
CMake
30 lines
1.6 KiB
CMake
add_custom_target(example_splitK_gemm_xdl)
|
|
add_example_executable(example_splitK_gemm_xdl_fp32 splitK_gemm_xdl_fp32.cpp)
|
|
add_example_dependencies(example_splitK_gemm_xdl example_splitK_gemm_xdl_fp32)
|
|
|
|
add_example_executable(example_splitK_gemm_xdl_fp16 splitK_gemm_xdl_fp16.cpp)
|
|
add_example_dependencies(example_splitK_gemm_xdl example_splitK_gemm_xdl_fp16)
|
|
|
|
add_example_executable(example_splitK_gemm_xdl_fp16_fp8 splitK_gemm_xdl_fp16_fp8.cpp)
|
|
add_example_dependencies(example_splitK_gemm_xdl example_splitK_gemm_xdl_fp16_fp8)
|
|
|
|
add_example_executable(example_splitK_gemm_xdl_lds_direct_load_fp16 splitK_gemm_xdl_lds_direct_load_fp16.cpp)
|
|
add_example_dependencies(example_splitK_gemm_xdl example_splitK_gemm_xdl_lds_direct_load_fp16)
|
|
|
|
add_example_executable(example_splitK_gemm_xdl_bf16 splitK_gemm_xdl_bf16.cpp)
|
|
add_example_dependencies(example_splitK_gemm_xdl example_splitK_gemm_xdl_bf16)
|
|
|
|
add_example_executable(example_splitK_gemm_xdl_int8 splitK_gemm_xdl_int8.cpp)
|
|
add_example_dependencies(example_splitK_gemm_xdl example_splitK_gemm_xdl_int8)
|
|
|
|
if(USE_BITINT_EXTENSION_INT4)
|
|
add_example_executable(example_splitK_gemm_xdl_int4 splitK_gemm_xdl_int4.cpp)
|
|
add_example_dependencies(example_splitK_gemm_xdl example_splitK_gemm_xdl_int4)
|
|
endif()
|
|
|
|
add_example_executable(example_gemm_xdl_splitk_reduce_multi_d_fp16 gemm_xdl_splitk_reduce_multi_d_fp16.cpp)
|
|
add_example_executable(example_gemm_xdl_splitk_reduce_multi_d_bf16 gemm_xdl_splitk_reduce_multi_d_bf16.cpp)
|
|
add_example_executable(example_gemm_xdl_splitk_reduce_bf16A_i8B gemm_xdl_splitk_reduce_bf16A_i8B.cpp)
|
|
|
|
add_example_executable(example_gemm_xdl_splitk_reduce_bfp16 gemm_xdl_splitk_reduce_bf16.cpp)
|