mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-16 19:09:59 +00:00
* init for reduce_threadwise multi_d
* add reduce_threadwise_multi_d
* add reduce_multi_d
* clean
* start add an other splitk device op
* add reduce template parameter to SplitKBatchOffset
* add reduce c matrix
* clean up code
* change example data type to bf16
* add bf16Ai8B example
* remove reduce template parameter
* add splitk atomic status to v4
* example add multi d parameters
* device op add multi-d parameters
* add multi-d to reduce
* fix kbach=1 bug
* change B layout to col in bf16Ai8B example
* remove float adding struct
* change multi-d interface
* change file and class name
* remove multi-d of bf16Ai8B example
* change IsReduce function to IsReduceAdd
* change example layout to RRR from RCR
* according layout to set ds stride
* reset parameter layout
* add gemm universal reduce instance
* add reduce factory
* add profile_gemm_universal_reduce
* add reduce to profiler
* fix reduce instance
* fix profiler reduce compiling bug
* format
* format library instance code
* add mem instance for reduce library
* fix call instance names
* add workspace for reduce in ckProfiler
* format
* add mnpading to reduce library instance
* add fp16 instance to reduce of profiler
* change copyright time
* restore profiler cmake file
* add reduce text to instances
* add DsLayout and DsDataType to instances template parameter
* fixed gemm_reduce_multi_d
* add an example without multi_d
* Update common.hpp
* Update gtest.cmake
* Update gemm_xdl_splitk_reduce_bf16.cpp
* clean
* Update gtest.cmake
* format
* fixe api
* format
* default parameter change to RRR
* add vector_len for multi_d
* format
* Update gtest.cmake
* fix bf16A iBB elementwiseop
* add ReduceDataType
* move ReduceDataType to end position
* format
* remove googletest git method address
* fix copyright time
* update init data
---------
Co-authored-by: root <jizhan@amd.com>
Co-authored-by: letaoqin <letaoqin@amd.com>
Co-authored-by: Jing Zhang <jizhan@meta.com>
Co-authored-by: zjing14 <zhangjing14@gmail.com>
[ROCm/composable_kernel commit: c544eb4da0]
30 lines
1.6 KiB
CMake
30 lines
1.6 KiB
CMake
add_custom_target(example_splitK_gemm_xdl)
|
|
add_example_executable(example_splitK_gemm_xdl_fp32 splitK_gemm_xdl_fp32.cpp)
|
|
add_example_dependencies(example_splitK_gemm_xdl example_splitK_gemm_xdl_fp32)
|
|
|
|
add_example_executable(example_splitK_gemm_xdl_fp16 splitK_gemm_xdl_fp16.cpp)
|
|
add_example_dependencies(example_splitK_gemm_xdl example_splitK_gemm_xdl_fp16)
|
|
|
|
add_example_executable(example_splitK_gemm_xdl_fp16_fp8 splitK_gemm_xdl_fp16_fp8.cpp)
|
|
add_example_dependencies(example_splitK_gemm_xdl example_splitK_gemm_xdl_fp16_fp8)
|
|
|
|
add_example_executable(example_splitK_gemm_xdl_lds_direct_load_fp16 splitK_gemm_xdl_lds_direct_load_fp16.cpp)
|
|
add_example_dependencies(example_splitK_gemm_xdl example_splitK_gemm_xdl_lds_direct_load_fp16)
|
|
|
|
add_example_executable(example_splitK_gemm_xdl_bf16 splitK_gemm_xdl_bf16.cpp)
|
|
add_example_dependencies(example_splitK_gemm_xdl example_splitK_gemm_xdl_bf16)
|
|
|
|
add_example_executable(example_splitK_gemm_xdl_int8 splitK_gemm_xdl_int8.cpp)
|
|
add_example_dependencies(example_splitK_gemm_xdl example_splitK_gemm_xdl_int8)
|
|
|
|
if(USE_BITINT_EXTENSION_INT4)
|
|
add_example_executable(example_splitK_gemm_xdl_int4 splitK_gemm_xdl_int4.cpp)
|
|
add_example_dependencies(example_splitK_gemm_xdl example_splitK_gemm_xdl_int4)
|
|
endif()
|
|
|
|
add_example_executable(example_gemm_xdl_splitk_reduce_multi_d_fp16 gemm_xdl_splitk_reduce_multi_d_fp16.cpp)
|
|
add_example_executable(example_gemm_xdl_splitk_reduce_multi_d_bf16 gemm_xdl_splitk_reduce_multi_d_bf16.cpp)
|
|
add_example_executable(example_gemm_xdl_splitk_reduce_bf16A_i8B gemm_xdl_splitk_reduce_bf16A_i8B.cpp)
|
|
|
|
add_example_executable(example_gemm_xdl_splitk_reduce_bfp16 gemm_xdl_splitk_reduce_bf16.cpp)
|