Files
composable_kernel/example/15_grouped_gemm/CMakeLists.txt
Adam Osewski b4032629e5 Grouped GEMM Multiple D tile loop. (#1247)
* Overload output stream operator for LoopScheduler and PiplineVersion

* Add Run overload accepting grid descriptors MK.

* Add __device__ keyword for CalculateGridSize

* Create device op GroupedGemmMultipleD

* Add GroupedGemm MultipleD Tile Loop implementation.

* Add an example for GroupedGemm MultipleD tile loop.

* Device Op GroupedGEMMTileLoop.

* Bunch of small changes in exmaple.

* CkProfiler

* Remove unused tparam.

* Fix include statement.

* Fix output stream overloads.

* Do not make descriptors and check validity untill we find group.

* Fix gemm desc initialization.

* Revert device op

* Fix compilation for DTYPES=FP16

* Validate tensor transfers paramters.

* Validate on host only NK dims if M is not known.

* Fix bug.

* A convenient debug func for selecting threads.

* Fix has main k block loop bug.

* Make sure that b2c has up to date tile offset.

* Output stream operator for Sequence type.

* Cmake file formatting.
2024-04-25 15:12:53 -05:00

36 lines
2.0 KiB
CMake

add_custom_target(example_grouped_gemm_xdl)
add_example_executable(example_grouped_gemm_xdl_fp32 grouped_gemm_xdl_fp32.cpp)
add_example_dependencies(example_grouped_gemm_xdl example_grouped_gemm_xdl_fp32)
add_example_executable(example_grouped_gemm_xdl_fp16 grouped_gemm_xdl_fp16.cpp)
add_example_dependencies(example_grouped_gemm_xdl example_grouped_gemm_xdl_fp16)
add_example_executable(example_grouped_gemm_multiple_d_dl_fp16 grouped_gemm_multiple_d_dl_fp16.cpp)
add_example_dependencies(example_grouped_gemm_xdl example_grouped_gemm_multiple_d_dl_fp16)
add_example_executable(example_grouped_gemm_xdl_splitk_fp16 grouped_gemm_xdl_splitk_fp16.cpp)
add_example_dependencies(example_grouped_gemm_xdl example_grouped_gemm_xdl_splitk_fp16)
add_example_executable(example_grouped_gemm_xdl_fixed_nk_fp16 grouped_gemm_xdl_fixed_nk_fp16.cpp)
add_example_dependencies(example_grouped_gemm_xdl example_grouped_gemm_xdl_fixed_nk_fp16)
add_example_executable(example_grouped_gemm_xdl_fixed_nk_bias_fp16 grouped_gemm_xdl_fixed_nk_bias_fp16.cpp)
add_example_dependencies(example_grouped_gemm_xdl example_grouped_gemm_xdl_fixed_nk_bias_fp16)
add_example_executable(example_grouped_gemm_xdl_bf16 grouped_gemm_xdl_bf16.cpp)
add_example_dependencies(example_grouped_gemm_xdl example_grouped_gemm_xdl_bf16)
add_example_executable(example_grouped_gemm_xdl_int8 grouped_gemm_xdl_int8.cpp)
add_example_dependencies(example_grouped_gemm_xdl example_grouped_gemm_xdl_int8)
add_example_executable(example_grouped_gemm_xdl_fixed_nk_fp16_fp8 grouped_gemm_xdl_fixed_nk_fp16_fp8.cpp)
add_example_dependencies(example_grouped_gemm_xdl example_grouped_gemm_xdl_fixed_nk_fp16_fp8)
add_example_executable(example_grouped_gemm_multiple_d_xdl_fp16 grouped_gemm_multiple_d_xdl_fp16.cpp)
add_example_dependencies(example_grouped_gemm_xdl example_grouped_gemm_multiple_d_xdl_fp16)
if(USE_BITINT_EXTENSION_INT4)
add_example_executable(example_grouped_gemm_xdl_int4 grouped_gemm_xdl_int4.cpp)
add_example_dependencies(example_grouped_gemm_xdl example_grouped_gemm_xdl_int4)
endif()