mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 21:21:22 +00:00
* Add more printing to core cktile * Revert other changes in static encoding pattern * Refactor to using a free print() function * Remove loops and print just the containers * Print tuple with better formatting, fix sequence compilation * Add some tests for print utility * Add print utility header * Print for static_encoding_pattern * add buffer_view printing * Align vector_traits * Fix formatting * Lower-case enum strings Co-authored-by: Christopher Millette <63608002+cgmillette@users.noreply.github.com> * Remove empty comment lines * Fix test with lower-case too * Reduce repeated code in print tests, move helper function closer to type definition, test X&Y * Add test_print_common.hpp * add print.hpp in core.hpp --------- Co-authored-by: Aviral Goel <aviral.goel@amd.com> Co-authored-by: Christopher Millette <63608002+cgmillette@users.noreply.github.com> Co-authored-by: Adam Osewski <19374865+aosewski@users.noreply.github.com>
26 lines
810 B
CMake
26 lines
810 B
CMake
# Registers each test subdirectory with the build. Every add_subdirectory()
# call below pulls in the CMakeLists.txt found in the named directory.
add_subdirectory(image_to_column)
add_subdirectory(gemm)
add_subdirectory(gemm_weight_preshuffle)
add_subdirectory(batched_gemm)
add_subdirectory(grouped_gemm)
add_subdirectory(gemm_multi_d)
add_subdirectory(data_type)
add_subdirectory(container)
add_subdirectory(elementwise)
# Not including these tests as there is a bug on gfx90a and gfx942
# resulting in "GPU core dump".
#add_subdirectory(moe_smoothquant)
add_subdirectory(permute)
add_subdirectory(moe_sorting)
add_subdirectory(slice_tile)
add_subdirectory(memory_copy)
add_subdirectory(batched_transpose)
add_subdirectory(smoothquant)
add_subdirectory(topk_softmax)
add_subdirectory(add_rmsnorm2d_rdquant)
# NOTE(review): the two entries below are disabled; the reason is not
# recorded in this file -- confirm before re-enabling.
# add_subdirectory(layernorm2d)
# add_subdirectory(rmsnorm2d)
add_subdirectory(gemm_block_scale)
add_subdirectory(utility)
add_subdirectory(reduce)