mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-05 12:44:40 +00:00
* initial stub for standalone softmax
* start device_softmax_mk_to_mk as a wrapper to device_reduce_mk_to_m
* host softmax validates
* compiles; to implement beta scaling
* use NaN trick to efficiently ignore OOB values during sum of exponentials
* freeload device_reduce's utility functions
* clean up interface
* adding prior value (beta scaling)
* remove restriction related to perf considerations
* apply clang-format
* clean; disable diagnostics
* resolve conflicts
* add exp wrapper
* honor HostTensorDesc interface; allow implicit cast from different vector<T> type
* test softmax for fp16/fp32
* update readme
* amend commit NaN trick
* remove redundant param added during development
* format
* replace ScalarDataType with AccDataType
* separate out test programs by precision type
* move softmax sample code to its own folder
* format
* keep up with recent changes in reduction API
* remove extra header
[ROCm/composable_kernel commit: 15c89e81f0]
70 lines
2.7 KiB
CMake
70 lines
2.7 KiB
CMake
# Header search paths for the test programs.
# BEFORE prepends these entries to this directory's include list. Because the
# command is directory-scoped, the subdirectories added later in this file
# inherit the same paths.
include_directories(BEFORE
    # Repository root.
    ${PROJECT_SOURCE_DIR}/

    # Core headers under include/ck.
    ${PROJECT_SOURCE_DIR}/include/ck
    ${PROJECT_SOURCE_DIR}/include/ck/utility
    ${PROJECT_SOURCE_DIR}/include/ck/host_utility
    ${PROJECT_SOURCE_DIR}/include/ck/tensor_description
    ${PROJECT_SOURCE_DIR}/include/ck/tensor
    ${PROJECT_SOURCE_DIR}/include/ck/problem_transform
    ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/device
    ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/grid
    ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/block
    ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/warp
    ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/thread
    ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/element

    # Library headers under library/include/ck/library.
    ${PROJECT_SOURCE_DIR}/library/include/ck/library/host_tensor
    ${PROJECT_SOURCE_DIR}/library/include/ck/library/tensor_operation_instance
    ${PROJECT_SOURCE_DIR}/library/include/ck/library/tensor_operation_instance/gpu/reduce
    ${PROJECT_SOURCE_DIR}/library/include/ck/library/reference_tensor_operation/cpu
    ${PROJECT_SOURCE_DIR}/library/include/ck/library/reference_tensor_operation/gpu
    ${PROJECT_SOURCE_DIR}/library/include/ck/library/utility

    # Test, profiler and bundled third-party headers.
    ${PROJECT_SOURCE_DIR}/test/include
    ${PROJECT_SOURCE_DIR}/profiler/include
    ${PROJECT_SOURCE_DIR}/external/include/half
)

# NOTE(review): the lowercase `googletest` module is presumably a
# project-provided script found via CMAKE_MODULE_PATH. It is distinct from
# CMake's bundled `GoogleTest` module, which this file includes separately.
include(googletest)

# Aggregate target: every test executable registered through the helper
# functions in this file is added as a dependency of `tests`.
add_custom_target(tests)

# add_test_executable(<TEST_NAME> <source>...)
#
# Builds the executable <TEST_NAME> from the remaining arguments, registers it
# with CTest under the same name, and makes the aggregate targets `tests` and
# `check` depend on it.
#
# NOTE(review): the `check` target is not created in this file; it is assumed
# to exist already when this function is called.
function(add_test_executable TEST_NAME)
    message("adding test ${TEST_NAME}")
    add_executable(${TEST_NAME} ${ARGN})
    # One CTest entry per binary; the generator expression resolves to the
    # path of the built executable.
    add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}>)
    add_dependencies(tests ${TEST_NAME})
    add_dependencies(check ${TEST_NAME})
endfunction()

# CMake's bundled GoogleTest module; it provides gtest_discover_tests().
include(GoogleTest)

# add_gtest_executable(<TEST_NAME> <source>...)
#
# Builds the GoogleTest-based executable <TEST_NAME> from the remaining
# arguments, makes the aggregate targets `tests` and `check` depend on it,
# links it against `gtest_main`, and registers its test cases with CTest via
# gtest_discover_tests().
#
# NOTE(review): neither the `check` target nor the `gtest_main` target is
# created in this file; both are assumed to exist when this function is called.
function(add_gtest_executable TEST_NAME)
    message("adding gtest ${TEST_NAME}")
    add_executable(${TEST_NAME} ${ARGN})
    add_dependencies(tests ${TEST_NAME})
    add_dependencies(check ${TEST_NAME})
    # suppress gtest warnings
    target_compile_options(${TEST_NAME} PRIVATE -Wno-global-constructors -Wno-undef)
    target_link_libraries(${TEST_NAME} PRIVATE gtest_main)
    gtest_discover_tests(${TEST_NAME})
endfunction()

# Test suites, one subdirectory each. Every listed directory must contain its
# own CMakeLists.txt.
add_subdirectory(magic_number_division)
add_subdirectory(space_filling_curve)
add_subdirectory(conv_util)
add_subdirectory(reference_conv_fwd)
add_subdirectory(gemm)
add_subdirectory(gemm_split_k)
add_subdirectory(gemm_reduce)
add_subdirectory(batched_gemm)
add_subdirectory(batched_gemm_reduce)
add_subdirectory(grouped_gemm)
add_subdirectory(convnd_fwd)
add_subdirectory(reduce)
add_subdirectory(conv2d_bwd_weight)
add_subdirectory(convnd_bwd_data)
add_subdirectory(block_to_ctile_map)
add_subdirectory(softmax)
# Do NOT add client_app here; it is tested independently via CI.