mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-10 16:28:38 +00:00
[CK] add composable kernel support on gfx1250 (#6978) ## Motivation Add composable kernel support on gfx1250. ## Technical Details <!-- Explain the changes along with any relevant GitHub links. --> ## Test Plan <!-- Explain any relevant testing done to verify this PR. --> ## Test Result <!-- Briefly summarize test outcomes. --> ## Submission Checklist - [ ] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests. --------- Co-authored-by: Qun Lin <qlin@amd.com> Co-authored-by: jialuo12_amdeng <jia.luo@amd.com> Co-authored-by: Andriy Roshchenko <andriy.roshchenko@amd.com> Co-authored-by: hsivasun_amdeng <haresh.sivasuntharampillai@amd.com>
75 lines
2.4 KiB
C++
75 lines
2.4 KiB
C++
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
#include "ck/ck.hpp"
|
|
#include "ck/host_utility/device_prop.hpp"
|
|
|
|
#include "prefetch_op_util.hpp"
|
|
|
|
template <typename T, uint32_t NUM_THREADS, uint32_t NUM_SCALARS, bool IS_L1_PREFETCH>
|
|
bool run_test(bool time_kernels)
|
|
{
|
|
bool pass = true;
|
|
|
|
#if defined(__gfx125__)
|
|
const auto coherence =
|
|
IS_L1_PREFETCH ? ck::AmdBufferCoherenceEnum::CU_RT : ck::AmdBufferCoherenceEnum::SE_RT;
|
|
using global_prefetch_op = ck::GlobalPrefetchDataOp<coherence>;
|
|
using flat_prefetch_op = ck::FlatPrefetchDataOp<coherence>;
|
|
#else
|
|
using global_prefetch_op = ck::GlobalPrefetchDataOp<>;
|
|
using flat_prefetch_op = ck::FlatPrefetchDataOp<>;
|
|
#endif
|
|
|
|
const auto global_prefetch_kernel =
|
|
ck::prefetch_op_util::kernel_with_prefetch<T, NUM_THREADS, NUM_SCALARS, global_prefetch_op>;
|
|
const auto flat_prefetch_kernel = ck::prefetch_op_util::
|
|
kernel_with_prefetch_and_shared_mem<T, NUM_THREADS, NUM_SCALARS, flat_prefetch_op>;
|
|
|
|
const auto prefetch_kernel_container =
|
|
std::make_tuple(global_prefetch_kernel, flat_prefetch_kernel);
|
|
|
|
ck::static_for<0, 2, 1>{}([&](auto i) {
|
|
std::string kernel_name = (i == 1 ? "flat_prefetch" : "global_prefetch");
|
|
|
|
auto kernel = std::get<ck::Number<i>{}>(prefetch_kernel_container);
|
|
|
|
pass &=
|
|
ck::prefetch_op_util::test_prefetch_impl<decltype(kernel), T, NUM_THREADS, NUM_SCALARS>(
|
|
time_kernels, kernel, kernel_name);
|
|
});
|
|
|
|
return pass;
|
|
}
|
|
|
|
int main(int argc, char* argv[])
|
|
{
|
|
if(!ck::is_gfx125_supported())
|
|
{
|
|
std::cout << "This feature is not supported by current HW, skipping tests." << std::endl;
|
|
return 0;
|
|
}
|
|
|
|
bool time_kernels = false;
|
|
|
|
if(argc == 2)
|
|
{
|
|
time_kernels = std::stoi(argv[1]);
|
|
}
|
|
|
|
bool pass = true;
|
|
|
|
std::cout << "=== Testing L2 Global Cache Prefetch ===" << std::endl;
|
|
|
|
pass &= run_test<float, 4096, 1024, false>(time_kernels);
|
|
pass &= run_test<double, 4096, 512, false>(time_kernels);
|
|
|
|
std::cout << "=== Testing L1 Global Cache Prefetch ===" << std::endl;
|
|
|
|
pass &= run_test<float, 4096, 1024, true>(time_kernels);
|
|
pass &= run_test<double, 4096, 512, true>(time_kernels);
|
|
|
|
std::cout << "TestGlobalPrefetch ..... " << (pass ? "SUCCESS" : "FAILURE") << std::endl;
|
|
return pass ? 0 : 1;
|
|
}
|