Files
composable_kernel/test/prefetch_op/prefetch_op.cpp
Illia Silin 717f2efef7 [rocm-libraries] ROCm/rocm-libraries#6978 (commit e58096d)
[CK] add composable kernel support on gfx1250 (#6978)

## Motivation

Add composable kernel support on gfx1250.

## Technical Details

<!-- Explain the changes along with any relevant GitHub links. -->

## Test Plan

<!-- Explain any relevant testing done to verify this PR. -->

## Test Result

<!-- Briefly summarize test outcomes. -->

## Submission Checklist

- [ ] Look over the contributing guidelines at
https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.

---------

Co-authored-by: Qun Lin <qlin@amd.com>
Co-authored-by: jialuo12_amdeng <jia.luo@amd.com>
Co-authored-by: Andriy Roshchenko <andriy.roshchenko@amd.com>
Co-authored-by: hsivasun_amdeng <haresh.sivasuntharampillai@amd.com>
2026-05-15 06:46:51 -07:00

75 lines
2.4 KiB
C++

// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <exception>
#include <iostream>
#include <string>
#include <tuple>

#include "ck/ck.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "prefetch_op_util.hpp"
/// Runs the prefetch test driver once for the global-prefetch kernel and once
/// for the flat-prefetch (shared-memory) kernel, and reports the combined result.
///
/// @tparam T               Element type forwarded to the kernels and the test driver.
/// @tparam NUM_THREADS     Forwarded unchanged to the kernels and the test driver.
/// @tparam NUM_SCALARS     Forwarded unchanged to the kernels and the test driver.
/// @tparam IS_L1_PREFETCH  When `__gfx125__` is defined: selects the `CU_RT`
///                         coherence if true, `SE_RT` otherwise. Ignored on the
///                         fallback path, which uses the ops' default arguments.
/// @param time_kernels     Forwarded to `test_prefetch_impl`.
/// @return The AND of the results returned by `test_prefetch_impl` for both kernels.
template <typename T, uint32_t NUM_THREADS, uint32_t NUM_SCALARS, bool IS_L1_PREFETCH>
bool run_test(bool time_kernels)
{
    // NOTE(review): `__gfx125__` looks like a device-target macro; confirm that the
    // host and device compilation passes end up selecting the same op types here.
#if defined(__gfx125__)
    constexpr auto cache_scope = IS_L1_PREFETCH ? ck::AmdBufferCoherenceEnum::CU_RT
                                                : ck::AmdBufferCoherenceEnum::SE_RT;
    using GlobalOp = ck::GlobalPrefetchDataOp<cache_scope>;
    using FlatOp   = ck::FlatPrefetchDataOp<cache_scope>;
#else
    using GlobalOp = ck::GlobalPrefetchDataOp<>;
    using FlatOp   = ck::FlatPrefetchDataOp<>;
#endif

    // Slot 0 holds the global-prefetch kernel, slot 1 the flat-prefetch kernel.
    const auto global_kernel =
        ck::prefetch_op_util::kernel_with_prefetch<T, NUM_THREADS, NUM_SCALARS, GlobalOp>;
    const auto flat_kernel = ck::prefetch_op_util::
        kernel_with_prefetch_and_shared_mem<T, NUM_THREADS, NUM_SCALARS, FlatOp>;
    const auto kernels = std::make_tuple(global_kernel, flat_kernel);

    bool all_passed = true;

    // Compile-time loop over the two tuple slots; `idx` is usable as a constant.
    ck::static_for<0, 2, 1>{}([&](auto idx) {
        std::string label = (idx == 1 ? "flat_prefetch" : "global_prefetch");
        // Plain `auto` on purpose: `decltype(kernel_fn)` below must stay an
        // unqualified function-pointer type.
        auto kernel_fn = std::get<ck::Number<idx>{}>(kernels);

        all_passed &= ck::prefetch_op_util::
            test_prefetch_impl<decltype(kernel_fn), T, NUM_THREADS, NUM_SCALARS>(
                time_kernels, kernel_fn, label);
    });

    return all_passed;
}
/// Test entry point.
///
/// Usage: `<binary> [time_kernels]`, where the optional argument is an integer
/// (0 disables kernel timing, any non-zero value enables it). The argument is
/// only consulted when exactly one is given.
///
/// @return 0 when the hardware check fails (tests are skipped) or all tests
///         pass; 1 when any test fails or the argument is not a valid integer.
int main(int argc, char* argv[])
{
    if(!ck::is_gfx125_supported())
    {
        std::cout << "This feature is not supported by current HW, skipping tests." << std::endl;
        return 0;
    }

    bool time_kernels = false;
    if(argc == 2)
    {
        // std::stoi throws std::invalid_argument / std::out_of_range on malformed
        // input; report it instead of letting the exception terminate the process.
        try
        {
            time_kernels = std::stoi(argv[1]) != 0;
        }
        catch(const std::exception&)
        {
            std::cerr << "Invalid argument '" << argv[1]
                      << "': expected an integer (0 = no timing, non-zero = timing)." << std::endl;
            return 1;
        }
    }

    bool pass = true;

    // IS_L1_PREFETCH = false
    std::cout << "=== Testing L2 Global Cache Prefetch ===" << std::endl;
    pass &= run_test<float, 4096, 1024, false>(time_kernels);
    pass &= run_test<double, 4096, 512, false>(time_kernels);

    // IS_L1_PREFETCH = true
    std::cout << "=== Testing L1 Global Cache Prefetch ===" << std::endl;
    pass &= run_test<float, 4096, 1024, true>(time_kernels);
    pass &= run_test<double, 4096, 512, true>(time_kernels);

    std::cout << "TestGlobalPrefetch ..... " << (pass ? "SUCCESS" : "FAILURE") << std::endl;
    return pass ? 0 : 1;
}