mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-10 16:28:38 +00:00
[CK] add composable kernel support on gfx1250 (#6978)

## Motivation

Add composable kernel support on gfx1250.

## Technical Details

<!-- Explain the changes along with any relevant GitHub links. -->

## Test Plan

<!-- Explain any relevant testing done to verify this PR. -->

## Test Result

<!-- Briefly summarize test outcomes. -->

## Submission Checklist

- [ ] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.

---------

Co-authored-by: Qun Lin <qlin@amd.com>
Co-authored-by: jialuo12_amdeng <jia.luo@amd.com>
Co-authored-by: Andriy Roshchenko <andriy.roshchenko@amd.com>
Co-authored-by: hsivasun_amdeng <haresh.sivasuntharampillai@amd.com>
34 lines
939 B
C++
34 lines
939 B
C++
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
#pragma once
|
|
|
|
#include "ck/utility/amd_buffer_coherence.hpp"
|
|
|
|
namespace ck {
|
|
|
|
template <AmdBufferCoherenceEnum Coherence_ = AmdBufferCoherenceEnum::DefaultCoherence>
|
|
struct GlobalPrefetchDataOp
|
|
{
|
|
// addr needs to point to global memory!
|
|
__device__ __forceinline__ void operator()([[maybe_unused]] const void* addr) const
|
|
{
|
|
#if defined(__gfx125__)
|
|
__builtin_amdgcn_global_prefetch(addr, static_cast<index_t>(Coherence_));
|
|
#endif
|
|
}
|
|
};
|
|
|
|
template <AmdBufferCoherenceEnum Coherence_ = AmdBufferCoherenceEnum::DefaultCoherence>
|
|
struct FlatPrefetchDataOp
|
|
{
|
|
__device__ __forceinline__ void operator()([[maybe_unused]] const void* addr) const
|
|
{
|
|
#if defined(__gfx125__)
|
|
__builtin_amdgcn_flat_prefetch(addr, static_cast<index_t>(Coherence_));
|
|
#endif
|
|
}
|
|
};
|
|
|
|
} // namespace ck
|