mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-29 11:16:59 +00:00
[CK] suppress compiler warnings while building pytorch. (#7760) ## Motivation Recently added compiler flags that are required to suppress false warnings from the latest staging compiler are not recognized by older compiler versions and are triggering an avalanche of warnings. A previous attempt to suppress them by using the -Wno-unknown-warning-option flag didn't help, because that flag wasn't recognized either and just added more warnings. I've verified that the current approach of checking the clang version actually works as intended and makes the warnings go away. ## Technical Details <!-- Explain the changes along with any relevant GitHub links. --> ## Test Plan <!-- Explain any relevant testing done to verify this PR. --> ## Test Result <!-- Briefly summarize test outcomes. --> ## Submission Checklist - [ ] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
71 lines
1.7 KiB
C++
71 lines
1.7 KiB
C++
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
#pragma once
|
|
#include <hip/hip_runtime.h>
|
|
#include <stdint.h>
|
|
|
|
#if __clang_major__ >= 23
|
|
#pragma clang diagnostic push
|
|
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
|
#endif
|
|
namespace ck {
|
|
struct workgroup_barrier
|
|
{
|
|
__device__ workgroup_barrier(uint32_t* ptr) : base_ptr(ptr) {}
|
|
|
|
__device__ uint32_t ld(uint32_t offset)
|
|
{
|
|
return __atomic_load_n(base_ptr + offset, __ATOMIC_RELAXED);
|
|
}
|
|
|
|
__device__ void wait_eq(uint32_t offset, uint32_t value)
|
|
{
|
|
if(threadIdx.x == 0)
|
|
{
|
|
while(ld(offset) != value) {}
|
|
}
|
|
__syncthreads();
|
|
}
|
|
|
|
__device__ void wait_lt(uint32_t offset, uint32_t value)
|
|
{
|
|
if(threadIdx.x == 0)
|
|
{
|
|
while(ld(offset) < value) {}
|
|
}
|
|
__syncthreads();
|
|
}
|
|
|
|
__device__ void wait_set(uint32_t offset, uint32_t compare, uint32_t value)
|
|
{
|
|
if(threadIdx.x == 0)
|
|
{
|
|
while(atomicCAS(base_ptr + offset, compare, value) != compare) {}
|
|
}
|
|
__syncthreads();
|
|
}
|
|
|
|
// enter critical zoon, assume buffer is zero when launch kernel
|
|
__device__ void aquire(uint32_t offset) { wait_set(offset, 0, 1); }
|
|
|
|
// exit critical zoon, assume buffer is zero when launch kernel
|
|
__device__ void release(uint32_t offset) { wait_set(offset, 1, 0); }
|
|
|
|
__device__ void inc(uint32_t offset)
|
|
{
|
|
__syncthreads();
|
|
if(threadIdx.x == 0)
|
|
{
|
|
atomicAdd(base_ptr + offset, 1);
|
|
}
|
|
}
|
|
|
|
uint32_t* base_ptr;
|
|
};
|
|
} // namespace ck
|
|
|
|
#if __clang_major__ >= 23
|
|
#pragma clang diagnostic pop
|
|
#endif
|