Files
composable_kernel/include/ck/utility/workgroup_barrier.hpp
Illia Silin c24e528481 [rocm-libraries] ROCm/rocm-libraries#7760 (commit a61bc76)
[CK] suppress compiler warnings while building pytorch. (#7760)

## Motivation

Recently added compiler flags that are required to suppress false
warnings by latest staging compiler are not recognized by older compiler
versions and are triggering an avalanche of warnings. Previous attempt
to suppress them by using -Wno-unknown-warning-option flag didn't help,
because that flag wasn't recognized either and just added more warnings.
I've verified that current approach by checking the clang version
actually works as intended and makes the warnings go away.

## Technical Details

<!-- Explain the changes along with any relevant GitHub links. -->

## Test Plan

<!-- Explain any relevant testing done to verify this PR. -->

## Test Result

<!-- Briefly summarize test outcomes. -->

## Submission Checklist

- [ ] Look over the contributing guidelines at
https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
2026-05-27 06:56:58 -07:00

71 lines
1.7 KiB
C++

// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#pragma once
#include <hip/hip_runtime.h>
#include <stdint.h>
#if __clang_major__ >= 23
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
#endif
namespace ck {
struct workgroup_barrier
{
__device__ workgroup_barrier(uint32_t* ptr) : base_ptr(ptr) {}
__device__ uint32_t ld(uint32_t offset)
{
return __atomic_load_n(base_ptr + offset, __ATOMIC_RELAXED);
}
__device__ void wait_eq(uint32_t offset, uint32_t value)
{
if(threadIdx.x == 0)
{
while(ld(offset) != value) {}
}
__syncthreads();
}
__device__ void wait_lt(uint32_t offset, uint32_t value)
{
if(threadIdx.x == 0)
{
while(ld(offset) < value) {}
}
__syncthreads();
}
__device__ void wait_set(uint32_t offset, uint32_t compare, uint32_t value)
{
if(threadIdx.x == 0)
{
while(atomicCAS(base_ptr + offset, compare, value) != compare) {}
}
__syncthreads();
}
// enter critical zoon, assume buffer is zero when launch kernel
__device__ void aquire(uint32_t offset) { wait_set(offset, 0, 1); }
// exit critical zoon, assume buffer is zero when launch kernel
__device__ void release(uint32_t offset) { wait_set(offset, 1, 0); }
__device__ void inc(uint32_t offset)
{
__syncthreads();
if(threadIdx.x == 0)
{
atomicAdd(base_ptr + offset, 1);
}
}
uint32_t* base_ptr;
};
} // namespace ck
#if __clang_major__ >= 23
#pragma clang diagnostic pop
#endif