mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-07 08:15:04 +00:00
Add atomic add float4
This commit is contained in:
@@ -65,7 +65,7 @@ using DeviceConvBwdWeightInstance =
|
||||
1, // CShuffleMRepeatPerShuffle
|
||||
1, // CShuffleNRepeatPerShuffle
|
||||
S<1, 32, 1, 4>, // CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
|
||||
2>; // CShuffleBlockTransferScalarPerVector_NPerBlock
|
||||
128 / (sizeof(WeiDataType) * CHAR_BIT)>; // CShuffleBlockTransferScalarPerVector_NPerBlock
|
||||
|
||||
template <ck::index_t NDimSpatial>
|
||||
using HostConvBwdWeightInstance = ck::tensor_operation::host::ReferenceConvBwdWeight<NDimSpatial,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#pragma once
|
||||
#include "data_type.hpp"
|
||||
@@ -71,6 +71,29 @@ __device__ float2_t atomic_add<float2_t>(float2_t* p_dst, const float2_t& x)
|
||||
return vy.template AsType<float2_t>()[I0];
|
||||
}
|
||||
|
||||
template <>
|
||||
__device__ float4_t atomic_add<float4_t>(float4_t* p_dst, const float4_t& x)
|
||||
{
|
||||
constexpr auto I0 = Number<0>{};
|
||||
constexpr auto I1 = Number<1>{};
|
||||
constexpr auto I2 = Number<2>{};
|
||||
constexpr auto I3 = Number<3>{};
|
||||
|
||||
const vector_type<float, 4> vx{x};
|
||||
vector_type<float, 4> vy{0};
|
||||
|
||||
vy.template AsType<float>()(I0) =
|
||||
atomicAdd(c_style_pointer_cast<float*>(p_dst), vx.template AsType<float>()[I0]);
|
||||
vy.template AsType<float>()(I1) =
|
||||
atomicAdd(c_style_pointer_cast<float*>(p_dst) + 1, vx.template AsType<float>()[I1]);
|
||||
vy.template AsType<float>()(I2) =
|
||||
atomicAdd(c_style_pointer_cast<float*>(p_dst) + 2, vx.template AsType<float>()[I2]);
|
||||
vy.template AsType<float>()(I3) =
|
||||
atomicAdd(c_style_pointer_cast<float*>(p_dst) + 3, vx.template AsType<float>()[I3]);
|
||||
|
||||
return vy.template AsType<float4_t>()[I0];
|
||||
}
|
||||
|
||||
template <>
|
||||
__device__ double2_t atomic_add<double2_t>(double2_t* p_dst, const double2_t& x)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user