mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-11 17:00:18 +00:00
tidy
This commit is contained in:
@@ -80,16 +80,16 @@ __global__ void
|
||||
// first cast void CONSTANT void* to void*
|
||||
// second cast void* to Desc*
|
||||
// the copy constructor of tensor descriptor doesn't take address_space(4)
|
||||
const auto a_k_m0_m1_grid_desc =
|
||||
*reinterpret_cast<const AKM0M1GridDesc*>((const void*)p_a_k_m0_m1_grid_desc);
|
||||
const auto b_k_n0_n1_grid_desc =
|
||||
*reinterpret_cast<const BKN0N1GridDesc*>((const void*)p_b_k_n0_n1_grid_desc);
|
||||
const auto a_k_m0_m1_grid_desc = *reinterpret_cast<const AKM0M1GridDesc*>(
|
||||
cast_pointer_to_generic_address_space(p_a_k_m0_m1_grid_desc));
|
||||
const auto b_k_n0_n1_grid_desc = *reinterpret_cast<const BKN0N1GridDesc*>(
|
||||
cast_pointer_to_generic_address_space(p_b_k_n0_n1_grid_desc));
|
||||
const auto c_m0_m10_m11_n0_n10_n11_grid_desc =
|
||||
*reinterpret_cast<const CM0M10M11N0N10N11GridDesc*>(
|
||||
(const void*)p_c_m0_m10_m11_n0_n10_n11_grid_desc);
|
||||
cast_pointer_to_generic_address_space(p_c_m0_m10_m11_n0_n10_n11_grid_desc));
|
||||
const auto c_blockid_to_m0_n0_block_cluster_adaptor =
|
||||
*reinterpret_cast<const CBlockIdToM0N0BlockClusterAdaptor*>(
|
||||
(const void*)p_c_blockid_to_m0_n0_block_cluster_adaptor);
|
||||
cast_pointer_to_generic_address_space(p_c_blockid_to_m0_n0_block_cluster_adaptor));
|
||||
|
||||
constexpr index_t shared_block_size =
|
||||
GridwiseGemm::GetSharedMemoryNumberOfByte() / sizeof(FloatAB);
|
||||
|
||||
@@ -80,16 +80,16 @@ __global__ void
|
||||
// first cast void CONSTANT void* to void*
|
||||
// second cast void* to Desc*
|
||||
// the copy constructor of tensor descriptor doesn't take address_space(4)
|
||||
const auto a_k0_m0_m1_k1_grid_desc =
|
||||
*reinterpret_cast<const AK0M0M1K1GridDesc*>((const void*)p_a_k0_m0_m1_k1_grid_desc);
|
||||
const auto b_k0_n0_n1_k1_grid_desc =
|
||||
*reinterpret_cast<const BK0N0N1K1GridDesc*>((const void*)p_b_k0_n0_n1_k1_grid_desc);
|
||||
const auto a_k0_m0_m1_k1_grid_desc = *reinterpret_cast<const AK0M0M1K1GridDesc*>(
|
||||
cast_pointer_to_generic_address_space(p_a_k0_m0_m1_k1_grid_desc));
|
||||
const auto b_k0_n0_n1_k1_grid_desc = *reinterpret_cast<const BK0N0N1K1GridDesc*>(
|
||||
cast_pointer_to_generic_address_space(p_b_k0_n0_n1_k1_grid_desc));
|
||||
const auto c_m0_m10_m11_n0_n10_n11_grid_desc =
|
||||
*reinterpret_cast<const CM0M10M11N0N10N11GridDesc*>(
|
||||
(const void*)p_c_m0_m10_m11_n0_n10_n11_grid_desc);
|
||||
cast_pointer_to_generic_address_space(p_c_m0_m10_m11_n0_n10_n11_grid_desc));
|
||||
const auto c_blockid_to_m0_n0_block_cluster_adaptor =
|
||||
*reinterpret_cast<const CBlockIdToM0N0BlockClusterAdaptor*>(
|
||||
(const void*)p_c_blockid_to_m0_n0_block_cluster_adaptor);
|
||||
cast_pointer_to_generic_address_space(p_c_blockid_to_m0_n0_block_cluster_adaptor));
|
||||
|
||||
constexpr index_t shared_block_size =
|
||||
GridwiseGemm::GetSharedMemoryNumberOfByte() / sizeof(FloatAB);
|
||||
|
||||
@@ -69,14 +69,14 @@ __global__ void
|
||||
constexpr index_t shared_block_size =
|
||||
GridwiseGemm::GetSharedMemoryNumberOfByte() / sizeof(FloatAB);
|
||||
|
||||
const auto a_k0_m_k1_grid_desc =
|
||||
*reinterpret_cast<const AK0MK1GridDesc*>((const void*)p_a_k0_m_k1_grid_desc);
|
||||
const auto b_k0_n_k1_grid_desc =
|
||||
*reinterpret_cast<const BK0NK1GridDesc*>((const void*)p_b_k0_n_k1_grid_desc);
|
||||
const auto c_m0_m1_m2_n_grid_desc =
|
||||
*reinterpret_cast<const CM0M1M2NGridDesc*>((const void*)p_c_m0_m1_m2_n_grid_desc);
|
||||
const auto c_block_cluster_adaptor =
|
||||
*reinterpret_cast<const CBlockClusterAdaptor*>((const void*)p_c_block_cluster_adaptor);
|
||||
const auto a_k0_m_k1_grid_desc = *reinterpret_cast<const AK0MK1GridDesc*>(
|
||||
cast_pointer_to_generic_address_space(p_a_k0_m_k1_grid_desc));
|
||||
const auto b_k0_n_k1_grid_desc = *reinterpret_cast<const BK0NK1GridDesc*>(
|
||||
cast_pointer_to_generic_address_space(p_b_k0_n_k1_grid_desc));
|
||||
const auto c_m0_m1_m2_n_grid_desc = *reinterpret_cast<const CM0M1M2NGridDesc*>(
|
||||
cast_pointer_to_generic_address_space(p_c_m0_m1_m2_n_grid_desc));
|
||||
const auto c_block_cluster_adaptor = *reinterpret_cast<const CBlockClusterAdaptor*>(
|
||||
cast_pointer_to_generic_address_space(p_c_block_cluster_adaptor));
|
||||
|
||||
__shared__ FloatAB p_shared_block[shared_block_size];
|
||||
|
||||
|
||||
25
composable_kernel/include/utility/amd_address_space.hpp
Normal file
25
composable_kernel/include/utility/amd_address_space.hpp
Normal file
@@ -0,0 +1,25 @@
|
||||
#ifndef CK_AMD_ADDRESS_SPACE_HPP
|
||||
#define CK_AMD_ADDRESS_SPACE_HPP
|
||||
|
||||
#include "config.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
enum AddressSpaceEnum_t
|
||||
{
|
||||
Generic,
|
||||
Global,
|
||||
Lds,
|
||||
Sgpr,
|
||||
Vgpr,
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__device__ T* cast_pointer_to_generic_address_space(T CONSTANT* p)
|
||||
{
|
||||
return (T*)p;
|
||||
}
|
||||
|
||||
} // namespace ck
|
||||
|
||||
#endif
|
||||
@@ -1,12 +1,12 @@
|
||||
#ifndef CK_AMD_BUFFER_ADDRESSING_V2_HPP
|
||||
#define CK_AMD_BUFFER_ADDRESSING_V2_HPP
|
||||
#ifndef CK_AMD_BUFFER_ADDRESSING_HPP
|
||||
#define CK_AMD_BUFFER_ADDRESSING_HPP
|
||||
|
||||
#include "data_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
template <typename T>
|
||||
union BufferResource_v2
|
||||
union BufferResource
|
||||
{
|
||||
// 128 bit SGPRs to supply buffer resource in buffer instructions
|
||||
// https://rocm-documentation.readthedocs.io/en/latest/GCN_ISA_Manuals/testdocbook.html#vector-memory-buffer-instructions
|
||||
@@ -19,7 +19,7 @@ union BufferResource_v2
|
||||
template <typename T>
|
||||
__device__ int32x4_t make_wave_buffer_resource(T* p_wave, index_t data_space_size)
|
||||
{
|
||||
BufferResource_v2<T> wave_buffer_resource;
|
||||
BufferResource<T> wave_buffer_resource;
|
||||
|
||||
// wavewise base address (64 bit)
|
||||
wave_buffer_resource.address(Number<0>{}) = const_cast<remove_cv_t<T>*>(p_wave);
|
||||
@@ -23,9 +23,10 @@
|
||||
#include "tuple.hpp"
|
||||
#include "tuple_helper.hpp"
|
||||
#include "type.hpp"
|
||||
#include "utility.hpp"
|
||||
#include "magic_division.hpp"
|
||||
#include "amd_buffer_addressing_v2.hpp"
|
||||
#include "utility.hpp"
|
||||
#include "amd_address_space.hpp"
|
||||
#include "amd_buffer_addressing.hpp"
|
||||
#include "static_buffer.hpp"
|
||||
#include "dynamic_buffer.hpp"
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
#endif
|
||||
#include "bfloat16_dev.hpp"
|
||||
|
||||
// address space for kernel parameter
|
||||
// "Constant" address space for kernel parameter
|
||||
#define CONSTANT __attribute__((address_space(4)))
|
||||
|
||||
// GPU target
|
||||
@@ -120,15 +120,6 @@
|
||||
|
||||
namespace ck {
|
||||
|
||||
enum AddressSpaceEnum_t
|
||||
{
|
||||
Generic,
|
||||
Global,
|
||||
Lds,
|
||||
Sgpr,
|
||||
Vgpr
|
||||
};
|
||||
|
||||
enum InMemoryDataOperationEnum_t
|
||||
{
|
||||
Set,
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
namespace ck {
|
||||
|
||||
#include "amd_buffer_addressing_v2.hpp"
|
||||
#include "amd_buffer_addressing.hpp"
|
||||
|
||||
template <AddressSpaceEnum_t BufferAddressSpace, typename T, typename ElementSpaceSize>
|
||||
struct DynamicBuffer
|
||||
|
||||
Reference in New Issue
Block a user