mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 09:16:52 +00:00
tidy
This commit is contained in:
@@ -71,7 +71,7 @@ struct BlockwiseGemmDlops_km_kn_m0m1n0n1_v2r2_pipeline_2x2
|
||||
static constexpr index_t N0 = N / N1;
|
||||
|
||||
__host__ __device__ static constexpr auto
|
||||
MakeAKM0M1BlockDescriptor(const AKMBlockDesc& a_k_m_block_desc)
|
||||
MakeAKM0M1BlockDescriptor(const AKMBlockDesc& /* a_k_m_block_desc */)
|
||||
{
|
||||
const auto a_k_m0_m1_block_desc = transform_dynamic_tensor_descriptor(
|
||||
AKMBlockDesc{},
|
||||
@@ -84,7 +84,7 @@ struct BlockwiseGemmDlops_km_kn_m0m1n0n1_v2r2_pipeline_2x2
|
||||
}
|
||||
|
||||
__host__ __device__ static constexpr auto
|
||||
MakeBKN0N1BlockDescriptor(const BKNBlockDesc& b_k_n_block_desc)
|
||||
MakeBKN0N1BlockDescriptor(const BKNBlockDesc& /* b_k_n_block_desc */)
|
||||
{
|
||||
const auto b_k_n0_n1_block_desc = transform_dynamic_tensor_descriptor(
|
||||
BKNBlockDesc{},
|
||||
@@ -194,7 +194,7 @@ struct BlockwiseGemmDlops_km_kn_m0m1n0n1_v2r2_pipeline_2x2
|
||||
typename ABlockBuffer,
|
||||
typename BBlockBuffer,
|
||||
typename CThreadBuffer>
|
||||
__device__ void Run(const CM0M1N0N1ThreadDesc& c_m0_m1_n0_n1_thread_desc,
|
||||
__device__ void Run(const CM0M1N0N1ThreadDesc& /* c_m0_m1_n0_n1_thread_desc */,
|
||||
const ABlockBuffer& a_block_buf,
|
||||
const BBlockBuffer& b_block_buf,
|
||||
CThreadBuffer& c_thread_buf) const
|
||||
|
||||
@@ -120,9 +120,6 @@ struct BlockwiseGemmDlops_km_kn_m0m1n0n1_v3
|
||||
"wrong! inconsistent type");
|
||||
|
||||
constexpr auto I0 = Number<0>{};
|
||||
constexpr auto I1 = Number<1>{};
|
||||
constexpr auto I2 = Number<2>{};
|
||||
constexpr auto I3 = Number<3>{};
|
||||
|
||||
constexpr auto a_block_mtx = BlockMatrixA{};
|
||||
|
||||
|
||||
@@ -270,7 +270,6 @@ struct BlockwiseGemmXdlops_km_kn_m0m1m2n_v1_2x2pipeline
|
||||
const index_t waveId = thread_id / WaveSize;
|
||||
const index_t laneId = thread_id % WaveSize;
|
||||
const index_t waveId_m = waveId / NWaves;
|
||||
const index_t waveId_n = waveId % NWaves;
|
||||
|
||||
if constexpr(xdlops_gemm.IsKReduction)
|
||||
{
|
||||
|
||||
@@ -619,17 +619,6 @@ struct GridwiseDynamicGemmDlops_km_kn_mn_v1r2
|
||||
|
||||
// output: register to global memory
|
||||
{
|
||||
constexpr index_t M11 =
|
||||
M1PerThreadM111 * M11N11ThreadClusterM1100 * M11N11ThreadClusterM1101;
|
||||
constexpr index_t N11 =
|
||||
N1PerThreadN111 * M11N11ThreadClusterN1100 * M11N11ThreadClusterN1101;
|
||||
|
||||
constexpr index_t M10 = MPerBlockM1 / M11;
|
||||
constexpr index_t N10 = NPerBlockN1 / N11;
|
||||
|
||||
constexpr index_t M111 = M1PerThreadM111;
|
||||
constexpr index_t N111 = N1PerThreadN111;
|
||||
|
||||
constexpr auto c_m0_m10_m11_n0_n10_n11_thread_desc =
|
||||
make_dynamic_naive_tensor_descriptor_packed_v2(
|
||||
make_tuple(I1,
|
||||
|
||||
@@ -191,12 +191,12 @@ struct GridwiseDynamicGemmDlops_km_kn_mn_v1r3
|
||||
const auto M = a_k0_m_k1_grid_desc.GetLength(I1);
|
||||
const auto N = b_k0_n_k1_grid_desc.GetLength(I1);
|
||||
const auto K0 = a_k0_m_k1_grid_desc.GetLength(I0);
|
||||
const auto K1 = a_k0_m_k1_grid_desc.GetLength(I2);
|
||||
|
||||
// TODO: also check validity of all components (blockwise-copy, threadwise-copy, etc)
|
||||
|
||||
return (M == c_m_n_grid_desc.GetLength(I0) && N == c_m_n_grid_desc.GetLength(I1) &&
|
||||
K0 == b_k0_n_k1_grid_desc.GetLength(I0) &&
|
||||
K1 == a_k0_m_k1_grid_desc.GetLength(I2) &&
|
||||
K1 == b_k0_n_k1_grid_desc.GetLength(I2)) &&
|
||||
(M % MPerBlockM1 == 0 && N % NPerBlockN1 == 0 && K0 % KPerBlock == 0);
|
||||
}
|
||||
@@ -608,19 +608,6 @@ struct GridwiseDynamicGemmDlops_km_kn_mn_v1r3
|
||||
|
||||
// output: register to global memory
|
||||
{
|
||||
constexpr auto M11 =
|
||||
Number<container_reduce(M11N11ThreadClusterM110Xs{}, math::multiplies_v2{}, I1) *
|
||||
M1PerThreadM111>{};
|
||||
constexpr auto N11 =
|
||||
Number<container_reduce(M11N11ThreadClusterN110Xs{}, math::multiplies_v2{}, I1) *
|
||||
N1PerThreadN111>{};
|
||||
|
||||
constexpr index_t M10 = MPerBlockM1 / M11;
|
||||
constexpr index_t N10 = NPerBlockN1 / N11;
|
||||
|
||||
constexpr index_t M111 = M1PerThreadM111;
|
||||
constexpr index_t N111 = N1PerThreadN111;
|
||||
|
||||
constexpr auto c_m0_m10_m11_n0_n10_n11_thread_desc =
|
||||
make_dynamic_naive_tensor_descriptor_packed_v2(
|
||||
make_tuple(I1,
|
||||
|
||||
@@ -102,7 +102,6 @@ struct GridwiseDynamicGemmDlops_km_kn_mn_v3
|
||||
|
||||
// divide block work by [M, N]
|
||||
#if 0
|
||||
const auto k_block_work_num = K / Number<KPerBlock>{};
|
||||
const auto ho_block_work_num = Ho / Number<HoPerBlock>{};
|
||||
const auto wo_block_work_num = Wo / Number<WoPerBlock>{};
|
||||
const auto hwo_block_work_num = ho_block_work_num * wo_block_work_num;
|
||||
@@ -114,7 +113,6 @@ struct GridwiseDynamicGemmDlops_km_kn_mn_v3
|
||||
const index_t wo_block_work_id = hwo_block_work_id - ho_block_work_id * wo_block_work_num;
|
||||
#else
|
||||
// Hack: this force result into SGPR
|
||||
const index_t k_block_work_num = __builtin_amdgcn_readfirstlane(K / KPerBlock);
|
||||
const index_t ho_block_work_num = __builtin_amdgcn_readfirstlane(Ho / HoPerBlock);
|
||||
const index_t wo_block_work_num = __builtin_amdgcn_readfirstlane(Wo / WoPerBlock);
|
||||
const index_t hwo_block_work_num = ho_block_work_num * wo_block_work_num;
|
||||
|
||||
@@ -269,11 +269,6 @@ struct GridwiseDynamicGemm_k0mk1_k0nk1_mn_xdlops_v2r3
|
||||
const CM0M1M2NGridDesc& c_m0_m1_m2_n_grid_desc,
|
||||
const CBlockClusterAdaptor& c_block_cluster_adaptor)
|
||||
{
|
||||
constexpr auto I0 = Number<0>{};
|
||||
constexpr auto I1 = Number<1>{};
|
||||
constexpr auto I2 = Number<2>{};
|
||||
constexpr auto I3 = Number<3>{};
|
||||
|
||||
const auto a_grid_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
|
||||
p_a_grid, a_k0_m_k1_grid_desc.GetElementSpaceSize());
|
||||
const auto b_grid_buf = make_dynamic_buffer<AddressSpaceEnum_t::Global>(
|
||||
|
||||
@@ -57,8 +57,6 @@ struct ThreadwiseGemmDlops_km_kn_mn_v3
|
||||
|
||||
constexpr auto I0 = Number<0>{};
|
||||
constexpr auto I1 = Number<1>{};
|
||||
constexpr auto I2 = Number<2>{};
|
||||
constexpr auto I3 = Number<3>{};
|
||||
|
||||
constexpr auto E = ADesc{}.GetLength(I0);
|
||||
constexpr auto K = ADesc{}.GetLength(I1);
|
||||
|
||||
Reference in New Issue
Block a user