mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-07-01 04:07:56 +00:00
Fix clang format
This commit is contained in:
@@ -26,7 +26,8 @@ struct BlockGemmARegBSmemCRegV1K8Policy
|
||||
#if !defined(TOY_FA_FWD_QK_SWIZZLE)
|
||||
return make_tuple(WarpGemmMfmaF16F16F32M32N32K16TransposedCDistribution{}, 4, 1);
|
||||
#else
|
||||
return make_tuple(WarpGemmMfmaF16F16F32M32N32K16SwizzleBTransposedCDistribution{}, 4, 1);
|
||||
return make_tuple(
|
||||
WarpGemmMfmaF16F16F32M32N32K16SwizzleBTransposedCDistribution{}, 4, 1);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
|
||||
@@ -64,9 +64,9 @@ struct FlashAttentionFwdImpl
|
||||
constexpr index_t kNPerBlock = kN1PerBlock;
|
||||
constexpr index_t kKPerBlock = kK1PerBlock;
|
||||
#if !defined(TOY_FA_FWD_QK_SWIZZLE)
|
||||
constexpr index_t kKPack = 4;
|
||||
constexpr index_t kKPack = 4;
|
||||
#else
|
||||
constexpr index_t kKPack = 8;
|
||||
constexpr index_t kKPack = 8;
|
||||
#endif
|
||||
|
||||
constexpr auto dataTypeSize = sizeof(VDataType);
|
||||
@@ -210,7 +210,7 @@ struct FlashAttentionFwdImpl
|
||||
{0, 0},
|
||||
make_static_tile_distribution(gemm1.MakeBBlockDistributionEncode()));
|
||||
#else
|
||||
auto v_lds_window = make_tile_window(
|
||||
auto v_lds_window = make_tile_window(
|
||||
v_lds, make_tuple(number<kN1PerBlock>{}, number<kK1PerBlock>{}), {0, 0});
|
||||
#endif
|
||||
|
||||
|
||||
@@ -26,7 +26,8 @@ struct BlockGemmARegBSmemCRegV1K8Policy
|
||||
#if !defined(TOY_FA_FWD_QK_SWIZZLE)
|
||||
return make_tuple(WarpGemmMfmaF16F16F32M32N32K16TransposedCDistribution{}, 4, 1);
|
||||
#else
|
||||
return make_tuple(WarpGemmMfmaF16F16F32M32N32K16SwizzleBTransposedCDistribution{}, 4, 1);
|
||||
return make_tuple(
|
||||
WarpGemmMfmaF16F16F32M32N32K16SwizzleBTransposedCDistribution{}, 4, 1);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
|
||||
@@ -90,13 +90,13 @@ struct FlashAttentionFwd
|
||||
const auto f = [](index_t dividend, index_t divisor) {
|
||||
index_t quotient = dividend / divisor;
|
||||
index_t modulus = dividend - quotient * divisor;
|
||||
|
||||
|
||||
return make_tuple(quotient, modulus);
|
||||
};
|
||||
|
||||
|
||||
const auto [itmp, id_tile_n] = f(id_block, num_tile_n1);
|
||||
const auto [id_tile_batch, id_tile_m] = f(itmp, num_tile_m0);
|
||||
|
||||
|
||||
const index_t iBatch = __builtin_amdgcn_readfirstlane(id_tile_batch);
|
||||
const index_t iM0 = __builtin_amdgcn_readfirstlane(id_tile_m * kM0PerBlock);
|
||||
const index_t iN1 = __builtin_amdgcn_readfirstlane(id_tile_n * kN1PerBlock);
|
||||
|
||||
@@ -64,9 +64,9 @@ struct FlashAttentionFwdImpl
|
||||
constexpr index_t kNPerBlock = kN1PerBlock;
|
||||
constexpr index_t kKPerBlock = kK1PerBlock;
|
||||
#if !defined(TOY_FA_FWD_QK_SWIZZLE)
|
||||
constexpr index_t kKPack = 4;
|
||||
constexpr index_t kKPack = 4;
|
||||
#else
|
||||
constexpr index_t kKPack = 8;
|
||||
constexpr index_t kKPack = 8;
|
||||
#endif
|
||||
|
||||
constexpr auto dataTypeSize = sizeof(VDataType);
|
||||
@@ -210,7 +210,7 @@ struct FlashAttentionFwdImpl
|
||||
{0, 0},
|
||||
make_static_tile_distribution(gemm1.MakeBBlockDistributionEncode()));
|
||||
#else
|
||||
auto v_lds_window = make_tile_window(
|
||||
auto v_lds_window = make_tile_window(
|
||||
v_lds, make_tuple(number<kN1PerBlock>{}, number<kK1PerBlock>{}), {0, 0});
|
||||
#endif
|
||||
|
||||
|
||||
Reference in New Issue
Block a user