mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 13:11:25 +00:00
[CK_TILE] FMHA Reduce register spilling in fwd with dropout (workaround for CI failures with clang-22) (#3221)
* Use vectorized stores for dropout randvals With no kPadSeqLenK the kernel uses 2 buffer_store_dwordx2 instead of 16 buffer_store_byte. This requires less registers and reduces spilling. * Calculate dropout randvals for storing and applying only once Even though it may add a small overhead when storing is not required, it uses significantly less registers and hence no spilling.
This commit is contained in:
@@ -333,23 +333,15 @@ struct BlockDropout
|
||||
return randval;
|
||||
};
|
||||
|
||||
if(is_store_randval)
|
||||
{
|
||||
static_for<0, kMPerBlock / kMPerStep, 1>{}([&](auto i_m0) {
|
||||
static_for<0, kNPerBlock / kNPerStep, 1>{}([&](auto i_n0) {
|
||||
const auto randval = generate_randval(i_m0, i_n0);
|
||||
// save to Global
|
||||
const auto randval_store = cast_tile<RandValOutputDataType>(randval);
|
||||
store_tile(randval_dram_window, randval_store);
|
||||
move_tile_window(randval_dram_window, {0, kNPerStep});
|
||||
});
|
||||
move_tile_window(randval_dram_window, {kMPerStep, -kNPerBlock});
|
||||
});
|
||||
move_tile_window(randval_dram_window, {-kMPerBlock, kNPerBlock});
|
||||
}
|
||||
static_for<0, kMPerBlock / kMPerStep, 1>{}([&](auto i_m0) {
|
||||
static_for<0, kNPerBlock / kNPerStep, 1>{}([&](auto i_n0) {
|
||||
const auto randval = generate_randval(i_m0, i_n0);
|
||||
if(is_store_randval)
|
||||
{
|
||||
const auto randval_store = cast_tile<RandValOutputDataType>(randval);
|
||||
store_tile(randval_dram_window, randval_store);
|
||||
}
|
||||
move_tile_window(randval_dram_window, {0, kNPerStep});
|
||||
// Drop values of P based on the generated probabilities
|
||||
constexpr auto randval_spans = decltype(randval)::get_distributed_spans();
|
||||
sweep_tile_span(randval_spans[number<0>{}], [&](auto idx0) {
|
||||
@@ -369,7 +361,9 @@ struct BlockDropout
|
||||
});
|
||||
});
|
||||
});
|
||||
move_tile_window(randval_dram_window, {kMPerStep, -kNPerBlock});
|
||||
});
|
||||
move_tile_window(randval_dram_window, {-kMPerBlock, kNPerBlock});
|
||||
}
|
||||
|
||||
const unsigned long long ph_seed;
|
||||
|
||||
Reference in New Issue
Block a user