mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-13 17:55:48 +00:00
Update to the non-whole-k-prefetch path in the whole_k_prefetch pipeline
This commit is contained in:
@@ -505,10 +505,12 @@ struct BlockFmhaPipelineQRKSVSWholeKPrefetch
|
||||
move_tile_window(k_dram_window, {kN0Sub, 0});
|
||||
};
|
||||
|
||||
if constexpr(i_n0 < NumPrefetchV)
|
||||
if constexpr(i_n0 == n0_loops - 1)
|
||||
{
|
||||
v_tiles[i_n0] = load_tile(v_dram_window);
|
||||
move_tile_window(v_dram_window, {0, kK1});
|
||||
static_for<0, NumPrefetchV, 1>{}([&](auto i_k1) {
|
||||
v_tiles[i_k1] = load_tile(v_dram_window);
|
||||
move_tile_window(v_dram_window, {0, kK1});
|
||||
});
|
||||
};
|
||||
|
||||
__builtin_amdgcn_sched_barrier(0x00000001);
|
||||
|
||||
@@ -509,10 +509,12 @@ struct BlockFmhaPipelineQRKSVSWholeKPrefetchTrLoad
|
||||
move_tile_window(k_dram_window, {kN0Sub, 0});
|
||||
};
|
||||
|
||||
if constexpr(i_n0 < NumPrefetchV)
|
||||
if constexpr(i_n0 == n0_loops - 1)
|
||||
{
|
||||
v_tiles[i_n0] = load_tile(v_dram_window);
|
||||
move_tile_window(v_dram_window, {kK1, 0});
|
||||
static_for<0, NumPrefetchV, 1>{}([&](auto i_k1) {
|
||||
v_tiles[i_k1] = load_tile(v_dram_window);
|
||||
move_tile_window(v_dram_window, {kK1, 0});
|
||||
});
|
||||
};
|
||||
|
||||
__builtin_amdgcn_sched_barrier(0x00000001);
|
||||
|
||||
Reference in New Issue
Block a user