mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
Fix the use of static_for in amd_buffer_addressing.hpp (#1337)
* Add an insert_dummy_dep_per_dword overload for length 64
* Fix insert_dummy_dep_per_dword and remove the overload for length 64
* Remove blank lines
---------
Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com>
[ROCm/composable_kernel commit: 37a347e380]
This commit is contained in:
@@ -552,8 +552,9 @@ namespace impl{
|
||||
template<index_t N>
|
||||
CK_TILE_DEVICE void insert_dummy_dep_per_dword(array<float, N>& b)
|
||||
{
|
||||
static_for<0, b.size(), 1>{}([&](auto i){
|
||||
asm volatile(" " : : "v"(b.get(i)) : "memory");
|
||||
constexpr auto kSize = remove_cvref_t<decltype(b)>::size();
|
||||
static_for<0, kSize, 1>{}([&](auto i){
|
||||
asm volatile(" " : : "v"(b.get(number<i>{})) : "memory");
|
||||
});
|
||||
}
|
||||
#if 1
|
||||
|
||||
Reference in New Issue
Block a user