[CK_TILE] Add permuteN optimization to remove lds operation in c_shuffle (#2764)

* Add permuteN optimization to remove the LDS operation in c_shuffle

* Add the changelog entry

---------

Co-authored-by: ThomasNing <thomas.ning@amd.com>
This commit is contained in:
lalala-sh
2025-09-09 13:02:48 +08:00
committed by GitHub
parent 92b07380d3
commit 75570d0fa8
5 changed files with 189 additions and 4 deletions

View File

@@ -175,6 +175,9 @@ struct sequence
return sequence<type::get(number<Ids>{})...>{};
}
// sum of all values in the pack "Is"; written as a binary fold with init value 0,
// so an empty pack yields 0
CK_TILE_HOST_DEVICE static constexpr auto sum() { return (Is + ... + 0); }
// product of all values in the pack "Is"; written as a binary fold with init value 1,
// so an empty pack yields 1
CK_TILE_HOST_DEVICE static constexpr auto product() { return (Is * ... * 1); }
// modify element at index "I" with value "X"
template <index_t I, index_t X>
CK_TILE_HOST_DEVICE static constexpr auto modify(number<I>, number<X>)