mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-17 11:30:02 +00:00
[CK_TILE] optimize moe sorting kernel, boost large context case up to 20x (#2153)
* combine 2-3 as single stage
* support zeroing
* improve long tokens
* update specialization
* b16 ws
* 8bit topk optimize
* update 15 example
[ROCm/composable_kernel commit: 4e9b76f88c]
This commit is contained in:
@@ -13,6 +13,7 @@
|
||||
#include "ck_tile/core/arch/arch.hpp"
|
||||
#include "ck_tile/core/arch/generic_memory_space_atomic.hpp"
|
||||
#include "ck_tile/core/arch/utility.hpp"
|
||||
#include "ck_tile/core/arch/workgroup_barrier.hpp"
|
||||
#include "ck_tile/core/config.hpp"
|
||||
#include "ck_tile/core/container/array.hpp"
|
||||
#include "ck_tile/core/container/container_helper.hpp"
|
||||
|
||||
Reference in New Issue
Block a user