mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-15 10:37:44 +00:00
* moe sorting ex
* fix bug for race condition
* fix bug and optimize large expert
* fix
* optimize with sub_token_oneshot
* support skip empty tokens for expert sorting
* update moe_sorting
* tidy code
[ROCm/composable_kernel commit: c0adab4850]
21 lines
1.1 KiB
C++
21 lines
1.1 KiB
C++
// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#pragma once
|
|
|
|
#include "ck_tile/ops/fused_moe/kernel/fused_moegemm_kernel.hpp"
|
|
#include "ck_tile/ops/fused_moe/kernel/fused_moegemm_shape.hpp"
|
|
#include "ck_tile/ops/fused_moe/kernel/fused_moegemm_tile_partitioner.hpp"
|
|
#include "ck_tile/ops/fused_moe/kernel/moe_sorting_kernel.hpp"
|
|
#include "ck_tile/ops/fused_moe/kernel/moe_sorting_problem.hpp"
|
|
#include "ck_tile/ops/fused_moe/pipeline/fused_moegemm_pipeline_flatmm_ex.hpp"
|
|
#include "ck_tile/ops/fused_moe/pipeline/fused_moegemm_pipeline_flatmm_policy.hpp"
|
|
#include "ck_tile/ops/fused_moe/pipeline/fused_moegemm_pipeline_flatmm_uk.hpp"
|
|
#include "ck_tile/ops/fused_moe/pipeline/fused_moegemm_pipeline_problem.hpp"
|
|
#include "ck_tile/ops/fused_moe/pipeline/fused_moegemm_traits.hpp"
|
|
#include "ck_tile/ops/fused_moe/pipeline/moe_sorting_pipeline.hpp"
|
|
#include "ck_tile/ops/fused_moe/pipeline/moe_sorting_policy.hpp"
|
|
#include "ck_tile/ops/common/generic_2d_block_shape.hpp"
|
|
#include "ck_tile/ops/common/tensor_layout.hpp"
|
|
#include "ck_tile/ops/common/utils.hpp"
|