[CK_TILE][HOTFIX] WA for address space by disable it completely (#1947)

* port all moe changes from ck_moe_gemm branch

* refine codes in the pr

* fix tail odd

* fix clang format

* fix clang format2

* make hot loop scheduler compatible with 16x16 and 32x32

* clang format

* fix per token quant

* rename moe example

* clang format

* WA for address space by disable it completely

* hot fix moe gemm2

---------

Co-authored-by: coderfeli <coderfeli@163.com>
Co-authored-by: feli <felix.li@amd.com>
This commit is contained in:
carlushuang
2025-03-06 12:01:25 +08:00
committed by GitHub
parent a88bf76ecc
commit c12fb0a624
2 changed files with 4 additions and 3 deletions

View File

@@ -1492,7 +1492,7 @@ struct GridwiseMoeGemm
using CDEBlockTransferCluster =
CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock;
const auto EGlobalMemoryDataOperation = CGlobalMemoryDataOperation;
constexpr index_t scatter_weight_idx = 1;
constexpr index_t scatter_weight_idx = IsInputGemm ? 1 : 3; // hack fix felix
auto cde_block_copy_lds_and_global = ThreadGroupTensorSliceTransfer_v7r3_scatter<
ThisThreadBlock,
decltype(container_concat(make_tuple(CShuffleDataType{}), DsDataType{})),
@@ -2000,7 +2000,7 @@ struct GridwiseMoeGemm
using CDEBlockTransferCluster =
CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock;
const auto EGlobalMemoryDataOperation = CGlobalMemoryDataOperation;
constexpr index_t scatter_weight_idx = 1;
constexpr index_t scatter_weight_idx = IsInputGemm ? 1 : 3; // hack fix felix
auto cde_block_copy_lds_and_global = ThreadGroupTensorSliceTransfer_v7r3_scatter<
ThisThreadBlock,
decltype(container_concat(make_tuple(CShuffleDataType{}), DsDataType{})),