mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 10:09:41 +00:00
[CK_TILE][HOTFIX] WA for address space by disabling it completely (#1947)
* port all moe changes from ck_moe_gemm branch
* refine code in the PR
* fix tail odd
* fix clang format
* fix clang format2
* make hot loop scheduler compatible with 16x16 and 32x32
* clang format
* fix per token quant
* rename moe example
* clang format
* WA for address space by disabling it completely
* hot fix moe gemm2
---------
Co-authored-by: coderfeli <coderfeli@163.com>
Co-authored-by: feli <felix.li@amd.com>
[ROCm/composable_kernel commit: c12fb0a624]
This commit is contained in:
@@ -1492,7 +1492,7 @@ struct GridwiseMoeGemm
|
||||
using CDEBlockTransferCluster =
|
||||
CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock;
|
||||
const auto EGlobalMemoryDataOperation = CGlobalMemoryDataOperation;
|
||||
constexpr index_t scatter_weight_idx = 1;
|
||||
constexpr index_t scatter_weight_idx = IsInputGemm ? 1 : 3; // hack fix felix
|
||||
auto cde_block_copy_lds_and_global = ThreadGroupTensorSliceTransfer_v7r3_scatter<
|
||||
ThisThreadBlock,
|
||||
decltype(container_concat(make_tuple(CShuffleDataType{}), DsDataType{})),
|
||||
@@ -2000,7 +2000,7 @@ struct GridwiseMoeGemm
|
||||
using CDEBlockTransferCluster =
|
||||
CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock;
|
||||
const auto EGlobalMemoryDataOperation = CGlobalMemoryDataOperation;
|
||||
constexpr index_t scatter_weight_idx = 1;
|
||||
constexpr index_t scatter_weight_idx = IsInputGemm ? 1 : 3; // hack fix felix
|
||||
auto cde_block_copy_lds_and_global = ThreadGroupTensorSliceTransfer_v7r3_scatter<
|
||||
ThisThreadBlock,
|
||||
decltype(container_concat(make_tuple(CShuffleDataType{}), DsDataType{})),
|
||||
|
||||
@@ -50,7 +50,8 @@ CK_TILE_DECLARE_ENV_VAR_BOOL(CK_TILE_LOGGING)
|
||||
|
||||
// implementing the "memory address space" attribute
|
||||
// https://llvm.org/docs/AMDGPUUsage.html#amdgpu-address-spaces-table
|
||||
#ifdef __HIPCC__
|
||||
// WA for https://github.com/ROCm/composable_kernel/issues/1946
|
||||
#if 0
|
||||
#define CK_TILE_GENERIC_ADDR __attribute__((address_space(0)))
|
||||
#define CK_TILE_GLOBAL_ADDR __attribute__((address_space(1)))
|
||||
#define CK_TILE_LDS_ADDR __attribute__((address_space(3)))
|
||||
|
||||
Reference in New Issue
Block a user