mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 21:51:28 +00:00
Replace buffer load/store intrinsics with builtins (#1876)
* replace buffer load/store intrinsics with builtins
* fix clang format
* replace buffer load/store intrinsics with built-ins in ck_tile
* fix clang format
* add switch between buffer intrinsics and built-ins
* change the builtins threshold to clang20
* fix clang format
* fix some compilation errors
* revert changes in ck_tile
* revert changes in ck_tile
* delete all root files and folders when CI completes
* try changing the username in CI
* fix groovy syntax
* add user and group id info to ci dockers
* change ownership of all files in CI to jenkins at the end
* update changelog
This commit is contained in:
@@ -207,6 +207,7 @@ struct FusedMoeGemmPipeline_FlatmmUk
|
||||
threadIdx.x % (BlockShape::Block_K0 / kAlignmentA) * kAlignmentA;
|
||||
},
|
||||
number<row_ids_a.size()>{});
|
||||
|
||||
auto a_res =
|
||||
make_wave_buffer_resource(reinterpret_cast<const ADataType*>(kargs.a_ptr),
|
||||
kargs.num_tokens * kargs.stride_token * sizeof(ADataType));
|
||||
@@ -318,10 +319,10 @@ struct FusedMoeGemmPipeline_FlatmmUk
|
||||
{0, 0},
|
||||
dist_);
|
||||
}();
|
||||
|
||||
auto o_res =
|
||||
make_wave_buffer_resource(reinterpret_cast<const ODataType*>(kargs.o_ptr),
|
||||
kargs.num_tokens * kargs.stride_token * sizeof(ODataType));
|
||||
|
||||
auto row_coords_o = GetRowCoords_O(sorted_tile_id * BlockShape::Block_M0);
|
||||
auto w_scale = GetWeightScale(
|
||||
row_coords_o, reinterpret_cast<const TopkWeightDataType*>(kargs.sorted_weight_ptr));
|
||||
|
||||
Reference in New Issue
Block a user