mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 13:41:24 +00:00
Tests for CK Tile Flatmm and MOE Smoothquant (#2458)
* CK tile tests for flatmm using example * MOE smoothquant draft tests * fix create_arg default index to zero for MOE smoothquant * revert MOE smoothquant changes * code clean up * Add back MOE smoothquant changes * Add MOE smoothquant cases for different precisions and update cmake * clean up comments * Update flatmm cmake * revert change made to moe_smoothquant smoke_test.sh EXE path * remove unnecessary comment in MOE smoothquant cmakelist * comment out adding moe_smoothquant subdirectory for now due to bugs with GPU core dump issue on gfx942 and gfx90a * Clean up run_test_case function in MOE smoothquant tests * update copyright and licensing on files * Remove flatmm test dir since tests should be done as weighted preshuffle gemm * Add flatmm smoke test cases to weighted preshuffle gemm gtests * remove blank line from CMakeLists --------- Co-authored-by: root <root@ctr-ubbsmc16.amd.com> Co-authored-by: Thomas Ning <Thomas.Ning@amd.com>
This commit is contained in:
@@ -0,0 +1,27 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
#if 0
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 2, 4, 64, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 4, 4, 64, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 8, 4, 64, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 16, 4, 64, 1, true, false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, 1, 1, 1, 256, 4, true, false>>(const S&, A);
|
||||
#endif
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 1, 2, 128, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 2, 2, 128, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 4, 2, 128, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 4, 1, 256, 1, true, false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 1, 2, 128, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 2, 2, 128, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 4, 2, 128, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 4, 1, 256, 1, true, false>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,18 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 3, 4, 64, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 3, 2, 128, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 3, 1, 256, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 6, 1, 256, 1, true, false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 3, 4, 64, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 3, 2, 128, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 3, 1, 256, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 6, 1, 256, 1, true, false>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,19 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 1, 1, 256, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 2, 1, 256, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 4, 1, 256, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 8, 1, 256, 1, true, false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 1, 1, 256, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 2, 1, 256, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 4, 1, 256, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 8, 1, 256, 1, true, false>>(const S&, A);
|
||||
|
||||
// clang-format on
|
||||
@@ -0,0 +1,16 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 1, 4, 64, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 2, 4, 64, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 4, 4, 64, 1, true, false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 1, 4, 64, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 2, 4, 64, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 4, 4, 64, 1, true, false>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,18 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 3, 1, 128, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 3, 1, 256, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 6, 1, 256, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 3, 1, 1024, 1, true, false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 3, 1, 128, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 3, 1, 256, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 6, 1, 256, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 3, 1, 1024, 1, true, false>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,18 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 2, 1, 256, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 4, 1, 256, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 2, 1, 1024, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 4, 1, 1024, 1, true, false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 2, 1, 256, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 4, 1, 256, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 2, 1, 1024, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 4, 1, 1024, 1, true, false>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,18 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 2, 1, 256, 8, true, true>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 4, 1, 256, 4, true, true>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 2, 1, 1024, 2, true, true>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 4, 1, 1024, 1, true, true>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 2, 1, 256, 8, true, true>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 4, 1, 256, 4, true, true>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 2, 1, 1024, 2, true, true>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 4, 1, 1024, 1, true, true>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,18 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 1, 4, 64, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 2, 4, 64, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 4, 4, 64, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 8, 4, 64, 1, true, false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 1, 4, 64, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 2, 4, 64, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 4, 4, 64, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 8, 4, 64, 1, true, false>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,16 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 1, 4, 64, 1, true , false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 1, 4, 64, 2, true , false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 2, 4, 64, 1, true , false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 1, 4, 64, 1, true , false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 1, 4, 64, 2, true , false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 2, 4, 64, 1, true , false>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,16 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 3, 4, 64, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 6, 4, 64, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::int8_t, 1, 12, 4, 64, 1, true, false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 3, 4, 64, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 6, 4, 64, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::bf16_t, ck_tile::fp8_t, 1, 12, 4, 64, 1, true, false>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,27 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
#if 0
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 2, 4, 64, 8, true ,false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 4, 4, 64, 4, true ,false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 8, 4, 64, 2, true ,false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 16, 4, 64, 1, true ,false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, 1, 1, 1, 256, 4, true ,false>>(const S&, A);
|
||||
#endif
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 1, 2, 128, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 2, 2, 128, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 4, 2, 128, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 4, 1, 256, 1, true, false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 1, 2, 128, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 2, 2, 128, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 4, 2, 128, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 4, 1, 256, 1, true, false>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,18 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 3, 4, 64, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 3, 2, 128, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 3, 1, 256, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 6, 1, 256, 1, true, false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 3, 4, 64, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 3, 2, 128, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 3, 1, 256, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 6, 1, 256, 1, true, false>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,18 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 1, 1, 256, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 2, 1, 256, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 4, 1, 256, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 8, 1, 256, 1, true, false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 1, 1, 256, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 2, 1, 256, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 4, 1, 256, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 8, 1, 256, 1, true, false>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,16 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 1, 4, 64, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 2, 4, 64, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 4, 4, 64, 1, true, false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 1, 4, 64, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 2, 4, 64, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 4, 4, 64, 1, true, false>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,18 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 3, 1, 128, 8,true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 3, 1, 256, 4,true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 6, 1, 256, 2,true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 3, 1, 1024, 1,true, false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 3, 1, 128, 8,true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 3, 1, 256, 4,true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 6, 1, 256, 2,true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 3, 1, 1024, 1,true, false>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,18 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 2, 1, 256, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 4, 1, 256, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 2, 1, 1024, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 4, 1, 1024, 1, true, false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 2, 1, 256, 8, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 4, 1, 256, 4, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 2, 1, 1024, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 4, 1, 1024, 1, true, false>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,18 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 2, 1, 256, 8, true, true>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 4, 1, 256, 4, true, true>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 2, 1, 1024, 2, true, true>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 4, 1, 1024, 1, true, true>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 2, 1, 256, 8, true, true>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 4, 1, 256, 4, true, true>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 2, 1, 1024, 2, true, true>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 4, 1, 1024, 1, true, true>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,18 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 1, 4, 64, 8, true , false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 2, 4, 64, 4, true , false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 4, 4, 64, 2, true , false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 8, 4, 64, 1, true , false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 1, 4, 64, 8, true , false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 2, 4, 64, 4, true , false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 4, 4, 64, 2, true , false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 8, 4, 64, 1, true , false>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,16 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 1, 4, 64, 1, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 1, 4, 64, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 2, 4, 64, 1, true, false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 1, 4, 64, 1, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 1, 4, 64, 2, true, false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 2, 4, 64, 1, true, false>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,16 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include "moe_smoothquant_instance_common.hpp"
|
||||
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 3, 4, 64, 4, true , false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 6, 4, 64, 2, true , false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::int8_t, 1, 12, 4, 64, 1, true , false>>(const S&, A);
|
||||
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 3, 4, 64, 4, true , false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 6, 4, 64, 2, true , false>>(const S&, A);
|
||||
template float moe_smoothquant_<trait_<ck_tile::fp16_t, ck_tile::fp8_t, 1, 12, 4, 64, 1, true , false>>(const S&, A);
|
||||
// clang-format on
|
||||
@@ -0,0 +1,155 @@
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include <ck_tile/core.hpp>
|
||||
#include "moe_smoothquant.hpp"
|
||||
|
||||
template <typename InType,
|
||||
typename OutType,
|
||||
ck_tile::index_t Repeat_M_, // each thread repeat along M
|
||||
ck_tile::index_t Repeat_N_, // each thread repeat along N
|
||||
ck_tile::index_t ThreadPerBlock_M_, // num threads along M
|
||||
ck_tile::index_t ThreadPerBlock_N_, // num threads along N
|
||||
ck_tile::index_t Vector_N_, // vector size along N
|
||||
bool kPadN_,
|
||||
bool kTwoPass_>
|
||||
using trait_ = moe_smoothquant_traits_<InType,
|
||||
OutType,
|
||||
Repeat_M_,
|
||||
Repeat_N_,
|
||||
ThreadPerBlock_M_,
|
||||
ThreadPerBlock_N_,
|
||||
Vector_N_,
|
||||
kPadN_,
|
||||
kTwoPass_>;
|
||||
|
||||
template <typename in_type, typename out_type>
|
||||
float moe_smoothquant_dispatch(moe_smoothquant_traits /*t*/,
|
||||
moe_smoothquant_args a,
|
||||
const ck_tile::stream_config& s)
|
||||
{
|
||||
float r = -1;
|
||||
// clang-format off
|
||||
// rm rn tm tn vn pd 2p
|
||||
if(a.hidden_size <= 64) {
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 1, 4, 64, 1, true, false>>(s, a);
|
||||
}
|
||||
else if(a.hidden_size <= 128) {
|
||||
if (a.hidden_size % 2 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 1, 4, 64, 2, true, false>>(s, a);
|
||||
else
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 2, 4, 64, 1, true, false>>(s, a);
|
||||
}
|
||||
else if(a.hidden_size <= 256) {
|
||||
if (a.hidden_size % 4 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 1, 4, 64, 4, true, false>>(s, a);
|
||||
else if (a.hidden_size % 2 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 2, 4, 64, 2, true, false>>(s, a);
|
||||
else
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 4, 4, 64, 1, true, false>>(s, a);
|
||||
}
|
||||
else if(a.hidden_size <= 512) {
|
||||
if (a.hidden_size % 8 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 1, 4, 64, 8, true, false>>(s, a);
|
||||
else if (a.hidden_size % 4 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 2, 4, 64, 4, true, false>>(s, a);
|
||||
else if (a.hidden_size % 2 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 4, 4, 64, 2, true, false>>(s, a);
|
||||
else
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 8, 4, 64, 1, true, false>>(s, a);
|
||||
}
|
||||
else if(a.hidden_size <= 768) {
|
||||
if (a.hidden_size % 4 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 3, 4, 64, 4, true, false>>(s, a);
|
||||
else if (a.hidden_size % 2 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 6, 4, 64, 2, true, false>>(s, a);
|
||||
else
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1,12, 4, 64, 1, true, false>>(s, a);
|
||||
}
|
||||
else if(a.hidden_size <= 1024) {
|
||||
if (a.hidden_size % 8 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 1, 2, 128, 8, true, false>>(s, a);
|
||||
else if (a.hidden_size % 4 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 2, 2, 128, 4, true, false>>(s, a);
|
||||
else if (a.hidden_size % 2 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 4, 2, 128, 2, true, false>>(s, a);
|
||||
else
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 4, 1, 256, 1, true, false>>(s, a);
|
||||
}
|
||||
else if(a.hidden_size <= 1536) {
|
||||
if (a.hidden_size % 8 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 3, 4, 64, 8, true, false>>(s, a);
|
||||
else if (a.hidden_size % 4 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 3, 2, 128, 4, true, false>>(s, a);
|
||||
else if (a.hidden_size % 2 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 3, 1, 256, 2, true, false>>(s, a);
|
||||
else
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 6, 1, 256, 1, true, false>>(s, a);
|
||||
}
|
||||
else if(a.hidden_size <= 2048) {
|
||||
if (a.hidden_size % 8 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 1, 1, 256, 8, true, false>>(s, a);
|
||||
else if (a.hidden_size % 4 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 2, 1, 256, 4, true, false>>(s, a);
|
||||
else if (a.hidden_size % 2 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 4, 1, 256, 2, true, false>>(s, a);
|
||||
else
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 8, 1, 256, 1, true, false>>(s, a);
|
||||
}
|
||||
else if(a.hidden_size <= 3072) {
|
||||
if (a.hidden_size % 8 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 3, 1, 128, 8, true, false>>(s, a);
|
||||
else if (a.hidden_size % 4 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 3, 1, 256, 4, true, false>>(s, a);
|
||||
else if (a.hidden_size % 2 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 6, 1, 256, 2, true, false>>(s, a);
|
||||
else
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 3, 1, 1024, 1, true, false>>(s, a);
|
||||
}
|
||||
else if(a.hidden_size <= 4096) {
|
||||
if (a.hidden_size % 8 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 2, 1, 256, 8, true, false>>(s, a);
|
||||
else if (a.hidden_size % 4 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 4, 1, 256, 4, true, false>>(s, a);
|
||||
else if (a.hidden_size % 2 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 2, 1, 1024, 2, true, false>>(s, a);
|
||||
else
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 4, 1, 1024, 1, true, false>>(s, a);
|
||||
}
|
||||
else if(a.hidden_size > 4096) {
|
||||
if (a.hidden_size % 8 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 2, 1, 256, 8, true, true>>(s, a);
|
||||
else if (a.hidden_size % 4 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 4, 1, 256, 4, true, true>>(s, a);
|
||||
else if (a.hidden_size % 2 == 0)
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 2, 1, 1024, 2, true, true>>(s, a);
|
||||
else
|
||||
r = moe_smoothquant_<trait_<in_type, out_type, 1, 4, 1, 1024, 1, true, true>>(s, a);
|
||||
}
|
||||
return r;
|
||||
// clang-format on
|
||||
}
|
||||
|
||||
/// Runtime entry point: picks the moe_smoothquant_dispatch instantiation that
/// matches the precision names carried in `t` and forwards the call to it.
///
/// Supported (in_type, out_type) pairs:
///   ("fp16", "int8"), ("fp16", "fp8"), ("bf16", "int8"), ("bf16", "fp8")
///
/// @param t  traits holding the input/output precision names
/// @param a  kernel arguments, forwarded unchanged
/// @param s  stream configuration, forwarded unchanged
/// @return   the value returned by the selected moe_smoothquant_dispatch
/// @throws std::runtime_error when the precision pair is not one of the above
float moe_smoothquant(moe_smoothquant_traits t,
                      moe_smoothquant_args a,
                      const ck_tile::stream_config& s)
{
    const bool out_is_int8 = (t.out_type == "int8");
    const bool out_is_fp8  = (t.out_type == "fp8");

    if(t.in_type.compare("fp16") == 0)
    {
        if(out_is_int8)
        {
            return moe_smoothquant_dispatch<ck_tile::fp16_t, ck_tile::int8_t>(t, a, s);
        }
        if(out_is_fp8)
        {
            return moe_smoothquant_dispatch<ck_tile::fp16_t, ck_tile::fp8_t>(t, a, s);
        }
    }
    else if(t.in_type.compare("bf16") == 0)
    {
        if(out_is_int8)
        {
            return moe_smoothquant_dispatch<ck_tile::bf16_t, ck_tile::int8_t>(t, a, s);
        }
        if(out_is_fp8)
        {
            return moe_smoothquant_dispatch<ck_tile::bf16_t, ck_tile::fp8_t>(t, a, s);
        }
    }

    // No compiled instance matches the requested precision pair.
    throw std::runtime_error("Without supported instances!");
}
|
||||
@@ -0,0 +1,65 @@
|
||||
|
||||
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include <ck_tile/core.hpp>
|
||||
#include "moe_smoothquant.hpp"
|
||||
#include <iostream>
|
||||
|
||||
#pragma once
|
||||
|
||||
// Shorthand for the stream configuration type; used as the `const S&`
// parameter of moe_smoothquant_ and its explicit instantiations.
using S = ck_tile::stream_config;
|
||||
// Shorthand for the MOE smoothquant argument struct; used as the by-value
// `A` parameter of moe_smoothquant_ and its explicit instantiations.
using A = moe_smoothquant_args;
|
||||
|
||||
template <typename InputType_,
|
||||
typename OutputType_,
|
||||
ck_tile::index_t Repeat_M_, // each thread repeat along M
|
||||
ck_tile::index_t Repeat_N_, // each thread repeat along N
|
||||
ck_tile::index_t ThreadPerBlock_M_, // num threads along M
|
||||
ck_tile::index_t ThreadPerBlock_N_, // num threads along N
|
||||
ck_tile::index_t Vector_N_, // vector size along N
|
||||
bool kPadN_,
|
||||
bool kTwoPass_>
|
||||
using trait_ = moe_smoothquant_traits_<InputType_,
|
||||
OutputType_,
|
||||
Repeat_M_,
|
||||
Repeat_N_,
|
||||
ThreadPerBlock_M_,
|
||||
ThreadPerBlock_N_,
|
||||
Vector_N_,
|
||||
kPadN_,
|
||||
kTwoPass_>;
|
||||
|
||||
template <typename Traits_>
|
||||
float moe_smoothquant_(const S& s, A a)
|
||||
{
|
||||
using InputType = typename Traits_::InputType;
|
||||
using OutputType = typename Traits_::OutputType;
|
||||
|
||||
using PipelineProblem = ck_tile::SmoothquantPipelineProblem<
|
||||
typename MoeSmoothquantTypeConfig<InputType, OutputType>::XDataType,
|
||||
typename MoeSmoothquantTypeConfig<InputType, OutputType>::SmoothScaleDataType,
|
||||
typename MoeSmoothquantTypeConfig<InputType, OutputType>::ComputeDataType,
|
||||
typename MoeSmoothquantTypeConfig<InputType, OutputType>::YScaleDataType,
|
||||
typename MoeSmoothquantTypeConfig<InputType, OutputType>::QYDataType,
|
||||
typename Traits_::Shape,
|
||||
Traits_::kPadN,
|
||||
Traits_::kTwoPass>;
|
||||
|
||||
using OnePassPipeline = ck_tile::SmoothquantPipelineOnePass<PipelineProblem>;
|
||||
using TwoPassPipeline = ck_tile::SmoothquantPipelineTwoPass<PipelineProblem>;
|
||||
using Pipeline = std::conditional_t<Traits_::kTwoPass, TwoPassPipeline, OnePassPipeline>;
|
||||
|
||||
using Kernel = ck_tile::MoeSmoothquant<Pipeline>;
|
||||
|
||||
const dim3 grids = Kernel::GridSize(a);
|
||||
constexpr dim3 blocks = Kernel::BlockSize();
|
||||
constexpr ck_tile::index_t kBlockPerCu = 1;
|
||||
|
||||
auto kargs = Kernel::MakeKargs(a);
|
||||
if(s.log_level_ > 0)
|
||||
std::cout << ", " << Kernel::GetName() << std::flush;
|
||||
|
||||
return ck_tile::launch_kernel(
|
||||
s, ck_tile::make_kernel<blocks.x, kBlockPerCu>(Kernel{}, grids, blocks, 0, kargs));
|
||||
}
|
||||
Reference in New Issue
Block a user