mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 18:17:44 +00:00
* add moe_sorting & check ok
* fix comments & typo
* Run remod.py under include/ck_tile & example/ck_tile directories
* format code
* fix output ci check bug
* fix moe sorting readme and error commit file
* use magic div to accelerate compute
* add a loop unroll for moe lds ops
* add extblocksnel to set zeros for moebufs
* [Ck_tile] moe set zero run ok, add size check and fix ref check
* [Ck_tile]fix moe_sorting fuse set_zero remod
* [Ck_tile] change name style, fix zero buffer size err, change folder
* [Ck_tile] moe_sorting: fix name style
* [Ck_tile] moe_sorting, remove useless params in traits
* [Ck_tile] change outputtile cnt * unit_size; change output buf alloc
---------
Co-authored-by: dummycoderfe <noplydummmycoder@163.com>
Co-authored-by: Po Yen, Chen <PoYen.Chen@amd.com>
Co-authored-by: carlushuang <carlus.huang@amd.com>
[ROCm/composable_kernel commit: bec6fbc65f]
35 lines
1.6 KiB
C++
35 lines
1.6 KiB
C++
// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#pragma once
|
|
|
|
#include "ck_tile/host/arg_parser.hpp"
|
|
#include "ck_tile/host/check_err.hpp"
|
|
#include "ck_tile/host/convolution_host_tensor_descriptor_helper.hpp"
|
|
#include "ck_tile/host/convolution_parameter.hpp"
|
|
#include "ck_tile/host/device_memory.hpp"
|
|
#include "ck_tile/host/fill.hpp"
|
|
#include "ck_tile/host/hip_check_error.hpp"
|
|
#include "ck_tile/host/host_tensor.hpp"
|
|
#include "ck_tile/host/kernel_launch.hpp"
|
|
#include "ck_tile/host/ranges.hpp"
|
|
#include "ck_tile/host/reference/reference_batched_dropout.hpp"
|
|
#include "ck_tile/host/reference/reference_batched_elementwise.hpp"
|
|
#include "ck_tile/host/reference/reference_batched_gemm.hpp"
|
|
#include "ck_tile/host/reference/reference_batched_masking.hpp"
|
|
#include "ck_tile/host/reference/reference_batched_rotary_position_embedding.hpp"
|
|
#include "ck_tile/host/reference/reference_batched_softmax.hpp"
|
|
#include "ck_tile/host/reference/reference_elementwise.hpp"
|
|
#include "ck_tile/host/reference/reference_gemm.hpp"
|
|
#include "ck_tile/host/reference/reference_im2col.hpp"
|
|
#include "ck_tile/host/reference/reference_layernorm2d_fwd.hpp"
|
|
#include "ck_tile/host/reference/reference_moe_sorting.hpp"
|
|
#include "ck_tile/host/reference/reference_permute.hpp"
|
|
#include "ck_tile/host/reference/reference_reduce.hpp"
|
|
#include "ck_tile/host/reference/reference_rmsnorm2d_fwd.hpp"
|
|
#include "ck_tile/host/reference/reference_rowwise_quantization2d.hpp"
|
|
#include "ck_tile/host/reference/reference_softmax.hpp"
|
|
#include "ck_tile/host/reference/reference_topk.hpp"
|
|
#include "ck_tile/host/stream_config.hpp"
|
|
#include "ck_tile/host/timer.hpp"
|