mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 18:17:44 +00:00
* Add reduce2d new api * Prevent users from using cross warp reduction * Fix bug of std calculation * Add rmsnorm2d * Add rmsnorm small example * Remove static assert to prevent compile fail * Add script to test performance and correctness * Add missing cmake change * refine naming * refine example of rmsnorm * Fix bug of rmsnorm * Refine naming * Fix cmake * clang format * Refine pipeline name * Add add_rmsnorm2d_rdquant kernel * Add reduce op * host verification * Fix bug of one pass pipeline * Refine tile size * Add two pass pipeline * Rename two pass to three pass * Fix bug of kSaveX == false * Add instance library * Add test script * Fix bug of x verification * Add save_x to trait * Add README * Move reduce2d into reduce folder * Fix bug of welford when number of m warp > 1 * remove redundant comment * 1. move 06_rmsnorm2d to 10_rmsnorm2d 2. move 07_add_rmsnorm2d_rdquant to 11_add_rmsnorm2d_rdquant * clang format and add missing header * Add host validation of add + layernorm2d + rsquant * Revert "Add host validation of add + layernorm2d + rsquant" This reverts commit 936cb45797. * Remove deprecated flag [ROCm/composable_kernel commit:3d60953477]
34 lines
1.6 KiB
C++
34 lines
1.6 KiB
C++
// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#pragma once
|
|
|
|
#include "ck_tile/host/arg_parser.hpp"
|
|
#include "ck_tile/host/check_err.hpp"
|
|
#include "ck_tile/host/convolution_host_tensor_descriptor_helper.hpp"
|
|
#include "ck_tile/host/convolution_parameter.hpp"
|
|
#include "ck_tile/host/device_memory.hpp"
|
|
#include "ck_tile/host/fill.hpp"
|
|
#include "ck_tile/host/hip_check_error.hpp"
|
|
#include "ck_tile/host/host_tensor.hpp"
|
|
#include "ck_tile/host/kernel_launch.hpp"
|
|
#include "ck_tile/host/ranges.hpp"
|
|
#include "ck_tile/host/reference/reference_batched_dropout.hpp"
|
|
#include "ck_tile/host/reference/reference_batched_elementwise.hpp"
|
|
#include "ck_tile/host/reference/reference_batched_gemm.hpp"
|
|
#include "ck_tile/host/reference/reference_batched_masking.hpp"
|
|
#include "ck_tile/host/reference/reference_batched_rotary_position_embedding.hpp"
|
|
#include "ck_tile/host/reference/reference_batched_softmax.hpp"
|
|
#include "ck_tile/host/reference/reference_elementwise.hpp"
|
|
#include "ck_tile/host/reference/reference_gemm.hpp"
|
|
#include "ck_tile/host/reference/reference_im2col.hpp"
|
|
#include "ck_tile/host/reference/reference_layernorm2d_fwd.hpp"
|
|
#include "ck_tile/host/reference/reference_permute.hpp"
|
|
#include "ck_tile/host/reference/reference_reduce.hpp"
|
|
#include "ck_tile/host/reference/reference_rmsnorm2d_fwd.hpp"
|
|
#include "ck_tile/host/reference/reference_rowwise_quantization2d.hpp"
|
|
#include "ck_tile/host/reference/reference_softmax.hpp"
|
|
#include "ck_tile/host/reference/reference_topk.hpp"
|
|
#include "ck_tile/host/stream_config.hpp"
|
|
#include "ck_tile/host/timer.hpp"
|