mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-15 18:42:06 +00:00
* Add Rmsnorm2dFwdPipelineModelSensitiveT5Pass
* Update rmsnorm2d_fwd_pipeline_model_sensitive_pass
1. Add BlockReduce2dTreeCrossWarpSync
* Add Rmsnorm2dFusedModelSensitiveEnum
* Update patch
1. Reverse generate.py
2. Remove comment in generate.py
3. Update tree cross warp reduce
* Refactor RMSNorm model enum and introduce T5-like option
* Update the n stage for cross warp reduce
* Add a new command-line option to RMSNorm for testing the new pipeline
---------
Co-authored-by: Clement Lin <clement.lin@amd.com>
Co-authored-by: ClementLinCF <162283536+ClementLinCF@users.noreply.github.com>
[ROCm/composable_kernel commit: 3499fe67ff]
16 lines
809 B
C++
16 lines
809 B
C++
// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#pragma once
|
|
|
|
#include "ck_tile/ops/rmsnorm2d/kernel/rmsnorm2d_fwd_kernel.hpp"
|
|
#include "ck_tile/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_default_policy.hpp"
|
|
#include "ck_tile/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_model_sensitive_pass.hpp"
|
|
#include "ck_tile/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_one_pass.hpp"
|
|
#include "ck_tile/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_problem.hpp"
|
|
#include "ck_tile/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_two_pass.hpp"
|
|
#include "ck_tile/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_traits.hpp"
|
|
#include "ck_tile/ops/common/generic_2d_block_shape.hpp"
|
|
#include "ck_tile/ops/common/tensor_layout.hpp"
|
|
#include "ck_tile/ops/common/utils.hpp"
|