From 3e61925277d67af2bc34867f0ba9bc113fb800c4 Mon Sep 17 00:00:00 2001 From: Clement Lin Date: Wed, 9 Apr 2025 15:08:09 +0800 Subject: [PATCH] Remove unused code --- .../block_gemm_areg_bsmem_creg_problem.hpp | 2 +- .../block_gemm_areg_bsmem_creg_v1.hpp | 57 ++++--------------- ...gemm_areg_bsmem_creg_v1_default_policy.hpp | 2 +- ...emm_areg_bsmem_creg_v1_iteratek_policy.hpp | 2 +- ..._pipeline_agmem_bgmem_creg_v2_askiplds.hpp | 16 +++--- ...ne_agmem_bgmem_creg_v2_askiplds_policy.hpp | 2 +- .../block_gemm_pipeline_problem.hpp | 2 +- ..._pipeline_agmem_bgmem_creg_policy_impl.hpp | 3 + .../flash_attention_fwd.cpp | 3 + .../flash_attention_fwd.hpp | 2 +- .../flash_attention_fwd_impl.hpp | 2 +- .../reference_batched_gemm.hpp | 2 +- .../reference_batched_softmax.hpp | 2 +- .../tile_gemm_shape.hpp | 2 +- .../block/block_gemm_areg_bsmem_creg_v1.hpp | 17 +----- 15 files changed, 35 insertions(+), 81 deletions(-) diff --git a/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_areg_bsmem_creg_problem.hpp b/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_areg_bsmem_creg_problem.hpp index 3d5d9771f6..a0a74c55aa 100644 --- a/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_areg_bsmem_creg_problem.hpp +++ b/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_areg_bsmem_creg_problem.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_areg_bsmem_creg_v1.hpp b/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_areg_bsmem_creg_v1.hpp index 9398ea5c35..740c540d6c 100644 --- a/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_areg_bsmem_creg_v1.hpp +++ b/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_areg_bsmem_creg_v1.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once @@ -33,8 +33,8 @@ struct BlockGemmARegBSmemCRegV1 const BBlockWindowTmp& b_block_window_tmp) const { static_assert(std::is_same_v> && - std::is_same_v> && - std::is_same_v>, + std::is_same_v> && + std::is_same_v>, "wrong!"); constexpr index_t MPerBlock = ABlockTensorTmp{}.get_lengths()[number<0>{}]; @@ -42,8 +42,7 @@ struct BlockGemmARegBSmemCRegV1 constexpr index_t KPerBlock = ABlockTensorTmp{}.get_lengths()[number<1>{}]; static_assert(MPerBlock == BlockGemmShape::kM && NPerBlock == BlockGemmShape::kN && - KPerBlock == BlockGemmShape::kK, - "wrong!"); + KPerBlock == BlockGemmShape::kK, "wrong!"); constexpr auto config = Policy::template GetWarpGemmMWarpNWarp(); @@ -97,26 +96,11 @@ struct BlockGemmARegBSmemCRegV1 auto b_warp_window_tmp = make_tile_window( b_block_window_tmp.get_bottom_tensor_view(), make_tuple(number{}, number{}), - // b_block_window_tmp.GetWindowOrigin() + MultiIndex<2>{iNWarp * WG::kN, 0}, {b_block_window_tmp.get_window_origin().at(number<0>{}) + iNWarp * WG::kN, b_block_window_tmp.get_window_origin().at(number<1>{})}, make_static_tile_distribution(typename WG::BWarpDstrEncoding{})); -#if 0 // FIXME: using Array will cause register spill - Array, NIterPerWarp> b_warp_windows{ - {b_warp_window_tmp}}; - - for(index_t nIter = 0; nIter < NIterPerWarp; nIter++) - { - for(index_t kIter = 0; kIter < KIterPerWarp; kIter++) - { - move_tile_window(b_warp_windows(nIter)(kIter), - {nIter * NPerBlockPerIter, kIter * KPerBlockPerIter}); - } - } -#else statically_indexed_array, - NIterPerWarp> - b_warp_windows; + NIterPerWarp> b_warp_windows; static_for<0, NIterPerWarp, 1>{}([&](auto nIter) { static_for<0, KIterPerWarp, 1>{}([&](auto kIter) { @@ -126,15 +110,11 @@ struct BlockGemmARegBSmemCRegV1 {nIter * NPerBlockPerIter, kIter * KPerBlockPerIter}); }); }); -#endif // check C-block-distribution static_assert(std::is_same_v, - remove_cvref_t>, - // remove_cvref_t>, - "wrong!"); + remove_cvref_t>, "wrong!"); using AWarpDstr = typename WG::AWarpDstr; using CWarpDstr = typename WG::CWarpDstr; @@ -187,7 +167,7 @@ struct BlockGemmARegBSmemCRegV1 const BBlockWindowTmp& b_block_window_tmp) const { static_assert(std::is_same_v> && - std::is_same_v>, + std::is_same_v>, "wrong!"); constexpr index_t MPerBlock = ABlockTensorTmp{}.get_lengths()[number<0>{}]; @@ -195,8 +175,7 @@ struct BlockGemmARegBSmemCRegV1 constexpr index_t KPerBlock = ABlockTensorTmp{}.get_lengths()[number<1>{}]; static_assert(MPerBlock == BlockGemmShape::kM && NPerBlock == BlockGemmShape::kN && - KPerBlock == BlockGemmShape::kK, - "wrong!"); + KPerBlock == BlockGemmShape::kK, "wrong!"); constexpr auto config = Policy::template GetWarpGemmMWarpNWarp(); @@ -251,26 +230,11 @@ struct BlockGemmARegBSmemCRegV1 auto b_warp_window_tmp = make_tile_window( b_block_window_tmp.get_bottom_tensor_view(), make_tuple(number{}, number{}), - // b_block_window_tmp.GetWindowOrigin() + MultiIndex<2>{iNWarp * WG::kN, 0}, {b_block_window_tmp.get_window_origin().at(number<0>{}) + iNWarp * WG::kN, b_block_window_tmp.get_window_origin().at(number<1>{})}, make_static_tile_distribution(typename WG::BWarpDstrEncoding{})); -#if 0 // FIXME: using Array will cause register spill - Array, NIterPerWarp> b_warp_windows{ - {b_warp_window_tmp}}; - - for(index_t nIter = 0; nIter < NIterPerWarp; nIter++) - { - for(index_t kIter = 0; kIter < KIterPerWarp; kIter++) - { - move_tile_window(b_warp_windows(nIter)(kIter), - {nIter * NPerBlockPerIter, kIter * KPerBlockPerIter}); - } - } -#else statically_indexed_array, - NIterPerWarp> - b_warp_windows; + NIterPerWarp> b_warp_windows; static_for<0, NIterPerWarp, 1>{}([&](auto nIter) { static_for<0, KIterPerWarp, 1>{}([&](auto kIter) { @@ -280,7 +244,6 @@ struct BlockGemmARegBSmemCRegV1 {nIter * NPerBlockPerIter, kIter * KPerBlockPerIter}); }); }); -#endif // Construct C-Block-Tensor auto c_block_tensor = make_static_distributed_tensor(c_block_dstr); diff --git a/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_areg_bsmem_creg_v1_default_policy.hpp b/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_areg_bsmem_creg_v1_default_policy.hpp index 7189f5ee09..fb1516eb52 100644 --- a/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_areg_bsmem_creg_v1_default_policy.hpp +++ b/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_areg_bsmem_creg_v1_default_policy.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_areg_bsmem_creg_v1_iteratek_policy.hpp b/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_areg_bsmem_creg_v1_iteratek_policy.hpp index 90ed9340bf..32dc09f95e 100644 --- a/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_areg_bsmem_creg_v1_iteratek_policy.hpp +++ b/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_areg_bsmem_creg_v1_iteratek_policy.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_pipeline_agmem_bgmem_creg_v2_askiplds.hpp b/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_pipeline_agmem_bgmem_creg_v2_askiplds.hpp index 94559c83c1..fc98792615 100644 --- a/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_pipeline_agmem_bgmem_creg_v2_askiplds.hpp +++ b/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_pipeline_agmem_bgmem_creg_v2_askiplds.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once @@ -48,12 +48,12 @@ struct BlockGemmPipelineAGmemBGmemCReg> && - std::is_same_v>, + std::is_same_v>, "wrong!"); static_assert(kMPerBlock == ADramBlockWindowTmp{}.get_window_lengths()[number<0>{}] && - kNPerBlock == BDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] && - kKPerBlock == ADramBlockWindowTmp{}.get_window_lengths()[number<1>{}], + kNPerBlock == BDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] && + kKPerBlock == ADramBlockWindowTmp{}.get_window_lengths()[number<1>{}], "wrong!"); // A tile in Reg,blockTensor @@ -248,12 +248,12 @@ struct BlockGemmPipelineAGmemBGmemCReg< { static_assert( std::is_same_v> && - std::is_same_v>, + std::is_same_v>, "wrong!"); static_assert(kMPerBlock == ADramBlockWindowTmp{}.get_window_lengths()[number<0>{}] && - kNPerBlock == BDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] && - kKPerBlock == ADramBlockWindowTmp{}.get_window_lengths()[number<1>{}], + kNPerBlock == BDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] && + kKPerBlock == ADramBlockWindowTmp{}.get_window_lengths()[number<1>{}], "wrong!"); ignore = a_element_func; @@ -402,7 +402,7 @@ struct BlockGemmPipelineAGmemBGmemCReg< "wrong!"); static_assert(kNPerBlock == BDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] && - kKPerBlock == BDramBlockWindowTmp{}.get_window_lengths()[number<1>{}], + kKPerBlock == BDramBlockWindowTmp{}.get_window_lengths()[number<1>{}], "wrong!"); ignore = b_element_func; diff --git a/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_pipeline_agmem_bgmem_creg_v2_askiplds_policy.hpp b/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_pipeline_agmem_bgmem_creg_v2_askiplds_policy.hpp index a0d270a9db..cdce1b1f31 100644 --- a/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_pipeline_agmem_bgmem_creg_v2_askiplds_policy.hpp +++ b/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_pipeline_agmem_bgmem_creg_v2_askiplds_policy.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_pipeline_problem.hpp b/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_pipeline_problem.hpp index 679e8e2a76..1a620ba54b 100644 --- a/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_pipeline_problem.hpp +++ b/example/ck_tile/99_toy_example/03_flash_attention_fwd/block_gemm_pipeline_problem.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/example/ck_tile/99_toy_example/03_flash_attention_fwd/blockgemm_pipeline_agmem_bgmem_creg_policy_impl.hpp b/example/ck_tile/99_toy_example/03_flash_attention_fwd/blockgemm_pipeline_agmem_bgmem_creg_policy_impl.hpp index f53833134e..ad6d6d3996 100644 --- a/example/ck_tile/99_toy_example/03_flash_attention_fwd/blockgemm_pipeline_agmem_bgmem_creg_policy_impl.hpp +++ b/example/ck_tile/99_toy_example/03_flash_attention_fwd/blockgemm_pipeline_agmem_bgmem_creg_policy_impl.hpp @@ -1,3 +1,6 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + #include "ck_tile/core.hpp" #include "ck_tile/core/tensor/tile_distribution.hpp" diff --git a/example/ck_tile/99_toy_example/03_flash_attention_fwd/flash_attention_fwd.cpp b/example/ck_tile/99_toy_example/03_flash_attention_fwd/flash_attention_fwd.cpp index 9cd5523e26..299d29b279 100644 --- a/example/ck_tile/99_toy_example/03_flash_attention_fwd/flash_attention_fwd.cpp +++ b/example/ck_tile/99_toy_example/03_flash_attention_fwd/flash_attention_fwd.cpp @@ -1,3 +1,6 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. + #include #include "ck_tile/host.hpp" diff --git a/example/ck_tile/99_toy_example/03_flash_attention_fwd/flash_attention_fwd.hpp b/example/ck_tile/99_toy_example/03_flash_attention_fwd/flash_attention_fwd.hpp index 7b7f7aab23..caeeece8e9 100644 --- a/example/ck_tile/99_toy_example/03_flash_attention_fwd/flash_attention_fwd.hpp +++ b/example/ck_tile/99_toy_example/03_flash_attention_fwd/flash_attention_fwd.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/example/ck_tile/99_toy_example/03_flash_attention_fwd/flash_attention_fwd_impl.hpp b/example/ck_tile/99_toy_example/03_flash_attention_fwd/flash_attention_fwd_impl.hpp index 12970617ad..4229db5250 100644 --- a/example/ck_tile/99_toy_example/03_flash_attention_fwd/flash_attention_fwd_impl.hpp +++ b/example/ck_tile/99_toy_example/03_flash_attention_fwd/flash_attention_fwd_impl.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/example/ck_tile/99_toy_example/03_flash_attention_fwd/reference_batched_gemm.hpp b/example/ck_tile/99_toy_example/03_flash_attention_fwd/reference_batched_gemm.hpp index f304be894b..2762e66464 100644 --- a/example/ck_tile/99_toy_example/03_flash_attention_fwd/reference_batched_gemm.hpp +++ b/example/ck_tile/99_toy_example/03_flash_attention_fwd/reference_batched_gemm.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/example/ck_tile/99_toy_example/03_flash_attention_fwd/reference_batched_softmax.hpp b/example/ck_tile/99_toy_example/03_flash_attention_fwd/reference_batched_softmax.hpp index 68e356806f..3713a22c6a 100644 --- a/example/ck_tile/99_toy_example/03_flash_attention_fwd/reference_batched_softmax.hpp +++ b/example/ck_tile/99_toy_example/03_flash_attention_fwd/reference_batched_softmax.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/example/ck_tile/99_toy_example/03_flash_attention_fwd/tile_gemm_shape.hpp b/example/ck_tile/99_toy_example/03_flash_attention_fwd/tile_gemm_shape.hpp index f3c4d8bf67..02a7106eb7 100644 --- a/example/ck_tile/99_toy_example/03_flash_attention_fwd/tile_gemm_shape.hpp +++ b/example/ck_tile/99_toy_example/03_flash_attention_fwd/tile_gemm_shape.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/include/ck_tile/ops/gemm/block/block_gemm_areg_bsmem_creg_v1.hpp b/include/ck_tile/ops/gemm/block/block_gemm_areg_bsmem_creg_v1.hpp index 98e5538c0a..60b4b63022 100644 --- a/include/ck_tile/ops/gemm/block/block_gemm_areg_bsmem_creg_v1.hpp +++ b/include/ck_tile/ops/gemm/block/block_gemm_areg_bsmem_creg_v1.hpp @@ -98,19 +98,6 @@ struct BlockGemmARegBSmemCRegV1 b_block_window_tmp.get_window_origin() + multi_index<2>{iNWarp * WG::kN, 0}, make_static_tile_distribution(typename WG::BWarpDstrEncoding{})); -#if 0 // FIXME: using array will cause register spill - array, NIterPerWarp> b_warp_windows{ - {b_warp_window_tmp}}; - - for(index_t nIter = 0; nIter < NIterPerWarp; nIter++) - { - for(index_t kIter = 0; kIter < KIterPerWarp; kIter++) - { - move_tile_window(b_warp_windows(nIter)(kIter), - {nIter * NPerBlockPerIter, kIter * KPerBlockPerIter}); - } - } -#else statically_indexed_array< statically_indexed_array, NIterPerWarp> @@ -124,14 +111,12 @@ struct BlockGemmARegBSmemCRegV1 {nIter * NPerBlockPerIter, kIter * KPerBlockPerIter}); }); }); -#endif // check C-block-distribution static_assert( std::is_same_v, remove_cvref_t>, - "wrong!"); + .get_static_tile_distribution_encoding())>>, "wrong!"); using AWarpDstr = typename WG::AWarpDstr; using CWarpDstr = typename WG::CWarpDstr;