From 3c50e984c9d60d93e44bcb0ef2cfd9ee24466f85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <ville.pietila@amd.com>
Date: Tue, 7 Oct 2025 13:28:32 +0000
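Subject: [PATCH] Remove unused sparse_row pattern; make buffer_view::get device-only

Remove the unused tile_distribution_pattern::sparse_row specialization of
tile_distribution_encoding_pattern_2d, and change the generic-address-space
buffer_view::get overload from CK_TILE_HOST_DEVICE to CK_TILE_DEVICE.
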
---
 .../algorithm/static_encoding_pattern.hpp     | 82 -------------------
 include/ck_tile/core/tensor/buffer_view.hpp   |  2 +-
 2 files changed, 1 insertion(+), 83 deletions(-)
diff --git a/include/ck_tile/core/algorithm/static_encoding_pattern.hpp b/include/ck_tile/core/algorithm/static_encoding_pattern.hpp
index 775422fe06..c18a6fbb58 100644
--- a/include/ck_tile/core/algorithm/static_encoding_pattern.hpp
+++ b/include/ck_tile/core/algorithm/static_encoding_pattern.hpp
@@ -135,88 +135,6 @@ struct tile_distribution_encoding_pattern_2d : public tile_distribution_encoding
 {
 };
 
-// Sparse rows
-template <index_t BlockSize,
-          index_t YPerTile,
-          index_t XPerTile,
-          index_t VecSize,
-          index_t NumWaveGroups>
-struct tile_distribution_encoding_pattern_2d<BlockSize,
-                                             YPerTile,
-                                             XPerTile,
-                                             VecSize,
-                                             tile_distribution_pattern::sparse_row,
-                                             NumWaveGroups>
-    : public tile_distribution_encoding_pattern
-{
-    static_assert(XPerTile % VecSize == 0, "XPerTile must be a multiple of VecSize!");
-    static_assert(NumWaveGroups == 1, "NumWaveGroups must be 1 for sparse row pattern!");
-    
-    static constexpr index_t warp_size = get_warp_size();
-    static constexpr index_t num_warps = max(1, BlockSize / warp_size);
-    
-    // Calculate optimal vector size
-    static constexpr index_t LargestVec = max(1, (XPerTile * YPerTile) / (num_warps * warp_size));
-    static constexpr index_t X1 = VecSize > LargestVec ? LargestVec : VecSize;
-    static constexpr index_t X0 = XPerTile / X1;
-    
-    // When YPerTile is small, prioritize X dimension distribution
-    // and handle Y dimension with minimal thread usage.
-    
-    // Calculate threads needed for one row.
-    static constexpr index_t threads_per_row = X0;
-    
-    // Calculate how many rows we can process in parallel with available threads
-    static constexpr index_t max_parallel_rows = min(YPerTile, warp_size / threads_per_row);
-    
-    // Y2: Number of rows each warp handles in one iteration
-    static constexpr index_t Y2 = max_parallel_rows;
-    
-    // Y0: Number of warps to use (may be less than total available)
-    static constexpr index_t warps_needed = (YPerTile + Y2 - 1) / Y2;
-    static constexpr index_t Y0 = min(warps_needed, num_warps);
-    
-    // Y1: Number of iterations needed to cover all rows
-    static constexpr index_t Y1 = (YPerTile + (Y0 * Y2) - 1) / (Y0 * Y2);
-    
-    // Validation
-    static_assert(Y0 > 0, "Y0 must be greater than 0!");
-    static_assert(Y1 > 0, "Y1 must be greater than 0!");
-    static_assert(Y2 > 0, "Y2 must be greater than 0!");
-    static_assert(X0 > 0, "X0 must be greater than 0!");
-    static_assert(X1 > 0, "X1 must be greater than 0!");
-    
-    // Ensure we don't exceed available threads per warp
-    static_assert(threads_per_row * Y2 <= warp_size, 
-                  "Threads per row * rows per warp must not exceed warp size!");
-    
-    // Ensure we cover all elements (may over-cover due to ceiling, but that's OK)
-    static_assert(Y0 * Y1 * Y2 >= YPerTile, 
-                  "Y0 * Y1 * Y2 must cover at least YPerTile rows");
-
-    CK_TILE_HOST_DEVICE static constexpr auto make_2d_static_tile_distribution()
-    {
-        return make_static_tile_distribution(
-            tile_distribution_encoding<sequence<1>,
-                                       tuple<sequence<Y0, Y1, Y2>, sequence<X0, X1>>,
-                                       tuple<sequence<1>, sequence<1, 2>>,
-                                       tuple<sequence<0>, sequence<2, 0>>, // -> <Y0>, <Y2, X0>
-                                       sequence<1, 2>,
-                                       sequence<1, 1>>{}); // -> <Y1, X1>
-    }
-
-    CK_TILE_HOST_DEVICE static constexpr auto make_shuffled_2d_static_tile_distribution()
-    {
-        return make_static_tile_distribution(
-            tile_distribution_encoding<sequence<1>,
-                                       tuple<sequence<X0, X1>, sequence<Y0, Y1, Y2>>,
-                                       tuple<sequence<2>, sequence<2, 1>>,
-                                       tuple<sequence<0>, sequence<2, 0>>, // -> <Y0>, <Y2, X0>
-                                       sequence<1, 2>,
-                                       sequence<1, 1>>{}); // -> <X1, Y1>
-    }
-};
-
 // Thread raked
 template <index_t BlockSize,
           index_t YPerTile,
diff --git a/include/ck_tile/core/tensor/buffer_view.hpp b/include/ck_tile/core/tensor/buffer_view.hpp
index dd913a1612..3b747dae84 100644
--- a/include/ck_tile/core/tensor/buffer_view.hpp
+++ b/include/ck_tile/core/tensor/buffer_view.hpp
@@ -96,7 +96,7 @@ struct buffer_view<address_space_enum::generic,
                   std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
                                typename vector_traits<remove_cvref_t<T>>::scalar_type>::value,
                   bool>::type = false>
-    CK_TILE_HOST_DEVICE constexpr auto get(index_t i,
+    CK_TILE_DEVICE constexpr auto get(index_t i,
                                       index_t linear_offset,
                                       bool is_valid_element,
                                       bool_constant<oob_conditional_check> = {}) const