Remove debug prints and obsolete test cases from the ck_tile tensor_descriptor and tensor_view unit tests.

2026-07-01 12:17:00 +00:00 · 2025-10-07 13:49:18 +00:00
parent d9e9f19ca4
commit 438787dbb6
2 changed files with 0 additions and 160 deletions
--- a/test/ck_tile/tensor_descriptor/test_tensor_descriptor.cpp
+++ b/test/ck_tile/tensor_descriptor/test_tensor_descriptor.cpp
@@ -42,62 +42,6 @@ constexpr auto make_blocked_tensor_descriptor()
    return desc_4d;
 }

-void debug_print_explicit(const std::vector<int>& data, index_t MWidth, index_t NWidth, index_t Gm)
-{
-    std::cout << "Explicit Indexing:" << std::endl;
-    for(int c = 0; c < Gm; ++c)
-    {
-        std::cout << "Col " << c << ": " << std::endl;
-        for(int r = 0; r < Gm; ++r)
-        {
-            for(int m = 0; m < MWidth; ++m)
-            {
-                std::cout << "Row " << r << " (sub-row " << m << "): ";
-                for(int n = 0; n < NWidth; ++n)
-                {
-                    int idx = c + Gm * m + Gm * MWidth * n + Gm * MWidth * NWidth * r;
-                    std::cout << data[idx] << " ";
-                }
-                if(MWidth > 1)
-                {
-                    std::cout << std::endl;
-                }
-            }
-            std::cout << std::endl;
-        }
-        std::cout << std::endl;
-    }
-}
-
-void debug_print_tensor_desc(const std::vector<int>& data, const auto& desc)
-{
-    const auto lengths = desc.get_lengths();
-    std::cout << "Using Tensor Descriptor:" << std::endl;
-    for(int c = 0; c < lengths[number<1>{}]; ++c)
-    {
-        std::cout << "Col " << c << ": " << std::endl;
-        for(int r = 0; r < lengths[number<0>{}]; ++r)
-        {
-            for(int m = 0; m < lengths[number<2>{}]; ++m)
-            {
-                std::cout << "Row " << r << " (sub-row " << m << "): ";
-                for(int n = 0; n < lengths[number<3>{}]; ++n)
-                {
-                    const auto block_coord = make_tuple(r, c, m, n);
-                    const auto idx         = desc.calculate_offset(block_coord);
-                    std::cout << data[idx] << " ";
-                }
-                if(lengths[number<2>{}] > 1)
-                {
-                    std::cout << std::endl;
-                }
-            }
-            std::cout << std::endl;
-        }
-        std::cout << std::endl;
-    }
-}
-
 TEST_F(TestTensorDescriptor, RowMajorBlocksWithColumnMajorData_1x4_blocks)
 {
    constexpr index_t MPerBlock = 2;
@@ -119,7 +63,6 @@ TEST_F(TestTensorDescriptor, RowMajorBlocksWithColumnMajorData_1x4_blocks)

    constexpr auto desc = make_blocked_tensor_descriptor<MPerBlock, NPerBlock, Gm>();

-    debug_print_explicit(data, MWidth, NWidth, Gm);
    std::vector<int> data_explicit_indexing(MPerBlock * NPerBlock, -1);
    for(int col = 0; col < Gm; ++col)
    {
@@ -133,7 +76,6 @@ TEST_F(TestTensorDescriptor, RowMajorBlocksWithColumnMajorData_1x4_blocks)
        }
    }

-    debug_print_tensor_desc(data, desc);
    std::vector<int> data_tensor_desc(MPerBlock * NPerBlock, -1);
    for(int col = 0; col < Gm; ++col)
    {
@@ -154,73 +96,3 @@ TEST_F(TestTensorDescriptor, RowMajorBlocksWithColumnMajorData_1x4_blocks)
    // Verify both methods yield the same result
    EXPECT_EQ(data_explicit_indexing, data_tensor_desc);
 }
-
-TEST_F(TestTensorDescriptor, RowMajorBlocksWithColumnMajorData_2x4_blocks)
-{
-    constexpr index_t MPerBlock = 4;
-    constexpr index_t NPerBlock = 8;
-    constexpr index_t Gm        = 2;
-    constexpr index_t MWidth    = MPerBlock / Gm;
-    constexpr index_t NWidth    = NPerBlock / Gm;
-
-    // This data represents a 4x8 matrix divided into 2x2 blocks of size 2x4 each
-    // Block structure:
-    // Block(0,0) | Block(0,1)
-    // ----------------------
-    // Block(1,0) | Block(1,1)
-    // clang-format off
-    std::vector<int> data {
-        0,  4,  8,   12,  1,  5,  9, 13,    
-        2,  6,  10,  14,  3,  7, 11, 15,     
-        16, 20, 24, 28,  17, 21, 25, 29,
-        18, 22, 26, 30,  19, 23, 27, 31
-    };
-    // clang-format on
-
-    constexpr auto desc = make_blocked_tensor_descriptor<MPerBlock, NPerBlock, Gm>();
-
-    debug_print_explicit(data, MWidth, NWidth, Gm);
-    debug_print_tensor_desc(data, desc);
-}
-
-TEST_F(TestTensorDescriptor, GetSubBlockWithVectorizedAccess)
-{
-    constexpr index_t MPerBlock = 4;
-    constexpr index_t NPerBlock = 8;
-    constexpr index_t Gm        = 2;
-
-    // This data represents a 4x8 matrix divided into 2x2 blocks of size 2x4 each
-    // Block structure:
-    // Block(0,0) | Block(0,1)
-    // ----------------------
-    // Block(1,0) | Block(1,1)
-    // clang-format off
-    std::vector<int> data_vec {
-        0,  4,  8,   12,  1,  5,  9, 13,    
-        2,  6,  10,  14,  3,  7, 11, 15,     
-        16, 20, 24, 28,  17, 21, 25, 29,
-        18, 22, 26, 30,  19, 23, 27, 31
-    };
-    // clang-format on
-
-    constexpr auto desc = make_blocked_tensor_descriptor<MPerBlock, NPerBlock, Gm>();
-
-    const auto tensor_view = make_tensor_view(reinterpret_cast<int4*>(data_vec.data()), desc);
-
-    const auto base_addr    = make_multi_index(number<1>{}, number<1>{}, number<0>{}, number<0>{});
-    const auto block_offset = make_tensor_coordinate(desc, base_addr);
-
-    // First row of sub-block (1,1)
-    const auto row1 = tensor_view.get_vectorized_elements<int4>(block_offset, 0);
-    EXPECT_EQ(row1.x, 20);
-    EXPECT_EQ(row1.y, 21);
-    EXPECT_EQ(row1.z, 22);
-    EXPECT_EQ(row1.w, 23);
-
-    // Second row of sub-block (1,1)
-    const auto row2 = tensor_view.get_vectorized_elements<int4>(block_offset, 1);
-    EXPECT_EQ(row2.x, 28);
-    EXPECT_EQ(row2.y, 29);
-    EXPECT_EQ(row2.z, 30);
-    EXPECT_EQ(row2.w, 31);
-}
--- a/test/ck_tile/tensor_view/test_tensor_view.cpp
+++ b/test/ck_tile/tensor_view/test_tensor_view.cpp
@@ -712,44 +712,12 @@ __global__ void test_4x4_matrix_get_2x2_blocks_with_sfc_and_lds_kernel(int* inpu
    store_tile(in_lds_window, lds_tile);
    block_sync_lds();

-    // Print the contents of LDS
-    if(threadIdx.x == 0 && blockIdx.x == 0)
-    {
-        printf("LDS contents:\n");
-        int* lds_data = reinterpret_cast<int*>(p_smem);
-        for(index_t i = 0; i < 4; i++)
-        {
-            for(index_t j = 0; j < 4; j++)
-            {
-                printf("%3d ", lds_data[i * 4 + j]);
-            }
-            printf("\n");
-        }
-    }
-
    // For the output tensor, we need to copy only the diagonal 2x2 blocks to global memory.
    static_for<0, NumGroupsToMerge, 1>{}([&](auto group) {
        auto out_tensor = load_tile(make_tile_window(out_lds_window, output_tile_distribution));

        store_tile(output_window, out_tensor);

-        // Print the output tensor contents.
-        __syncthreads();
-        if(threadIdx.x == 0 && blockIdx.x == 0)
-        {
-
-            printf("Output tensor contents after loading group %d:\n", group.value);
-            for(index_t i = 0; i < 4; i++)
-            {
-                for(index_t j = 0; j < 2; j++)
-                {
-                    printf("%3d", output[i * 2 + j]);
-                }
-                printf("\n");
-            }
-        }
-        __syncthreads();
-
        // Moving output window works correctly.
        if constexpr(group != NumGroupsToMerge - 1)
        {