diff --git a/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp b/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp index 4682f636eb..375e465721 100644 --- a/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp +++ b/experimental/builder/include/ck_tile/builder/reflect/conv_description.hpp @@ -98,7 +98,7 @@ struct ConvDescription f.writeLine(2, "Weights elementwise operation: ", signature.weight_element_op); f.writeLast(2, "Output elementwise operation: ", signature.output_element_op); - f.writeLine(1, "Algorithm"); + f.writeLast(1, "Algorithm"); // Compute Block section f.writeLine(2, "Thread block size: ", algorithm.thread_block_size); f.writeLine(2, @@ -123,7 +123,7 @@ struct ConvDescription algorithm.warp_gemm.n_iter); // Memory Access section - f.writeLine(2, "Memory access:"); + f.writeLast(2, "Memory access:"); f.writeLine(3, "A Tile transfer: "); f.writeLine(4, @@ -219,8 +219,6 @@ struct ConvDescription f.writeLast(4, "Vector access (GMEM write) instruction size: ", algorithm.c_tile_transfer.scalar_per_vector); - f.writeLast(2); - f.writeLast(1); return f.getString(); } diff --git a/experimental/builder/test/test_conv_description.cpp b/experimental/builder/test/test_conv_description.cpp index b83abe9f43..933995730a 100644 --- a/experimental/builder/test/test_conv_description.cpp +++ b/experimental/builder/test/test_conv_description.cpp @@ -127,41 +127,39 @@ TEST(ConvDescriptionTest, DefaultInstanceHasDetailedDescription) "│ ├─ Input elementwise operation: PASS_THROUGH\n" "│ ├─ Weights elementwise operation: PASS_THROUGH\n" "│ └─ Output elementwise operation: PASS_THROUGH\n" - "├─ Algorithm\n" - "│ ├─ Thread block size: 256\n" - "│ ├─ Data tile size: 256×256×32\n" - "│ ├─ Gemm padding: DEFAULT\n" - "│ ├─ Convolution specialization: DEFAULT\n" - "│ ├─ Pipeline version: V4\n" - "│ ├─ Pipeline scheduler: INTRAWAVE\n" - "│ ├─ Warp Gemm parameters: \n" - "│ │ ├─ subtile size: 16×16\n" - "│ │ └─ Number of warp gemm iterations: 4×4\n" - "│ ├─ Memory access:\n" - "│ │ ├─ A Tile transfer: \n" - "│ │ │ ├─ Tile dimensions: 4×256×8×\n" - "│ │ │ ├─ The innermost K subdimension size: 8\n" - "│ │ │ ├─ Spatial thread distribution over the data tile: 0×1×2\n" - "│ │ │ ├─ The order of accessing data tile axes: 0×1×2\n" - "│ │ │ ├─ Vectorized memory access axis index (with contiguous memory): 2\n" - "│ │ │ ├─ Vector access (GMEM read) instruction size: 8\n" - "│ │ │ ├─ Vector access (LDS write) instruction size: 8\n" - "│ │ │ └─ LDS data layout padding (to prevent bank conflicts): 8\n" - "│ │ ├─ B Tile transfer: \n" - "│ │ │ ├─ Tile dimensions: 4×256×8×\n" - "│ │ │ ├─ The innermost K subdimension size: 8\n" - "│ │ │ ├─ Spatial thread distribution over the data tile: 0×1×2\n" - "│ │ │ ├─ The order of accessing data tile axes: 0×1×2\n" - "│ │ │ ├─ Vectorized memory access axis index (with contiguous memory): 2\n" - "│ │ │ ├─ Vector access (GMEM read) instruction size: 8\n" - "│ │ │ ├─ Vector access (LDS write) instruction size: 8\n" - "│ │ │ └─ LDS data layout padding (to prevent bank conflicts): 8\n" - "│ │ └─ C Tile transfer: \n" - "│ │ ├─ Data shuffle (number of gemm instructions per iteration): 1×1\n" - "│ │ ├─ Spatial thread distribution used to store data: 1×32×1×8\n" - "│ │ └─ Vector access (GMEM write) instruction size: 8\n" - "│ └─ \n" - "└─ ")); + "└─ Algorithm\n" + " ├─ Thread block size: 256\n" + " ├─ Data tile size: 256×256×32\n" + " ├─ Gemm padding: DEFAULT\n" + " ├─ Convolution specialization: DEFAULT\n" + " ├─ Pipeline version: V4\n" + " ├─ Pipeline scheduler: INTRAWAVE\n" + " ├─ Warp Gemm parameters: \n" + " │ ├─ subtile size: 16×16\n" + " │ └─ Number of warp gemm iterations: 4×4\n" + " └─ Memory access:\n" + " ├─ A Tile transfer: \n" + " │ ├─ Tile dimensions: 4×256×8×\n" + " │ ├─ The innermost K subdimension size: 8\n" + " │ ├─ Spatial thread distribution over the data tile: 0×1×2\n" + " │ ├─ The order of accessing data tile axes: 0×1×2\n" + " │ ├─ Vectorized memory access axis index (with contiguous memory): 2\n" + " │ ├─ Vector access (GMEM read) instruction size: 8\n" + " │ ├─ Vector access (LDS write) instruction size: 8\n" + " │ └─ LDS data layout padding (to prevent bank conflicts): 8\n" + " ├─ B Tile transfer: \n" + " │ ├─ Tile dimensions: 4×256×8×\n" + " │ ├─ The innermost K subdimension size: 8\n" + " │ ├─ Spatial thread distribution over the data tile: 0×1×2\n" + " │ ├─ The order of accessing data tile axes: 0×1×2\n" + " │ ├─ Vectorized memory access axis index (with contiguous memory): 2\n" + " │ ├─ Vector access (GMEM read) instruction size: 8\n" + " │ ├─ Vector access (LDS write) instruction size: 8\n" + " │ └─ LDS data layout padding (to prevent bank conflicts): 8\n" + " └─ C Tile transfer: \n" + " ├─ Data shuffle (number of gemm instructions per iteration): 1×1\n" + " ├─ Spatial thread distribution used to store data: 1×32×1×8\n" + " └─ Vector access (GMEM write) instruction size: 8")); } // NOTE: BackwardDataInstanceHasDetailedDescription test is disabled because ConvFactory