From d635209d5933a9bee2d1e37e41606df17807a628 Mon Sep 17 00:00:00 2001 From: bobofang Date: Wed, 19 Mar 2025 12:26:30 +0800 Subject: [PATCH] Fix add accuracy issue 2673 GB/s -> 3271 GB/s Perf: 0.0512898 ms, 3271.06 GB/s --- example/ck_tile/99_toy_example/01_add/add.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/ck_tile/99_toy_example/01_add/add.cpp b/example/ck_tile/99_toy_example/01_add/add.cpp index 604d753a4b..476e8b0a27 100644 --- a/example/ck_tile/99_toy_example/01_add/add.cpp +++ b/example/ck_tile/99_toy_example/01_add/add.cpp @@ -50,7 +50,7 @@ bool run(const ck_tile::ArgParser& arg_parser) using BlockWarps = ck_tile::sequence<2, 2>; using BlockTile = ck_tile::sequence<2, 1024>; using WarpTile = ck_tile::sequence<1, 512>; - using Vector = ck_tile::sequence<1, 4>; + using Vector = ck_tile::sequence<1, 8>; constexpr ck_tile::index_t kBlockSize = 256; constexpr ck_tile::index_t kBlockPerCu = 1;