Fix add accuracy issue

2673 GB/s -> 3271 GB/s
Perf: 0.0512898 ms, 3271.06 GB/s
This commit is contained in:
bobofang
2025-03-19 12:26:30 +08:00
parent ff15e2da7a
commit d635209d59

View File

@@ -50,7 +50,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
using BlockWarps = ck_tile::sequence<2, 2>;
using BlockTile = ck_tile::sequence<2, 1024>;
using WarpTile = ck_tile::sequence<1, 512>;
-using Vector = ck_tile::sequence<1, 4>;
+using Vector = ck_tile::sequence<1, 8>;
constexpr ck_tile::index_t kBlockSize = 256;
constexpr ck_tile::index_t kBlockPerCu = 1;