mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-29 11:16:59 +00:00
Fix add accuracy issue
2673 GB/s -> 3271 GB/s. Perf: 0.0512898 ms, 3271.06 GB/s
This commit is contained in:
@@ -50,7 +50,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
using BlockWarps = ck_tile::sequence<2, 2>;
|
||||
using BlockTile = ck_tile::sequence<2, 1024>;
|
||||
using WarpTile = ck_tile::sequence<1, 512>;
|
||||
- using Vector = ck_tile::sequence<1, 4>;
|
||||
+ using Vector = ck_tile::sequence<1, 8>;
|
||||
|
||||
constexpr ck_tile::index_t kBlockSize = 256;
|
||||
constexpr ck_tile::index_t kBlockPerCu = 1;
|
||||
|
||||
Reference in New Issue
Block a user