diff --git a/include/ck_tile/host/check_err.hpp b/include/ck_tile/host/check_err.hpp index 91d387796f..e8cf559530 100644 --- a/include/ck_tile/host/check_err.hpp +++ b/include/ck_tile/host/check_err.hpp @@ -679,10 +679,12 @@ std::enable_if_t<(std::is_same_v, ranges::range_val auto update_err = [&](pk_fp4_raw_t o, pk_fp4_raw_t r, std::size_t index) { if(o != r) { - std::cerr << msg << " out[" << index << "] != ref[" << index - << "]: " << type_convert(pk_fp4_t{o}) - << " != " << type_convert(pk_fp4_t{r}) << std::endl; - ++err_count; + if(err_count++ < ERROR_DETAIL_LIMIT) + { + std::cerr << msg << " out[" << index << "] != ref[" << index + << "]: " << type_convert(pk_fp4_t{o}) + << " != " << type_convert(pk_fp4_t{r}) << std::endl; + } } }; diff --git a/test/ck_tile/async/kernel.hpp b/test/ck_tile/async/kernel.hpp index cee6756bba..ba5784af1f 100644 --- a/test/ck_tile/async/kernel.hpp +++ b/test/ck_tile/async/kernel.hpp @@ -151,7 +151,7 @@ struct AsyncLSKernel auto lds_0_window = make_tile_window(lds_0_tensor_view, make_tuple(number{}, number{}), - {i_m, i_n}, + {0, 0}, Policy::MakeDRAMDistribution()); #if 0 auto dram_tile = load_tile(a_block_window); diff --git a/test/ck_tile/async/run_test.inc b/test/ck_tile/async/run_test.inc index 04ad5852ca..59570bb553 100644 --- a/test/ck_tile/async/run_test.inc +++ b/test/ck_tile/async/run_test.inc @@ -36,6 +36,9 @@ float load_store_tile(const ck_tile::AsyncLSKernelArgs& args, const ck_tile::str float ave_time = ck_tile::launch_kernel( s, ck_tile::make_kernel(Kernel{}, grids, blocks, 0, args)); + std::cout << "Run Load_Store_Tile with kernel " << M_Tile << "x" << N_Tile << ", input " + << args.M << "x" << args.N << ": " << ave_time << " ms, \n"; + return ave_time; } @@ -50,17 +53,14 @@ float invoke_load_store_tile(ck_tile::DeviceMem& a_dev_buf, auto sc = ck_tile::stream_config{nullptr, true, 1, 0, 1, true, true, 1}; float ave_time = load_store_tile(args, sc); - std::cout << "Run Load_Store_Tile kernel with M=" << M << " N=" << N << " : " << ave_time - << " ms, \n"; - return ave_time; } template bool run_load_store_tile() { - constexpr size_t m = 32; - constexpr size_t n = 256; + constexpr size_t m = 64; + constexpr size_t n = 512; constexpr size_t s = 1; ck_tile::HostTensor a_m_n({m, n}, {n, s});