fix grid bug

This commit is contained in:
Gino Lu
2025-10-20 21:53:13 -05:00
parent 69ed924c36
commit 4da620cc9d
3 changed files with 12 additions and 10 deletions

View File

@@ -679,10 +679,12 @@ std::enable_if_t<(std::is_same_v<ranges::range_value_t<Range>, ranges::range_val
// Records one mismatch between an output value `o` and its reference `r`
// (both raw pk_fp4 values) found at position `index`.
//
// Each mismatch increments err_count exactly once. The per-element detail
// line is written to std::cerr only while fewer than ERROR_DETAIL_LIMIT
// mismatches have been seen, so a badly failing comparison does not flood
// the log while the total count stays accurate.
auto update_err = [&](pk_fp4_raw_t o, pk_fp4_raw_t r, std::size_t index) {
    if(o != r)
    {
        // Post-increment on purpose: the limit is checked against the number
        // of mismatches seen *before* this one, and the counter advances
        // whether or not the detail line is printed.
        if(err_count++ < ERROR_DETAIL_LIMIT)
        {
            std::cerr << msg << " out[" << index << "] != ref[" << index
                      << "]: " << type_convert<float>(pk_fp4_t{o})
                      << " != " << type_convert<float>(pk_fp4_t{r}) << std::endl;
        }
    }
};

View File

@@ -151,7 +151,7 @@ struct AsyncLSKernel
auto lds_0_window =
make_tile_window(lds_0_tensor_view,
make_tuple(number<Policy::kMPerBlock>{}, number<Policy::kNPerBlock>{}),
{i_m, i_n},
{0, 0},
Policy::MakeDRAMDistribution());
#if 0
auto dram_tile = load_tile(a_block_window);

View File

@@ -36,6 +36,9 @@ float load_store_tile(const ck_tile::AsyncLSKernelArgs& args, const ck_tile::str
float ave_time = ck_tile::launch_kernel(
s, ck_tile::make_kernel<kBlockPerCu>(Kernel{}, grids, blocks, 0, args));
std::cout << "Run Load_Store_Tile with kernel " << M_Tile << "x" << N_Tile << ", input "
<< args.M << "x" << args.N << ": " << ave_time << " ms, \n";
return ave_time;
}
@@ -50,17 +53,14 @@ float invoke_load_store_tile(ck_tile::DeviceMem& a_dev_buf,
auto sc = ck_tile::stream_config{nullptr, true, 1, 0, 1, true, true, 1};
float ave_time = load_store_tile<DataType>(args, sc);
std::cout << "Run Load_Store_Tile kernel with M=" << M << " N=" << N << " : " << ave_time
<< " ms, \n";
return ave_time;
}
template <typename DataType>
bool run_load_store_tile()
{
constexpr size_t m = 32;
constexpr size_t n = 256;
constexpr size_t m = 64;
constexpr size_t n = 512;
constexpr size_t s = 1;
ck_tile::HostTensor<DataType> a_m_n({m, n}, {n, s});