mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-30 03:37:38 +00:00
fix grid bug
This commit is contained in:
@@ -679,10 +679,12 @@ std::enable_if_t<(std::is_same_v<ranges::range_value_t<Range>, ranges::range_val
|
||||
auto update_err = [&](pk_fp4_raw_t o, pk_fp4_raw_t r, std::size_t index) {
|
||||
if(o != r)
|
||||
{
|
||||
std::cerr << msg << " out[" << index << "] != ref[" << index
|
||||
<< "]: " << type_convert<float>(pk_fp4_t{o})
|
||||
<< " != " << type_convert<float>(pk_fp4_t{r}) << std::endl;
|
||||
++err_count;
|
||||
if(err_count++ < ERROR_DETAIL_LIMIT)
|
||||
{
|
||||
std::cerr << msg << " out[" << index << "] != ref[" << index
|
||||
<< "]: " << type_convert<float>(pk_fp4_t{o})
|
||||
<< " != " << type_convert<float>(pk_fp4_t{r}) << std::endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -151,7 +151,7 @@ struct AsyncLSKernel
|
||||
auto lds_0_window =
|
||||
make_tile_window(lds_0_tensor_view,
|
||||
make_tuple(number<Policy::kMPerBlock>{}, number<Policy::kNPerBlock>{}),
|
||||
{i_m, i_n},
|
||||
{0, 0},
|
||||
Policy::MakeDRAMDistribution());
|
||||
#if 0
|
||||
auto dram_tile = load_tile(a_block_window);
|
||||
|
||||
@@ -36,6 +36,9 @@ float load_store_tile(const ck_tile::AsyncLSKernelArgs& args, const ck_tile::str
|
||||
float ave_time = ck_tile::launch_kernel(
|
||||
s, ck_tile::make_kernel<kBlockPerCu>(Kernel{}, grids, blocks, 0, args));
|
||||
|
||||
std::cout << "Run Load_Store_Tile with kernel " << M_Tile << "x" << N_Tile << ", input "
|
||||
<< args.M << "x" << args.N << ": " << ave_time << " ms, \n";
|
||||
|
||||
return ave_time;
|
||||
}
|
||||
|
||||
@@ -50,17 +53,14 @@ float invoke_load_store_tile(ck_tile::DeviceMem& a_dev_buf,
|
||||
auto sc = ck_tile::stream_config{nullptr, true, 1, 0, 1, true, true, 1};
|
||||
float ave_time = load_store_tile<DataType>(args, sc);
|
||||
|
||||
std::cout << "Run Load_Store_Tile kernel with M=" << M << " N=" << N << " : " << ave_time
|
||||
<< " ms, \n";
|
||||
|
||||
return ave_time;
|
||||
}
|
||||
|
||||
template <typename DataType>
|
||||
bool run_load_store_tile()
|
||||
{
|
||||
constexpr size_t m = 32;
|
||||
constexpr size_t n = 256;
|
||||
constexpr size_t m = 64;
|
||||
constexpr size_t n = 512;
|
||||
constexpr size_t s = 1;
|
||||
|
||||
ck_tile::HostTensor<DataType> a_m_n({m, n}, {n, s});
|
||||
|
||||
Reference in New Issue
Block a user