Revert "[CK_tile] Add rotating buffer feature for universal gemm (#2200)" (#2256)

This reverts commit 99857e10e6.
This commit is contained in:
Illia Silin
2025-05-28 08:46:52 -07:00
committed by GitHub
parent 4286eae09a
commit bbdaf79a52
17 changed files with 74 additions and 409 deletions

View File

@@ -42,8 +42,6 @@ rm -rf tile_engine/ && ninja benchmark_gemm # rebuild
-repeat The number of iterations to benchmark the kernel. Default is 100.
-timer Whether if the timer is gpu timer or not. Possible values are true or false. Default is true.
-init The method of tensor initialization. Set to 0 for random, to 1 for linear, or 2 for constant(1). Default is 0, random.
-flush_cache To flush cache in between different runs.Possible values are true or false. Default is false.
-rotating_count count to flush cache. Default is 5.
-metric Metric with which to measure kernel performance. Set to 0 for latency, 1 for tflops, or 2 for bandwidth. Default is 0, latency.
-csv_filename The filename of benchmark result. Default is gemm_kernel.
-structured_sparsity whether use sparsity kernel or not. Possible values are true or false. Default is false.

View File

@@ -26,15 +26,15 @@ void benchmark_gemm(const ck_tile::ArgParser& arg_parser)
CLayout::name,
arg_parser.get_bool("structured_sparsity")};
Setting setting{arg_parser.get_int("warmup"),
arg_parser.get_int("repeat"),
arg_parser.get_bool("timer"),
arg_parser.get_int("verify"),
arg_parser.get_int("init"),
arg_parser.get_bool("log"),
arg_parser.get_str("csv_filename"),
arg_parser.get_bool("flush_cache"),
arg_parser.get_int("rotating_count")};
Setting setting{
arg_parser.get_int("warmup"),
arg_parser.get_int("repeat"),
arg_parser.get_bool("timer"),
arg_parser.get_int("verify"),
arg_parser.get_int("init"),
arg_parser.get_bool("log"),
arg_parser.get_str("csv_filename"),
};
auto& profiler = GemmProfiler::instance(setting);

View File

@@ -125,8 +125,6 @@ struct Setting
int init_method_;
bool log_;
std::string csv_filename_;
bool flush_cache_;
int rotating_count_;
};
inline std::string get_rocm_version()

5
tile_engine/ops/gemm/gemm_host_api.hpp Normal file → Executable file
View File

@@ -93,11 +93,6 @@ inline auto create_args(int argc, char* argv[])
"0",
"The method of tensor initialization. Set to 0 for random, to 1 for linear, or 2 "
"for constant(1). Default is 0, random.")
.insert("flush_cache",
"false",
"To flush cache, possible values are true or false. "
"Default is false.")
.insert("rotating_count", "5", "number of iterations to rotate the cache. default is 5.")
.insert("metric",
"0",
"Metric with which to measure kernel performance. Set to 0 for latency, 1 for "

View File

@@ -273,52 +273,9 @@ struct GemmKernel {{
<< std::endl;
}}
if(stream.flush_cache_)
{{
std::cout << "Flushing cache..." << std::endl;
static constexpr ck_tile::index_t APackedSize =
std::is_same_v<BDataType, ck_tile::pk_int4_t> ? 2 : 1;
static constexpr ck_tile::index_t BPackedSize =
std::is_same_v<BDataType, ck_tile::pk_int4_t> ? 2 : 1;
auto is_row_major = [](auto layout_) {{
return ck_tile::bool_constant<std::is_same_v<ck_tile::remove_cvref_t<decltype(layout_)>,
ck_tile::tensor_layout::gemm::RowMajor>>{{}};
}};
ck_tile::HostTensor<ADataType> a_m(ck_tile::host_tensor_descriptor(
args.M, args.K, args.stride_A, is_row_major(ALayout{{}})));
ck_tile::HostTensor<BDataType> b_n(ck_tile::host_tensor_descriptor(
args.K, args.N, args.stride_B, is_row_major(BLayout{{}})));
auto size_a_buffer = a_m.get_element_space_size_in_bytes() / APackedSize;
auto size_b_buffer = b_n.get_element_space_size_in_bytes() / BPackedSize;
ck_tile::RotatingMemWrapper<ADataType, BDataType> rotating_mem(
kargs.a_ptr, kargs.b_ptr, stream.rotating_count_, size_a_buffer, size_b_buffer);
rotating_mem.Print();
auto run_flush_cache = [&]() {{
// flush icache
ck_tile::flush_icache();
// rotating mem
rotating_mem.Next();
// clear c mem
if(args.k_batch > 1)
hipGetErrorString(hipMemsetAsync(
args.c_ptr, 0, args.M * args.N * sizeof(CDataType), stream.stream_id_));
}};
ave_time = ck_tile::launch_kernel_preprocess(
stream,
run_flush_cache,
ck_tile::make_kernel<blocks.x, kBlockPerCu>(
Kernel{{}}, grids, blocks, 0, kargs));
}}
else{{
ave_time = ck_tile::launch_kernel(stream,
ave_time = ck_tile::launch_kernel(stream,
ck_tile::make_kernel<blocks.x, kBlockPerCu>(
Kernel{{}}, grids, blocks, 0, kargs));
}}
return ave_time;
}};

View File

@@ -128,9 +128,7 @@ class GemmProfiler
setting_.log_,
setting_.n_warmup_,
setting_.n_repeat_,
setting_.is_gpu_timer_,
setting_.flush_cache_,
setting_.rotating_count_});
setting_.is_gpu_timer_});
process_result(gemm_problem,
c_m_n_dev_buf,
c_m_n_host_result,