mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
[CK_TILE] fix formatting of pooling in ckTileEngine with clang-format
This commit is contained in:
@@ -83,8 +83,7 @@ struct PoolProblem
|
||||
<< " \"rightPadY\": " << problem.rightPadY << ",\n"
|
||||
<< " \"rightPadX\": " << problem.rightPadX << ",\n"
|
||||
<< " \"outputIndex\": " << (problem.outputIndex ? "true" : "false") << ",\n"
|
||||
<< " \"propagateNan\": " << (problem.propagateNan ? "true" : "false")
|
||||
<< "\n"
|
||||
<< " \"propagateNan\": " << (problem.propagateNan ? "true" : "false") << "\n"
|
||||
<< "}";
|
||||
return os;
|
||||
}
|
||||
@@ -175,8 +174,8 @@ bool compare_pool_results(std::string instanceName,
|
||||
{
|
||||
bool pass = ck_tile::check_err(out_dev_result, out_host_result, "Error: Incorrect results!");
|
||||
|
||||
std::cout << "For " << instanceName << " verification result is: "
|
||||
<< (pass ? "correct" : "fail") << std::endl;
|
||||
std::cout << "For " << instanceName
|
||||
<< " verification result is: " << (pass ? "correct" : "fail") << std::endl;
|
||||
|
||||
return pass;
|
||||
}
|
||||
@@ -189,8 +188,8 @@ bool compare_pool_index_results(std::string instanceName,
|
||||
bool pass = ck_tile::check_err(
|
||||
out_index_dev_result, out_index_host_result, "Error: Incorrect index results!");
|
||||
|
||||
std::cout << "For " << instanceName << " index verification result is: "
|
||||
<< (pass ? "correct" : "fail") << std::endl;
|
||||
std::cout << "For " << instanceName
|
||||
<< " index verification result is: " << (pass ? "correct" : "fail") << std::endl;
|
||||
|
||||
return pass;
|
||||
}
|
||||
|
||||
@@ -24,8 +24,7 @@
|
||||
inline auto create_args(int argc, char* argv[])
|
||||
{
|
||||
ck_tile::ArgParser arg_parser;
|
||||
arg_parser
|
||||
.insert("N", "2", "Batch size N dimension. Default is 2.")
|
||||
arg_parser.insert("N", "2", "Batch size N dimension. Default is 2.")
|
||||
.insert("D", "30", "Depth D dimension (for 3D pooling). Default is 30.")
|
||||
.insert("H", "30", "Height H dimension. Default is 30.")
|
||||
.insert("W", "30", "Width W dimension. Default is 30.")
|
||||
@@ -49,15 +48,15 @@ inline auto create_args(int argc, char* argv[])
|
||||
"0",
|
||||
"The type of validation. Set to 0 for no validation, 1 for validation on CPU. "
|
||||
"Default is 0.")
|
||||
.insert("log",
|
||||
"false",
|
||||
"Whether output kernel instance information or not. Default is false")
|
||||
.insert(
|
||||
"log", "false", "Whether output kernel instance information or not. Default is false")
|
||||
.insert("warmup", "20", "The number of warmup iterations. Default is 20.")
|
||||
.insert("repeat", "100", "The number of benchmark iterations. Default is 100.")
|
||||
.insert("timer", "true", "Whether to use GPU timer. Default is true.")
|
||||
.insert("init",
|
||||
"0",
|
||||
"The method of tensor initialization. 0=random, 1=linear, 2=constant(1). Default is 0.")
|
||||
.insert(
|
||||
"init",
|
||||
"0",
|
||||
"The method of tensor initialization. 0=random, 1=linear, 2=constant(1). Default is 0.")
|
||||
.insert("json_output",
|
||||
"false",
|
||||
"Whether to output results in JSON format only. Default is false");
|
||||
@@ -127,18 +126,20 @@ void run_benchmark(const ck_tile::ArgParser& arg_parser)
|
||||
}
|
||||
|
||||
// Create shapes using ck_tile::make_tuple
|
||||
const auto input_shape = ck_tile::make_tuple(N, D, H, W, C);
|
||||
const auto output_shape = ck_tile::make_tuple(N, Do, Ho, Wo, C);
|
||||
const auto input_strides = ck_tile::make_tuple(D * H * W * C, H * W * C, W * C, C, 1);
|
||||
const auto output_strides = ck_tile::make_tuple(Do * Ho * Wo * C, Ho * Wo * C, Wo * C, C, 1);
|
||||
const auto window_lengths = ck_tile::make_tuple(Z, Y, X);
|
||||
const auto window_strides = ck_tile::make_tuple(Sz, Sy, Sx);
|
||||
const auto input_shape = ck_tile::make_tuple(N, D, H, W, C);
|
||||
const auto output_shape = ck_tile::make_tuple(N, Do, Ho, Wo, C);
|
||||
const auto input_strides = ck_tile::make_tuple(D * H * W * C, H * W * C, W * C, C, 1);
|
||||
const auto output_strides =
|
||||
ck_tile::make_tuple(Do * Ho * Wo * C, Ho * Wo * C, Wo * C, C, 1);
|
||||
const auto window_lengths = ck_tile::make_tuple(Z, Y, X);
|
||||
const auto window_strides = ck_tile::make_tuple(Sz, Sy, Sx);
|
||||
const auto window_dilations = ck_tile::make_tuple(Dz, Dy, Dx);
|
||||
const auto input_left_pads = ck_tile::make_tuple(LeftPz, LeftPy, LeftPx);
|
||||
const auto input_right_pads = ck_tile::make_tuple(RightPz, RightPy, RightPx);
|
||||
|
||||
// Allocate host tensors
|
||||
ck_tile::HostTensor<InDataType> in({N, D, H, W, C}, {D * H * W * C, H * W * C, W * C, C, 1});
|
||||
ck_tile::HostTensor<InDataType> in({N, D, H, W, C},
|
||||
{D * H * W * C, H * W * C, W * C, C, 1});
|
||||
ck_tile::HostTensor<OutDataType> out({N, Do, Ho, Wo, C},
|
||||
{Do * Ho * Wo * C, Ho * Wo * C, Wo * C, C, 1});
|
||||
ck_tile::HostTensor<IndexDataType> out_index(
|
||||
@@ -172,21 +173,19 @@ void run_benchmark(const ck_tile::ArgParser& arg_parser)
|
||||
in_buf.ToDevice(in.data());
|
||||
|
||||
// Create host arguments
|
||||
auto host_args =
|
||||
ck_tile::PoolHostArgs<decltype(input_shape), decltype(window_lengths)>{
|
||||
static_cast<InDataType*>(in_buf.GetDeviceBuffer()),
|
||||
static_cast<OutDataType*>(out_buf.GetDeviceBuffer()),
|
||||
OUTPUT_INDEX ? static_cast<IndexDataType*>(out_index_buf.GetDeviceBuffer())
|
||||
: nullptr,
|
||||
input_shape,
|
||||
output_shape,
|
||||
input_strides,
|
||||
output_strides,
|
||||
window_lengths,
|
||||
window_strides,
|
||||
window_dilations,
|
||||
input_left_pads,
|
||||
input_right_pads};
|
||||
auto host_args = ck_tile::PoolHostArgs<decltype(input_shape), decltype(window_lengths)>{
|
||||
static_cast<InDataType*>(in_buf.GetDeviceBuffer()),
|
||||
static_cast<OutDataType*>(out_buf.GetDeviceBuffer()),
|
||||
OUTPUT_INDEX ? static_cast<IndexDataType*>(out_index_buf.GetDeviceBuffer()) : nullptr,
|
||||
input_shape,
|
||||
output_shape,
|
||||
input_strides,
|
||||
output_strides,
|
||||
window_lengths,
|
||||
window_strides,
|
||||
window_dilations,
|
||||
input_left_pads,
|
||||
input_right_pads};
|
||||
|
||||
auto kernel_args = Kernel::MakeKernelArgs(host_args);
|
||||
|
||||
@@ -335,21 +334,19 @@ void run_benchmark(const ck_tile::ArgParser& arg_parser)
|
||||
|
||||
in_buf.ToDevice(in.data());
|
||||
|
||||
auto host_args =
|
||||
ck_tile::PoolHostArgs<decltype(input_shape), decltype(window_lengths)>{
|
||||
static_cast<InDataType*>(in_buf.GetDeviceBuffer()),
|
||||
static_cast<OutDataType*>(out_buf.GetDeviceBuffer()),
|
||||
OUTPUT_INDEX ? static_cast<IndexDataType*>(out_index_buf.GetDeviceBuffer())
|
||||
: nullptr,
|
||||
input_shape,
|
||||
output_shape,
|
||||
input_strides,
|
||||
output_strides,
|
||||
window_lengths,
|
||||
window_strides,
|
||||
window_dilations,
|
||||
input_left_pads,
|
||||
input_right_pads};
|
||||
auto host_args = ck_tile::PoolHostArgs<decltype(input_shape), decltype(window_lengths)>{
|
||||
static_cast<InDataType*>(in_buf.GetDeviceBuffer()),
|
||||
static_cast<OutDataType*>(out_buf.GetDeviceBuffer()),
|
||||
OUTPUT_INDEX ? static_cast<IndexDataType*>(out_index_buf.GetDeviceBuffer()) : nullptr,
|
||||
input_shape,
|
||||
output_shape,
|
||||
input_strides,
|
||||
output_strides,
|
||||
window_lengths,
|
||||
window_strides,
|
||||
window_dilations,
|
||||
input_left_pads,
|
||||
input_right_pads};
|
||||
|
||||
auto kernel_args = Kernel::MakeKernelArgs(host_args);
|
||||
|
||||
|
||||
@@ -86,68 +86,63 @@ class PoolProfiler
|
||||
|
||||
// Create input/output tensors based on pool dimension (3D: NDHWC, 2D: NHWC)
|
||||
ck_tile::HostTensor<InDataType> in_tensor(
|
||||
pool_problem.poolDim == 3
|
||||
? std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(D),
|
||||
static_cast<std::size_t>(H),
|
||||
static_cast<std::size_t>(W),
|
||||
static_cast<std::size_t>(C)}
|
||||
: std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(H),
|
||||
static_cast<std::size_t>(W),
|
||||
static_cast<std::size_t>(C)});
|
||||
pool_problem.poolDim == 3 ? std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(D),
|
||||
static_cast<std::size_t>(H),
|
||||
static_cast<std::size_t>(W),
|
||||
static_cast<std::size_t>(C)}
|
||||
: std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(H),
|
||||
static_cast<std::size_t>(W),
|
||||
static_cast<std::size_t>(C)});
|
||||
|
||||
ck_tile::HostTensor<OutDataType> out_tensor(
|
||||
pool_problem.poolDim == 3
|
||||
? std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(Do),
|
||||
static_cast<std::size_t>(Ho),
|
||||
static_cast<std::size_t>(Wo),
|
||||
static_cast<std::size_t>(C)}
|
||||
: std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(Ho),
|
||||
static_cast<std::size_t>(Wo),
|
||||
static_cast<std::size_t>(C)});
|
||||
pool_problem.poolDim == 3 ? std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(Do),
|
||||
static_cast<std::size_t>(Ho),
|
||||
static_cast<std::size_t>(Wo),
|
||||
static_cast<std::size_t>(C)}
|
||||
: std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(Ho),
|
||||
static_cast<std::size_t>(Wo),
|
||||
static_cast<std::size_t>(C)});
|
||||
|
||||
ck_tile::HostTensor<OutDataType> out_host_result(
|
||||
pool_problem.poolDim == 3
|
||||
? std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(Do),
|
||||
static_cast<std::size_t>(Ho),
|
||||
static_cast<std::size_t>(Wo),
|
||||
static_cast<std::size_t>(C)}
|
||||
: std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(Ho),
|
||||
static_cast<std::size_t>(Wo),
|
||||
static_cast<std::size_t>(C)});
|
||||
pool_problem.poolDim == 3 ? std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(Do),
|
||||
static_cast<std::size_t>(Ho),
|
||||
static_cast<std::size_t>(Wo),
|
||||
static_cast<std::size_t>(C)}
|
||||
: std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(Ho),
|
||||
static_cast<std::size_t>(Wo),
|
||||
static_cast<std::size_t>(C)});
|
||||
|
||||
ck_tile::HostTensor<IndexDataType> out_index_tensor(
|
||||
pool_problem.outputIndex
|
||||
? (pool_problem.poolDim == 3
|
||||
? std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(Do),
|
||||
static_cast<std::size_t>(Ho),
|
||||
static_cast<std::size_t>(Wo),
|
||||
static_cast<std::size_t>(C)}
|
||||
: std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(Ho),
|
||||
static_cast<std::size_t>(Wo),
|
||||
static_cast<std::size_t>(C)})
|
||||
: std::vector<std::size_t>{1});
|
||||
pool_problem.outputIndex ? (pool_problem.poolDim == 3
|
||||
? std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(Do),
|
||||
static_cast<std::size_t>(Ho),
|
||||
static_cast<std::size_t>(Wo),
|
||||
static_cast<std::size_t>(C)}
|
||||
: std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(Ho),
|
||||
static_cast<std::size_t>(Wo),
|
||||
static_cast<std::size_t>(C)})
|
||||
: std::vector<std::size_t>{1});
|
||||
|
||||
ck_tile::HostTensor<IndexDataType> out_index_host_result(
|
||||
pool_problem.outputIndex
|
||||
? (pool_problem.poolDim == 3
|
||||
? std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(Do),
|
||||
static_cast<std::size_t>(Ho),
|
||||
static_cast<std::size_t>(Wo),
|
||||
static_cast<std::size_t>(C)}
|
||||
: std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(Ho),
|
||||
static_cast<std::size_t>(Wo),
|
||||
static_cast<std::size_t>(C)})
|
||||
: std::vector<std::size_t>{1});
|
||||
pool_problem.outputIndex ? (pool_problem.poolDim == 3
|
||||
? std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(Do),
|
||||
static_cast<std::size_t>(Ho),
|
||||
static_cast<std::size_t>(Wo),
|
||||
static_cast<std::size_t>(C)}
|
||||
: std::vector<std::size_t>{static_cast<std::size_t>(N),
|
||||
static_cast<std::size_t>(Ho),
|
||||
static_cast<std::size_t>(Wo),
|
||||
static_cast<std::size_t>(C)})
|
||||
: std::vector<std::size_t>{1});
|
||||
|
||||
// Initialize input tensor
|
||||
if(setting_.init_method_ == 0)
|
||||
@@ -275,8 +270,8 @@ class PoolProfiler
|
||||
std::size_t flop = output_elements * window_size;
|
||||
|
||||
// Calculate memory bandwidth
|
||||
std::size_t num_byte = sizeof(InDataType) * N * D * H * W * C +
|
||||
sizeof(OutDataType) * N * Do * Ho * Wo * C;
|
||||
std::size_t num_byte =
|
||||
sizeof(InDataType) * N * D * H * W * C + sizeof(OutDataType) * N * Do * Ho * Wo * C;
|
||||
|
||||
// Update performance results
|
||||
kernel_instance.perf_result_.latency_ = avg_time;
|
||||
@@ -357,15 +352,12 @@ class PoolProfiler
|
||||
{
|
||||
file << "rocm_version,device_name,"
|
||||
<< "in_dtype,out_dtype,compute_dtype,index_dtype,"
|
||||
<< "block_shape,reduce_op,pool_dim,"
|
||||
<< "N,D,H,W,C,"
|
||||
<< "window_z,window_y,window_x,"
|
||||
<< "stride_z,stride_y,stride_x,"
|
||||
<< "block_shape,reduce_op,pool_dim," << "N,D,H,W,C,"
|
||||
<< "window_z,window_y,window_x," << "stride_z,stride_y,stride_x,"
|
||||
<< "dilation_z,dilation_y,dilation_x,"
|
||||
<< "left_pad_z,left_pad_y,left_pad_x,"
|
||||
<< "right_pad_z,right_pad_y,right_pad_x,"
|
||||
<< "output_index,propagate_nan," << "name,"
|
||||
<< "latency(ms),tflops(TFlops),bandwidth(GB/s),metric\n";
|
||||
<< "right_pad_z,right_pad_y,right_pad_x," << "output_index,propagate_nan,"
|
||||
<< "name," << "latency(ms),tflops(TFlops),bandwidth(GB/s),metric\n";
|
||||
}
|
||||
|
||||
const auto& problem = kernel_instance.problem_;
|
||||
|
||||
Reference in New Issue
Block a user