mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-01 20:21:23 +00:00
[CK Tile] enable building examples by default (#3259)
* remove EXCLUDE_FROM_ALL from ck-tile examples -> +15 min build time w/ 64 threads for a single arch * fix cpp17 compile error in the ck-tile examples --------- Co-authored-by: khuagarw <khuagarw@amd.com> Co-authored-by: Ding, Yi <yi.ding@amd.com>
This commit is contained in:
@@ -1,20 +1,22 @@
|
||||
set(EXAMPLE_CONV_COMPILE_OPTIONS)
|
||||
list(APPEND EXAMPLE_CONV_COMPILE_OPTIONS -mllvm -enable-noalias-to-md-conversion=0)
|
||||
if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx90a")
|
||||
set(EXAMPLE_CONV_COMPILE_OPTIONS)
|
||||
list(APPEND EXAMPLE_CONV_COMPILE_OPTIONS -mllvm -enable-noalias-to-md-conversion=0)
|
||||
|
||||
add_executable(tile_example_grouped_conv_fwd EXCLUDE_FROM_ALL grouped_convolution_forward.cpp)
|
||||
target_compile_options(tile_example_grouped_conv_fwd PRIVATE ${EXAMPLE_CONV_COMPILE_OPTIONS})
|
||||
add_executable(tile_example_grouped_conv_fwd grouped_convolution_forward.cpp)
|
||||
target_compile_options(tile_example_grouped_conv_fwd PRIVATE ${EXAMPLE_CONV_COMPILE_OPTIONS})
|
||||
|
||||
add_executable(tile_example_grouped_conv_fwd_large_tensor EXCLUDE_FROM_ALL grouped_convolution_forward_large_tensor.cpp)
|
||||
target_compile_options(tile_example_grouped_conv_fwd_large_tensor PRIVATE ${EXAMPLE_CONV_COMPILE_OPTIONS})
|
||||
add_executable(tile_example_grouped_conv_fwd_large_tensor grouped_convolution_forward_large_tensor.cpp)
|
||||
target_compile_options(tile_example_grouped_conv_fwd_large_tensor PRIVATE ${EXAMPLE_CONV_COMPILE_OPTIONS})
|
||||
|
||||
add_executable(tile_example_grouped_conv_fwd_bias_clamp EXCLUDE_FROM_ALL grouped_convolution_forward_bias_clamp.cpp)
|
||||
target_compile_options(tile_example_grouped_conv_fwd_bias_clamp PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
||||
add_executable(tile_example_grouped_conv_fwd_bias_clamp grouped_convolution_forward_bias_clamp.cpp)
|
||||
target_compile_options(tile_example_grouped_conv_fwd_bias_clamp PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
||||
|
||||
add_executable(tile_example_grouped_conv_bwd_weight EXCLUDE_FROM_ALL grouped_convolution_backward_weight.cpp)
|
||||
target_compile_options(tile_example_grouped_conv_bwd_weight PRIVATE ${EXAMPLE_CONV_COMPILE_OPTIONS})
|
||||
add_executable(tile_example_grouped_conv_bwd_weight grouped_convolution_backward_weight.cpp)
|
||||
target_compile_options(tile_example_grouped_conv_bwd_weight PRIVATE ${EXAMPLE_CONV_COMPILE_OPTIONS})
|
||||
|
||||
add_executable(tile_example_grouped_conv_bwd_weight_two_stage EXCLUDE_FROM_ALL grouped_convolution_backward_weight_two_stage.cpp)
|
||||
target_compile_options(tile_example_grouped_conv_bwd_weight_two_stage PRIVATE ${EXAMPLE_CONV_COMPILE_OPTIONS})
|
||||
add_executable(tile_example_grouped_conv_bwd_weight_two_stage grouped_convolution_backward_weight_two_stage.cpp)
|
||||
target_compile_options(tile_example_grouped_conv_bwd_weight_two_stage PRIVATE ${EXAMPLE_CONV_COMPILE_OPTIONS})
|
||||
|
||||
add_executable(tile_example_grouped_conv_bwd_data EXCLUDE_FROM_ALL grouped_convolution_backward_data.cpp)
|
||||
target_compile_options(tile_example_grouped_conv_bwd_data PRIVATE ${EXAMPLE_CONV_COMPILE_OPTIONS})
|
||||
add_executable(tile_example_grouped_conv_bwd_data grouped_convolution_backward_data.cpp)
|
||||
target_compile_options(tile_example_grouped_conv_bwd_data PRIVATE ${EXAMPLE_CONV_COMPILE_OPTIONS})
|
||||
endif()
|
||||
|
||||
@@ -101,7 +101,6 @@ struct GroupedConvolutionForwardInvoker
|
||||
const ck_tile::index_t num_loop = TilePartitioner::GetLoopNum(K_split);
|
||||
const bool has_hot_loop = BaseGemmPipeline::BlockHasHotloop(num_loop);
|
||||
const ck_tile::TailNumber tail_num = BaseGemmPipeline::GetBlockLoopTailNum(num_loop);
|
||||
float ave_time{0};
|
||||
|
||||
using TransformType =
|
||||
ck_tile::TransformConvFwdToGemm<NDimSpatial,
|
||||
@@ -242,13 +241,15 @@ struct GroupedConvolutionForwardInvoker
|
||||
// =====================================================================
|
||||
// Kernel launch lambda: Uses EnableSplitImage based on layout support
|
||||
// =====================================================================
|
||||
const auto Run = [&]<bool EnableSplitImage>(const auto has_hot_loop_,
|
||||
const auto tail_number_,
|
||||
const auto memory_operation_) {
|
||||
const auto Run = [&](const auto has_hot_loop_,
|
||||
const auto tail_number_,
|
||||
const auto memory_operation_,
|
||||
const auto enable_split_image_) {
|
||||
constexpr bool has_hot_loop_v = has_hot_loop_.value;
|
||||
constexpr auto tail_number_v = tail_number_.value;
|
||||
constexpr auto scheduler = ConvConfig::Scheduler;
|
||||
constexpr auto memory_operation = memory_operation_.value;
|
||||
constexpr bool EnableSplitImage = enable_split_image_.value;
|
||||
|
||||
using GroupedConvTraitsType = std::conditional_t<EnableSplitImage,
|
||||
GroupedConvTraitsTypeLargeTensor,
|
||||
@@ -357,11 +358,9 @@ struct GroupedConvolutionForwardInvoker
|
||||
<< ", Vector size C: " << ConvEpilogue::GetVectorSizeC() << std::endl;
|
||||
}
|
||||
|
||||
ave_time = ck_tile::launch_kernel(
|
||||
return ck_tile::launch_kernel(
|
||||
s,
|
||||
ck_tile::make_kernel<ConvConfig::kBlockPerCu>(Kernel{}, grids, blocks, 0, kargs));
|
||||
|
||||
return ave_time;
|
||||
};
|
||||
|
||||
// =====================================================================
|
||||
@@ -369,28 +368,33 @@ struct GroupedConvolutionForwardInvoker
|
||||
// =====================================================================
|
||||
if(use_split_image)
|
||||
{
|
||||
// Use split-image kernel (Kernel<true>)
|
||||
const auto RunSplitImage = [&](const auto has_hot_loop_, const auto tail_number_) {
|
||||
if(args.k_batch == 1)
|
||||
Run.template operator()<true>(has_hot_loop_, tail_number_, MemoryOpSet{});
|
||||
return Run(
|
||||
has_hot_loop_, tail_number_, MemoryOpSet{}, ck_tile::bool_constant<true>{});
|
||||
else
|
||||
Run.template operator()<true>(has_hot_loop_, tail_number_, MemoryOpAtomicAdd{});
|
||||
return Run(has_hot_loop_,
|
||||
tail_number_,
|
||||
MemoryOpAtomicAdd{},
|
||||
ck_tile::bool_constant<true>{});
|
||||
};
|
||||
BaseGemmPipeline::TailHandler(RunSplitImage, has_hot_loop, tail_num);
|
||||
return BaseGemmPipeline::TailHandler(RunSplitImage, has_hot_loop, tail_num);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Use regular kernel (Kernel<false>)
|
||||
const auto RunRegular = [&](const auto has_hot_loop_, const auto tail_number_) {
|
||||
if(args.k_batch == 1)
|
||||
Run.template operator()<false>(has_hot_loop_, tail_number_, MemoryOpSet{});
|
||||
return Run(has_hot_loop_,
|
||||
tail_number_,
|
||||
MemoryOpSet{},
|
||||
ck_tile::bool_constant<false>{});
|
||||
else
|
||||
Run.template operator()<false>(
|
||||
has_hot_loop_, tail_number_, MemoryOpAtomicAdd{});
|
||||
return Run(has_hot_loop_,
|
||||
tail_number_,
|
||||
MemoryOpAtomicAdd{},
|
||||
ck_tile::bool_constant<false>{});
|
||||
};
|
||||
BaseGemmPipeline::TailHandler(RunRegular, has_hot_loop, tail_num);
|
||||
return BaseGemmPipeline::TailHandler(RunRegular, has_hot_loop, tail_num);
|
||||
}
|
||||
|
||||
return ave_time;
|
||||
}
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user