From 0f8c7cad09a0e7bab33fc8ec9de34ea4cdcf4b7c Mon Sep 17 00:00:00 2001 From: Max Podkorytov <4273004+tenpercent@users.noreply.github.com> Date: Tue, 3 Feb 2026 02:54:18 -0800 Subject: [PATCH] Remove concrete performance numbers from BUILD_TIME_OPTIMIZATION.md (#3702) Replace specific benchmark numbers with qualitative descriptions since measurements vary across environments and may become outdated. Co-authored-by: Claude [ROCm/composable_kernel commit: 3f04d27b687365332d2f1654f169444cab192927] --- include/ck/BUILD_TIME_OPTIMIZATION.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/ck/BUILD_TIME_OPTIMIZATION.md b/include/ck/BUILD_TIME_OPTIMIZATION.md index 94b292b878..045d4bb929 100644 --- a/include/ck/BUILD_TIME_OPTIMIZATION.md +++ b/include/ck/BUILD_TIME_OPTIMIZATION.md @@ -105,7 +105,7 @@ struct generate_identity_sequence generate_tuple(generate_identity_sequence{}, Number{}); ``` -This reduced `transform_tensor_descriptor` instantiations from 388 to 32 (92% reduction). +This significantly reduces template instantiations for `transform_tensor_descriptor`. **Example: container_concat** @@ -135,7 +135,7 @@ __host__ __device__ constexpr auto container_concat(const Tuple& tx, const } ``` -This reduced `container_concat` instantiations from 186 to 93 (50% reduction). +This reduces `container_concat` template instantiations. **Example: make_uniform_tuple** @@ -192,7 +192,7 @@ __host__ __device__ constexpr index_t find_source_index(Sequence) } ``` -This reduced `sequence_map_inverse` instantiations from 45 to 10 (78% reduction) and wall-clock time by 95%. +This significantly reduces `sequence_map_inverse` instantiations and compile time. ### 4. Use Fold Expressions for Accumulation @@ -222,4 +222,4 @@ __host__ __device__ constexpr auto compute_element_space_size( } ``` -This reduced `calculate_element_space_size` instantiations from 24 to 10 (58% reduction) and wall-clock time by 73%. +This reduces `calculate_element_space_size` instantiations and compile time.