From b9780eb8a17f8ecafed1ac3e0c65a04ba2a545bf Mon Sep 17 00:00:00 2001 From: "Graner, Johannes" Date: Mon, 22 Dec 2025 11:34:00 -0500 Subject: [PATCH] Only use both offset hacks at the same time --- .../gpu/device/impl/split_k_offset_utils.hpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/include/ck/tensor_operation/gpu/device/impl/split_k_offset_utils.hpp b/include/ck/tensor_operation/gpu/device/impl/split_k_offset_utils.hpp index bdce7fe6a4..c2b0d48225 100644 --- a/include/ck/tensor_operation/gpu/device/impl/split_k_offset_utils.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/split_k_offset_utils.hpp @@ -1,5 +1,5 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. // SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once @@ -73,16 +73,15 @@ struct SplitKHackEligibility const bool is_a_compact = IsDescriptorCompact(a_desc); const bool is_b_compact = IsDescriptorCompact(b_desc); - // Determine hack flags based on all conditions - const bool split_k_offset_a_hack = can_divide_n_spatial_by_k_batch && is_k_not_paded && - is_correct_layout && is_a_stride_divisible && - is_a_compact; + // Require BOTH A and B to be eligible for the hack to avoid KBatch dimension mismatch. + // The gridwise kernel's CheckValidity requires A.KBatch == B.KBatch, so we must + // ensure symmetric hack flags to maintain kernel applicability. + const bool both_eligible = can_divide_n_spatial_by_k_batch && can_divide_n_by_k_batch && + is_k_not_paded && is_correct_layout && is_a_stride_divisible && + is_b_stride_divisible && is_a_compact && is_b_compact; - const bool split_k_offset_b_hack = can_divide_n_by_k_batch && is_k_not_paded && - is_correct_layout && is_b_stride_divisible && - is_b_compact; - - return std::make_pair(split_k_offset_a_hack, split_k_offset_b_hack); + // Return symmetric flags - both enabled or both disabled + return std::make_pair(both_eligible, 
both_eligible); } };