Merge commit '700b2ec9c02da8d367ebe8a223a6dbf16622db09' into develop

This commit is contained in:
assistant-librarian[bot]
2025-12-18 10:15:48 +00:00
parent 792c3eb377
commit cef729b554
11 changed files with 268 additions and 98 deletions

View File

@@ -1,7 +1,7 @@
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT
if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx90a")
if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx90a|gfx11|gfx12")
set(EXAMPLE_CONV_COMPILE_OPTIONS)
list(APPEND EXAMPLE_CONV_COMPILE_OPTIONS -mllvm -enable-noalias-to-md-conversion=0)

View File

@@ -21,6 +21,9 @@ struct GroupedConvolutionBackwardWeightTwoStageInvoker
const ck_tile::stream_config& s)
{
using WorkspaceDataType = float;
// Force VectorSizeC to 1 for the two-stage path so that it checks the main
// two-stage use case.
constexpr ck_tile::index_t VectorSizeC = 1;
// Implicit GEMM Traits
using GemmShape = ck_tile::TileGemmShape<
@@ -39,7 +42,7 @@ struct GroupedConvolutionBackwardWeightTwoStageInvoker
OutLayout,
ConvConfig::VectorSizeA,
ConvConfig::VectorSizeB,
ConvConfig::VectorSizeC,
VectorSizeC,
ConvConfig::NumGroupsToMerge>;
using TilePartitioner = ck_tile::GemmSpatiallyLocalTilePartitioner<