Update AMD buffer coherency (#3403)

* Update AMD buffer coherency [AICK-421]

* fixes

* fix

* fixes

* fixes

* Add backward compatibility

* fix

* fixes

* fix

* fix

* fix

* Update grouped_convolution_backward_weight_kernel.hpp
This commit is contained in:
Bartłomiej Kocot
2025-12-18 10:16:22 +01:00
committed by GitHub
parent 15e81397a4
commit 700b2ec9c0
11 changed files with 268 additions and 98 deletions

View File

@@ -1,7 +1,7 @@
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT
if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx90a")
if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx90a|gfx11|gfx12")
set(EXAMPLE_CONV_COMPILE_OPTIONS)
list(APPEND EXAMPLE_CONV_COMPILE_OPTIONS -mllvm -enable-noalias-to-md-conversion=0)

View File

@@ -21,6 +21,9 @@ struct GroupedConvolutionBackwardWeightTwoStageInvoker
const ck_tile::stream_config& s)
{
using WorkspaceDataType = float;
// Force VectorSizeC to 1 for the two-stage path so that the main
// two-stage use case is exercised.
constexpr ck_tile::index_t VectorSizeC = 1;
// Implicit GEMM Traits
using GemmShape = ck_tile::TileGemmShape<
@@ -39,7 +42,7 @@ struct GroupedConvolutionBackwardWeightTwoStageInvoker
OutLayout,
ConvConfig::VectorSizeA,
ConvConfig::VectorSizeB,
ConvConfig::VectorSizeC,
VectorSizeC,
ConvConfig::NumGroupsToMerge>;
using TilePartitioner = ck_tile::GemmSpatiallyLocalTilePartitioner<