mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-19 04:19:36 +00:00
Fix split N for large images in grouped conv fwd (#2004)
* Fix split N for large images in grouped conv fwd
* Fix comments
[ROCm/composable_kernel commit: 5b0873c31a]
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#pragma once
|
||||
|
||||
@@ -106,9 +106,10 @@ struct TransformConvBwdDataToGemm_v1
|
||||
}
|
||||
else
|
||||
{
|
||||
// Not possible to support even after split N.
|
||||
// Too large tensor.
|
||||
return N;
|
||||
// Split Convolution's N dimension into N workgroups. However
|
||||
// this still might not result in a sufficiently small tensor,
|
||||
// but at least later on we could divide the image as well.
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
||||
@@ -83,9 +83,10 @@ struct TransformConvFwdToGemm
|
||||
}
|
||||
else
|
||||
{
|
||||
// Not possible to support even after split N.
|
||||
// Too large tensor.
|
||||
return N;
|
||||
// Split Convolution's N dimension into N workgroups. However
|
||||
// this still might not result in a sufficiently small tensor,
|
||||
// but at least later on we could divide the image as well.
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
||||
Reference in New Issue
Block a user