mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-18 20:09:25 +00:00
[CK_TILE] Split-K autodeduction (#3351)
* First version of split-K autodeduction.
* Fix circular dependency and kernel construction.
* Fix tolerance calculation for bwd weight example.
* Simplify kernel construction.
* Fix kernel launching bug for split-K autodeduce.
* Add split-K autodeduction support for the two stage example.
* Fix a corner case.
* Fix clang-format.
* Fix clang-format for inc files.
* Add missing header.
* Prevent too large split-K values.
* Fix formatting.
* Add unit tests for IsSupportedArgument in grouped bwd conv.
* clang-format.
* Fix merge conflicts.
* Address feedback from code review.
* clang-format
* Fix new tests after merge.
---------
Co-authored-by: Ville Pietilä <>
[ROCm/composable_kernel commit: fc22320d78]
This commit is contained in:
@@ -70,6 +70,24 @@ inline bool is_load_tr_supported()
|
||||
// Check if load transpose is supported.
|
||||
return get_device_name() == "gfx950";
|
||||
}
|
||||
|
||||
inline size_t get_num_cus()
|
||||
{
|
||||
hipDeviceProp_t props{};
|
||||
int device;
|
||||
auto status = hipGetDevice(&device);
|
||||
if(status != hipSuccess)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
status = hipGetDeviceProperties(&props, device);
|
||||
if(status != hipSuccess)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
return static_cast<size_t>(props.multiProcessorCount);
|
||||
}
|
||||
|
||||
} // namespace ck_tile
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user