mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
Tile engine for streamk (#3157)
* [CK TILE STREAMK] Introduce initial support for tile engine in streamk GEMM.
- This commit lays the groundwork for integrating the tile engine into streamk GEMM.
It focuses on creating benchmark executables for streamk GEMM.
- Additional scripts like test_benchmark.sh and gemm_benchmark.py will be added once
the streamk implementation reaches stability.
* [CK TILE STREAMK] Enable CI to execute tile engine benchmarks for StreamK GEMM
* [CK TILE STREAMK] Refactor: Extract common utility functions.
* [CK TILE STREAMK] Revise tile engine of streamk to align with the updated implementation
* Add pre-commit
* [CK TILE STREAMK] Add 'dp_persistent' and 'reduction_strategy' to the output of CK TILE STREAMK
* [CK TILE STREAMK] Fix a bug in the value of 'dp_persistent' reported by CK TILE STREAMK
* [CK TILE STREAMK] Update Jenkinsfile
* [CK TILE Engine] Update StreamK tile engine help message
Remove default value messages as they are automatically printed
* [CK TILE Engine] Update StreamK tile engine
- Remove namespace reboot
* [CK TILE Engine] Update StreamK tile engine
- Fix merge error
[ROCm/composable_kernel commit: 30727c48fc]
This commit is contained in:
@@ -28,8 +28,7 @@ struct StreamKHostArgs : public ck_tile::UniversalGemmHostArgs<>
|
||||
index_t K_,
|
||||
index_t stride_A_,
|
||||
index_t stride_B_,
|
||||
index_t stride_C_,
|
||||
StreamKReductionStrategy reduction_strategy_)
|
||||
index_t stride_C_)
|
||||
: UniversalGemmHostArgs<>({a_ptr_},
|
||||
{b_ptr_},
|
||||
{/*ds_ptr*/},
|
||||
@@ -41,12 +40,9 @@ struct StreamKHostArgs : public ck_tile::UniversalGemmHostArgs<>
|
||||
{stride_A_},
|
||||
{stride_B_},
|
||||
{/*stride_Ds_*/},
|
||||
stride_C_),
|
||||
reduction_strategy{reduction_strategy_}
|
||||
stride_C_)
|
||||
{
|
||||
}
|
||||
|
||||
ck_tile::StreamKReductionStrategy reduction_strategy;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -133,7 +129,6 @@ struct StreamKKernel
|
||||
host_args.stride_Ds,
|
||||
host_args.stride_E,
|
||||
host_args.k_batch},
|
||||
reduction_strategy{host_args.reduction_strategy},
|
||||
// The workspace pointer is set to nullptr because we must first
|
||||
// instantiate the TilePartitioner to get the necessary size
|
||||
workspace_ptr{nullptr},
|
||||
@@ -141,10 +136,6 @@ struct StreamKKernel
|
||||
|
||||
{
|
||||
}
|
||||
/**
|
||||
* @brief The strategy used by work groups to compute final results in C tensor.
|
||||
*/
|
||||
StreamKReductionStrategy reduction_strategy;
|
||||
/**
|
||||
* @brief A pointer to a buffer in device memory for accumulating partial results via reduction
|
||||
* strategy.
|
||||
|
||||
Reference in New Issue
Block a user