mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-05 06:01:23 +00:00
Support for dtypes (fp8, bf8, bf16 and fp16) for the ck_tile/03_gemm example. (#1845)
* Support bf16/fb8/bf8 datatypes for ck_tile/gemm * remove commented out code. * Addressing code review comments and enabling universal_gemm for all the supported data types. * Merge conflict resolution. * Solve the memory pipeline compilation error. Merge with the new change of CShuffle * finish the feature, pass the tests * Fix the pipeline and add the benchmark script for other data types --------- Co-authored-by: ThomasNing <thomas.ning@amd.com>
This commit is contained in:
@@ -77,6 +77,7 @@ struct CShuffleEpilogue
|
||||
*
|
||||
* @return The vector store size for C tensor.
|
||||
*/
|
||||
template <typename ODataType>
|
||||
CK_TILE_HOST_DEVICE static constexpr auto GetVectorSizeC()
|
||||
{
|
||||
constexpr index_t MaxVectorStoreSize = 16;
|
||||
@@ -142,7 +143,7 @@ struct CShuffleEpilogue
|
||||
TileDistributionEncodingPattern2D<kBlockSize,
|
||||
kMPerIteration,
|
||||
kNPerIteration,
|
||||
GetVectorSizeC(),
|
||||
GetVectorSizeC<ODataType>(),
|
||||
tile_distribution_pattern::thread_raked>;
|
||||
constexpr auto dram_tile_distribution = TileEncodingPattern::Make2DStaticTileDistribution();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user