mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 05:31:24 +00:00
[rocm-libraries] ROCm/rocm-libraries#4302 (commit e62bd8a)
[CK_TILE] add tf32 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Proposed changes TF32 is added in CK on gfx942 and gfx950. This PR is to initiate tf32 in CK_TILE on gfx942 and gfx950. ## Checklist Please put an into the boxes that apply. You can also fill these out after creating the PR. If you're not sure, please don't hesitate to ask. - [ ] I have added tests relevant to the introduced functionality, and the unit tests are passing locally - [ ] I have added the test to REGRESSION_TESTS list defined at the top of CMakeLists.txt in tests/CMakeLists.txt, **IF** the test takes more than 30 seconds to run. - [ ] I have added inline documentation which enables the maintainers with understanding the motivation - [ ] I have removed the stale documentation which is no longer relevant after this pull request - [ ] (If this change is user-facing) I have added release notes which provide the end users with a brief summary of the improvement from this pull request - [x] I have run on all changed files - [ ] Any dependent changes have been merged ## Discussion
This commit is contained in:
committed by
assistant-librarian[bot]
parent
652d3456ca
commit
d460ab35b6
@@ -89,19 +89,32 @@ struct CShuffleEpilogue
|
||||
remove_cvref_t<BsDataType>,
|
||||
remove_cvref_t<tuple<BsDataType>>>;
|
||||
|
||||
using ADataType = remove_cvref_t<std::tuple_element_t<number<0>{}, AsDataTypeTuple>>;
|
||||
using BDataType = remove_cvref_t<std::tuple_element_t<number<0>{}, BsDataTypeTuple>>;
|
||||
// ADataTypeCompute: compute type from Problem (may be tf32_t for TF32 mode)
|
||||
using ADataTypeCompute = remove_cvref_t<std::tuple_element_t<number<0>{}, AsDataTypeTuple>>;
|
||||
using BDataTypeCompute = remove_cvref_t<std::tuple_element_t<number<0>{}, BsDataTypeTuple>>;
|
||||
|
||||
using ATypeToUse = std::conditional_t<std::is_same_v<ADataType, pk_int4_t> ||
|
||||
std::is_same_v<ADataType, pk_fp4_t>,
|
||||
BDataType,
|
||||
ADataType>;
|
||||
// ADataTypeBuf: buffer/storage type (fp32 when tf32)
|
||||
using ADataTypeBuf = if_select_t<ADataTypeCompute, tf32_t, float, ADataTypeCompute>;
|
||||
using BDataTypeBuf = if_select_t<BDataTypeCompute, tf32_t, float, BDataTypeCompute>;
|
||||
|
||||
// For warp gemm selection: use tf32_t if compute type was tf32_t
|
||||
// For pk_int4/pk_fp4: use the other data type
|
||||
using ATypeToUse =
|
||||
std::conditional_t<std::is_same_v<ADataTypeCompute, tf32_t>,
|
||||
tf32_t,
|
||||
std::conditional_t<std::is_same_v<ADataTypeBuf, pk_int4_t> ||
|
||||
std::is_same_v<ADataTypeBuf, pk_fp4_t>,
|
||||
BDataTypeBuf,
|
||||
ADataTypeBuf>>;
|
||||
// Used for weight-only quantization kernel, B would be dequantized to the same data type as A
|
||||
using BTypeToUse = std::conditional_t<std::is_same_v<BDataType, pk_int4_t> ||
|
||||
std::is_same_v<BDataType, pk_fp4_t> ||
|
||||
sizeof(BDataType) < sizeof(ADataType),
|
||||
ADataType,
|
||||
BDataType>;
|
||||
using BTypeToUse =
|
||||
std::conditional_t<std::is_same_v<BDataTypeCompute, tf32_t>,
|
||||
tf32_t,
|
||||
std::conditional_t<std::is_same_v<BDataTypeBuf, pk_int4_t> ||
|
||||
std::is_same_v<BDataTypeBuf, pk_fp4_t> ||
|
||||
sizeof(BDataTypeBuf) < sizeof(ADataTypeBuf),
|
||||
ADataTypeBuf,
|
||||
BDataTypeBuf>>;
|
||||
|
||||
using ELayout = remove_cvref_t<typename Problem::ELayout>;
|
||||
using CDElementwise = remove_cvref_t<typename Problem::CDElementwise>;
|
||||
@@ -137,7 +150,7 @@ struct CShuffleEpilogue
|
||||
[[nodiscard]] CK_TILE_HOST static const std::string GetName()
|
||||
{
|
||||
// clang-format off
|
||||
return concat('_', "CShuffleEpilogue",
|
||||
return concat('_', "CShuffleEpilogue",
|
||||
concat('x', MWave, NWave),
|
||||
concat('x', MPerXdl, NPerXdl, KPerXdl),
|
||||
VectorSizeC,
|
||||
@@ -440,8 +453,8 @@ struct CShuffleEpilogue
|
||||
constexpr int RakedXDLN_PerWarp = NumNXdlPerWavePerShuffle / BlockedXDLN_PerWarp;
|
||||
// BlockedLayout
|
||||
// this branch is for original a16w4
|
||||
if constexpr(is_950 || is_any_of<ADataType, pk_int4_t, pk_fp4_t>::value ||
|
||||
is_any_of<BDataType, pk_int4_t, pk_fp4_t>::value)
|
||||
if constexpr(is_950 || is_any_of<ADataTypeBuf, pk_int4_t, pk_fp4_t>::value ||
|
||||
is_any_of<BDataTypeBuf, pk_int4_t, pk_fp4_t>::value)
|
||||
{
|
||||
if constexpr(EightWave)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user