mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-02 20:51:23 +00:00
[rocm-libraries] ROCm/rocm-libraries#4594 (commit 1fce4cb)
[CK_TILE] MX GEMM non-preshuffled RCR layout ## Motivation Implements a GEMM with MX scaling for fp4 and fp8 in non-preshuffled layouts using an async pipeline. ## Technical Details <!-- Explain the changes along with any relevant GitHub links. --> ## Test Plan <!-- Explain any relevant testing done to verify this PR. --> ## Test Result <!-- Briefly summarize test outcomes. --> ## Submission Checklist - [ ] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
This commit is contained in:
committed by
assistant-librarian[bot]
parent
b8def2c724
commit
8f27f65d44
@@ -1599,6 +1599,9 @@ struct WarpGemmAttributeMfmaImpl_f32_16x16x128_f8f6f4
|
||||
static constexpr index_t kCM0PerLane = 1;
|
||||
static constexpr index_t kCM1PerLane = 4;
|
||||
|
||||
// To get unity scale: every byte of kDefaultScale is 0x7F (127), so 2^(0x7F - 127) = 1.0 for each byte
|
||||
static constexpr index_t kDefaultScale = 0x7F7F7F7F;
|
||||
|
||||
// c_vec += a_vec * b_vec
|
||||
template <index_t opselA, index_t opselB, bool post_nop_ = false>
|
||||
CK_TILE_DEVICE void operator()(CVecType& c_vec,
|
||||
@@ -1669,13 +1672,13 @@ struct WarpGemmAttributeMfmaImpl_f32_16x16x128_f8f6f4
|
||||
const BVecType& b_vec,
|
||||
bool_constant<post_nop_> = {}) const
|
||||
{
|
||||
operator()<0, 0>(c_vec, a_vec, 0, b_vec, 0);
|
||||
operator()<0, 0>(c_vec, a_vec, kDefaultScale, b_vec, kDefaultScale);
|
||||
}
|
||||
|
||||
// c_vec = a_vec * b_vec
|
||||
CK_TILE_DEVICE CVecType operator()(const AVecType& a_vec, const BVecType& b_vec) const
|
||||
{
|
||||
return operator()<0, 0>(a_vec, 0, b_vec, 0);
|
||||
return operator()<0, 0>(a_vec, kDefaultScale, b_vec, kDefaultScale);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user