[CK_TILE] Enable MXFP6 for MX GEMM op (#5095)

## Motivation

Add support for MXFP6 in the MX GEMM op in CK-Tile.

Depends on https://github.com/ROCm/rocm-libraries/pull/4594

## Technical Details

<!-- Explain the changes along with any relevant GitHub links. -->

## Test Plan

<!-- Explain any relevant testing done to verify this PR. -->

## Test Result

<!-- Briefly summarize test outcomes. -->

## Submission Checklist

- [ ] Look over the contributing guidelines at
https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
This commit is contained in:
Sami Remes
2026-03-20 03:07:47 +02:00
committed by GitHub
parent a61238b69f
commit a699df9fdc
13 changed files with 160 additions and 31 deletions

View File

@@ -22,7 +22,10 @@ struct pk_fp6_t
static constexpr index_t vector_size = (packed_size * num_bits_elem) / num_bits_vec_elem;
element_type data_[vector_size]; // packed data
using type = pk_fp6_t<packed_size>;
CK_TILE_HOST_DEVICE constexpr explicit pk_fp6_t(int value = 0)
CK_TILE_HOST_DEVICE constexpr pk_fp6_t() : data_{element_type{}} {}
CK_TILE_HOST_DEVICE constexpr explicit pk_fp6_t(int value)
{
for(size_t i = 0; i < vector_size; ++i)
{
@@ -59,13 +62,14 @@ struct pk_fp6_t
const int bit_offset = bit_pos % num_bits_vec_elem;
const int overhang = bit_offset + num_bits_elem - num_bits_vec_elem;
int32_t bits = pk.data_[arr_idx] >> bit_offset;
uint32_t bits = static_cast<uint32_t>(pk.data_[arr_idx]) >> bit_offset;
if(overhang > 0 && (arr_idx + 1) < vector_size)
{
bits |= (pk.data_[arr_idx + 1] & ((1u << overhang) - 1)) << (num_bits_elem - overhang);
bits |= (static_cast<uint32_t>(pk.data_[arr_idx + 1]) & ((1u << overhang) - 1))
<< (num_bits_elem - overhang);
}
return bits & 0x3F;
return static_cast<int32_t>(bits & 0x3F);
}
CK_TILE_HOST_DEVICE int32_t unpack(const index_t i) const { return unpack(*this, i); }
@@ -97,6 +101,22 @@ struct pk_fp6_t
}
return sign == 1 ? -1 * result : result;
}
CK_TILE_HOST static int32_t float_to_fp6_e2m3(float val)
{
int32_t best = 0;
float best_err = 1e30f;
for(int32_t i = 0; i < 64; i++)
{
float err = std::fabs(val - fp6_e2m3_to_float(i));
if(err < best_err)
{
best = i;
best_err = err;
}
}
return best;
}
};
using pk_fp6x16_t = pk_fp6_t<16>;
@@ -105,5 +125,7 @@ template <>
struct numeric_traits<pk_fp6x16_t>
{
static constexpr int PackedSize = 16;
static constexpr int exp = 2;
static constexpr int mant = 3;
};
} // namespace ck_tile