Enable fp4 tests (#2329)

This commit is contained in:
Rostyslav Geyyer
2025-06-25 07:38:54 -05:00
committed by GitHub
parent 37e1a27537
commit daf71fb8e4
3 changed files with 0 additions and 15 deletions

View File

@@ -1500,16 +1500,9 @@ inline __host__ __device__ f4x2_t f4_convert_sr(float2_t x, float scale = 1.0f)
uint32_t bitwise;
f4x2_t f4x2_array[4];
} value{0};
// apply a temporary workaround for gfx950
#if CK_WORKAROUND_FP32_TO_FP4_SR_CONVERSION
uint8_t l = utils::sat_convert_to_type_sr<f4_t>(x[1] / scale, rng);
uint8_t h = utils::sat_convert_to_type_sr<f4_t>(x[0] / scale, rng);
value.bitwise = (h << 4) | l;
#else
// permute high bits and low bits to match the order of the original vector
value.bitwise = __builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f32(
value.bitwise, float2_t{x[1], x[0]}, rng, scale, 0);
#endif // CK_WORKAROUND_FP32_TO_FP4_SR_CONVERSION
return value.f4x2_array[0];
#else
constexpr int seed = 1254739;