mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-15 02:27:57 +00:00
Merge commit 'daf71fb8e4442352b1c5bb0a7c5a3ecc9f7f0c5a' into develop
This commit is contained in:
@@ -244,12 +244,6 @@
|
||||
// workaround: compiler issue on gfx908
|
||||
#define CK_WORKAROUND_SWDEV_388832 1
|
||||
|
||||
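// workaround: compiler issue on gfx950; when enabled, f4_convert_sr(float2_t) converts
// each element with utils::sat_convert_to_type_sr instead of the packed conversion builtin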
|
||||
#define CK_WORKAROUND_FP32_TO_FP4_SR_CONVERSION 1
|
||||
|
||||
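// workaround: compiler issue on gfx950; when enabled, the MXFP4 device tests guarded by
// `#if !CK_TEMP_DISABLE_FP4_TESTS` are compiled out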
|
||||
#define CK_TEMP_DISABLE_FP4_TESTS 1
|
||||
|
||||
// workaround: compiler issue on gfx950
|
||||
#define CK_WORKAROUND_FP16_TO_FP8_CONVERSION 1
|
||||
|
||||
|
||||
@@ -1500,16 +1500,9 @@ inline __host__ __device__ f4x2_t f4_convert_sr(float2_t x, float scale = 1.0f)
|
||||
uint32_t bitwise;
|
||||
f4x2_t f4x2_array[4];
|
||||
} value{0};
|
||||
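// temporary workaround for gfx950: convert each element separately with
// utils::sat_convert_to_type_sr and pack the two results as (h << 4) | l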
|
||||
#if CK_WORKAROUND_FP32_TO_FP4_SR_CONVERSION
|
||||
uint8_t l = utils::sat_convert_to_type_sr<f4_t>(x[1] / scale, rng);
|
||||
uint8_t h = utils::sat_convert_to_type_sr<f4_t>(x[0] / scale, rng);
|
||||
value.bitwise = (h << 4) | l;
|
||||
#else
|
||||
// permute high bits and low bits to match the order of the original vector
|
||||
value.bitwise = __builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f32(
|
||||
value.bitwise, float2_t{x[1], x[0]}, rng, scale, 0);
|
||||
#endif // CK_WORKAROUND_FP32_TO_FP4_SR_CONVERSION
|
||||
return value.f4x2_array[0];
|
||||
#else
|
||||
constexpr int seed = 1254739;
|
||||
|
||||
@@ -240,7 +240,6 @@ TEST(MXFP4, HostScaledConvert)
|
||||
EXPECT_EQ(test_size, i);
|
||||
}
|
||||
|
||||
#if !CK_TEMP_DISABLE_FP4_TESTS
|
||||
__global__ void test_mx_fp4_device_scaled_convert(uint64_t N, float* p_test, uint64_t* p_completed)
|
||||
{
|
||||
test_mx_fp4_scaled_convert(N, p_test, p_completed);
|
||||
@@ -540,4 +539,3 @@ TEST(MXFP4, DeviceF4x32ToF32x32ScaledConvert)
|
||||
EXPECT_EQ(N, completed);
|
||||
EXPECT_EQ(N, i);
|
||||
}
|
||||
#endif // CK_TEMP_DISABLE_FP4_TESTS
|
||||
|
||||
Reference in New Issue
Block a user