diff --git a/include/ck/ck.hpp b/include/ck/ck.hpp index 3c1373a387..794c6f4e20 100644 --- a/include/ck/ck.hpp +++ b/include/ck/ck.hpp @@ -244,12 +244,6 @@ // workaround: compiler issue on gfx908 #define CK_WORKAROUND_SWDEV_388832 1 -// workaround: compiler issue on gfx950 -#define CK_WORKAROUND_FP32_TO_FP4_SR_CONVERSION 1 - -// workaround: compiler issue on gfx950 -#define CK_TEMP_DISABLE_FP4_TESTS 1 - // workaround: compiler issue on gfx950 #define CK_WORKAROUND_FP16_TO_FP8_CONVERSION 1 diff --git a/include/ck/utility/type_convert.hpp b/include/ck/utility/type_convert.hpp index 2208a73860..69a953b575 100644 --- a/include/ck/utility/type_convert.hpp +++ b/include/ck/utility/type_convert.hpp @@ -1500,16 +1500,9 @@ inline __host__ __device__ f4x2_t f4_convert_sr(float2_t x, float scale = 1.0f) uint32_t bitwise; f4x2_t f4x2_array[4]; } value{0}; -// apply a temporary workaround for gfx950 -#if CK_WORKAROUND_FP32_TO_FP4_SR_CONVERSION - uint8_t l = utils::sat_convert_to_type_sr(x[1] / scale, rng); - uint8_t h = utils::sat_convert_to_type_sr(x[0] / scale, rng); - value.bitwise = (h << 4) | l; -#else // permute high bits and low bits to match the order of the original vector value.bitwise = __builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f32( value.bitwise, float2_t{x[1], x[0]}, rng, scale, 0); -#endif // CK_WORKAROUND_FP32_TO_FP4_SR_CONVERSION return value.f4x2_array[0]; #else constexpr int seed = 1254739; diff --git a/test/data_type/test_mx_fp4.cpp b/test/data_type/test_mx_fp4.cpp index 7aca42567c..449f6fc777 100644 --- a/test/data_type/test_mx_fp4.cpp +++ b/test/data_type/test_mx_fp4.cpp @@ -240,7 +240,6 @@ TEST(MXFP4, HostScaledConvert) EXPECT_EQ(test_size, i); } -#if !CK_TEMP_DISABLE_FP4_TESTS __global__ void test_mx_fp4_device_scaled_convert(uint64_t N, float* p_test, uint64_t* p_completed) { test_mx_fp4_scaled_convert(N, p_test, p_completed); @@ -540,4 +539,3 @@ TEST(MXFP4, DeviceF4x32ToF32x32ScaledConvert) EXPECT_EQ(N, completed); EXPECT_EQ(N, i); } -#endif // CK_TEMP_DISABLE_FP4_TESTS