fixup build for #2871 when multiple device targets are used (#2885)

This commit is contained in:
Max Podkorytov
2025-09-22 08:02:41 -07:00
committed by GitHub
parent 624c46866e
commit de47ae2fdf

View File

@@ -351,7 +351,7 @@ inline __host__ __device__ f8_fnuz_t f8_convert_sr<f8_fnuz_t, float>(float x)
val.fval = __builtin_amdgcn_fmed3f(val.fval, max_fp8, -max_fp8);
ival = __builtin_amdgcn_cvt_sr_fp8_f32(val.fval, rng, ival, 0); // 0 pos
val.i32val = ival;
-return f8_t{val.i8val[0]}; // little endian
+return f8_fnuz_t{val.i8val[0]}; // little endian
#else
constexpr bool negative_zero_nan = true;
constexpr bool clip = true;
@@ -419,7 +419,7 @@ inline __host__ __device__ bf8_fnuz_t f8_convert_sr<bf8_fnuz_t, float>(float x)
val.fval = __builtin_amdgcn_fmed3f(val.fval, max_bf8, -max_bf8);
ival = __builtin_amdgcn_cvt_sr_bf8_f32(val.fval, rng, ival, 0); // 0 pos
val.i32val = ival;
-return bf8_t{val.i8val[0]}; // little endian
+return bf8_fnuz_t{val.i8val[0]}; // little endian
#else
constexpr bool negative_zero_nan = true;
constexpr bool clip = true;
@@ -655,7 +655,7 @@ inline __host__ __device__ f8_fnuz_t f8_convert_rne<f8_fnuz_t, float>(float x)
val.fval = __builtin_amdgcn_fmed3f(val.fval, max_fp8, -max_fp8);
ival = __builtin_amdgcn_cvt_pk_fp8_f32(val.fval, val.fval, ival, false); // false -> WORD0
val.i32val = ival;
-return f8_t{val.i8val[0]};
+return f8_fnuz_t{val.i8val[0]};
#else
constexpr bool negative_zero_nan = true;
constexpr bool clip = true;
@@ -707,7 +707,7 @@ inline __host__ __device__ bf8_fnuz_t f8_convert_rne<bf8_fnuz_t, float>(float x)
val.fval = __builtin_amdgcn_fmed3f(val.fval, max_bf8, -max_bf8);
ival = __builtin_amdgcn_cvt_pk_bf8_f32(val.fval, val.fval, ival, false); // false -> WORD0
val.i32val = ival;
-return bf8_t{val.i8val[0]};
+return bf8_fnuz_t{val.i8val[0]};
#else
constexpr bool negative_zero_nan = true;
constexpr bool clip = true;