clang-format-12

This commit is contained in:
Andriy Roshchenko
2025-06-13 18:06:25 +00:00
parent d57de07d68
commit 3471f7d7cf
3 changed files with 5 additions and 3 deletions

View File

@@ -234,7 +234,8 @@ struct DeviceGemmMultiD_BlockScale_Xdl_CShuffle_V3_BPreshuffle
};
// unconditional 2 to remove agpr usage
constexpr index_t minimum_occupancy = MPerBlock * NPerBlock * KPerBlock > (128*128*128/sizeof(ADataType))? 1: 2;
constexpr index_t minimum_occupancy =
MPerBlock * NPerBlock * KPerBlock > (128 * 128 * 128 / sizeof(ADataType)) ? 1 : 2;
if(has_main_k_block_loop)
{

View File

@@ -1391,7 +1391,8 @@ inline __host__ __device__ f4x2_t f4_convert_rne(float2_t x, float scale = 1.0f)
// If we keep the original order, an error occurred:
value.bitwise = __builtin_amdgcn_cvt_scalef32_pk_fp4_f32(value.bitwise, x[0], x[1], scale, 0);
// permute high bits and low bits to match the order of the original vector
// value.bitwise = __builtin_amdgcn_cvt_scalef32_pk_fp4_f32(value.bitwise, x[1], x[0], scale, 0);
// value.bitwise = __builtin_amdgcn_cvt_scalef32_pk_fp4_f32(value.bitwise, x[1], x[0], scale,
// 0);
return value.f4x2_array[0];
#else
union

View File

@@ -91,7 +91,7 @@ test_mx_fp4_scaled_convert(uint64_t N, float* p_test, uint64_t* p_completed)
{
return;
}
p_test[i++] = f32x2[1]; // 2* 0b0001(=0.5) = 1.0
p_test[i++] = f32x2[1]; // 2* 0b0001(=0.5) = 1.0
if(i >= N)
{
return;