mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-29 11:16:59 +00:00
clang-format-12
This commit is contained in:
@@ -234,7 +234,8 @@ struct DeviceGemmMultiD_BlockScale_Xdl_CShuffle_V3_BPreshuffle
|
||||
};
|
||||
|
||||
// unconditional 2 to remove agpr usage
|
||||
constexpr index_t minimum_occupancy = MPerBlock * NPerBlock * KPerBlock > (128*128*128/sizeof(ADataType))? 1: 2;
|
||||
constexpr index_t minimum_occupancy =
|
||||
MPerBlock * NPerBlock * KPerBlock > (128 * 128 * 128 / sizeof(ADataType)) ? 1 : 2;
|
||||
|
||||
if(has_main_k_block_loop)
|
||||
{
|
||||
|
||||
@@ -1391,7 +1391,8 @@ inline __host__ __device__ f4x2_t f4_convert_rne(float2_t x, float scale = 1.0f)
|
||||
// If we keep the original order, an error occurred:
|
||||
value.bitwise = __builtin_amdgcn_cvt_scalef32_pk_fp4_f32(value.bitwise, x[0], x[1], scale, 0);
|
||||
// permute high bits and low bits to match the order of the original vector
|
||||
// value.bitwise = __builtin_amdgcn_cvt_scalef32_pk_fp4_f32(value.bitwise, x[1], x[0], scale, 0);
|
||||
// value.bitwise = __builtin_amdgcn_cvt_scalef32_pk_fp4_f32(value.bitwise, x[1], x[0], scale,
|
||||
// 0);
|
||||
return value.f4x2_array[0];
|
||||
#else
|
||||
union
|
||||
|
||||
@@ -91,7 +91,7 @@ test_mx_fp4_scaled_convert(uint64_t N, float* p_test, uint64_t* p_completed)
|
||||
{
|
||||
return;
|
||||
}
|
||||
p_test[i++] = f32x2[1]; // 2* 0b0001(=0.5) = 1.0
|
||||
p_test[i++] = f32x2[1]; // 2* 0b0001(=0.5) = 1.0
|
||||
if(i >= N)
|
||||
{
|
||||
return;
|
||||
|
||||
Reference in New Issue
Block a user