This commit is contained in:
yadaish
2025-12-04 10:38:45 +00:00
parent b36c7d76a0
commit 7059613404

View File

@@ -109,14 +109,11 @@ CK_TILE_HOST_DEVICE fp32x2_t pk_int4_t_to_fp32x2_t(const pk_int4_t& x)
float x_l = ((x_u8 & 0x0f) >> 0) - 8.f;
float x_h = ((x_u8 & 0xf0) >> 4) - 8.f;
/*
#ifdef CK_TILE_USE_PK4_LAYOUT_SHUFFLE
fp32x2_t res = {x_h, x_l};
#elif
fp32x2_t res = {x_l, x_h};
#endif
*/
fp32x2_t res = {x_l, x_h};
return res;
}
@@ -130,14 +127,11 @@ CK_TILE_HOST_DEVICE fp32x2_t pk_int4_t_to_fp32x2_t_signed_conversion(const pk_in
x_l = x_l > 7 ? x_l - 16 : x_l;
x_h = x_h > 7 ? x_h - 16 : x_h;
/*
#ifdef CK_TILE_USE_PK4_LAYOUT_SHUFFLE
fp32x2_t res = {x_h, x_l};
#elif
fp32x2_t res = {x_l, x_h};
#endif
*/
fp32x2_t res = {x_l, x_h};
return res;
}
@@ -161,8 +155,6 @@ CK_TILE_HOST_DEVICE fp16x2_t pk_int4_t_to_halfx2_t(const pk_int4_t& x)
CK_TILE_HOST_DEVICE fp16x2_t pk_int4_t_to_halfx2_t(const pk_int4_t& x, float scale)
{
// TODO(yadai): confirm quantization algorithm
// auto float_vec2 = pk_int4_t_to_fp32x2_t_signed_conversion(x);
auto float_vec2 = pk_int4_t_to_fp32x2_t(x);
float_vec2.x = float_vec2.x * scale;
float_vec2.y = float_vec2.y * scale;