From d1499dd805568009d648c902c7249456f5793760 Mon Sep 17 00:00:00 2001 From: Rostyslav Geyyer Date: Fri, 14 Feb 2025 21:13:35 +0000 Subject: [PATCH] Add a repro test --- test/data_type/CMakeLists.txt | 6 ++ test/data_type/test_mx_fp4_repro.cpp | 122 +++++++++++++++++++++++++++ 2 files changed, 128 insertions(+) create mode 100644 test/data_type/test_mx_fp4_repro.cpp diff --git a/test/data_type/CMakeLists.txt b/test/data_type/CMakeLists.txt index 8a0f631b39..1fe005e2f5 100644 --- a/test/data_type/CMakeLists.txt +++ b/test/data_type/CMakeLists.txt @@ -81,6 +81,12 @@ if(GPU_TARGETS MATCHES "gfx950") endif() add_dependencies(test_mx_data_types test_mx_fp4) + add_gtest_executable(test_mx_fp4_repro test_mx_fp4_repro.cpp) + if(result EQUAL 0) + target_link_libraries(test_mx_fp4_repro PRIVATE utility) + endif() + add_dependencies(test_mx_data_types test_mx_fp4_repro) + add_gtest_executable(test_e8m0 test_e8m0.cpp) if(result EQUAL 0) target_link_libraries(test_e8m0 PRIVATE utility) diff --git a/test/data_type/test_mx_fp4_repro.cpp b/test/data_type/test_mx_fp4_repro.cpp new file mode 100644 index 0000000000..5210ca9dc9 --- /dev/null +++ b/test/data_type/test_mx_fp4_repro.cpp @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. + +#include "gtest/gtest.h" +#include "ck/library/utility/device_memory.hpp" +#include "ck/utility/scaled_type_convert.hpp" + +using ck::e8m0_bexp_t; +using ck::float16_t; +using ck::float2_t; +using ck::float32_t; +using ck::scaled_type_convert; +using ck::type_convert; + +using ck::f4_convert_rne; +using ck::f4_convert_sr; +using ck::f4_t; +using ck::f4x16_t; +using ck::f4x2_pk_t; +using ck::f4x2_t; +using ck::f4x32_t; + +__host__ __device__ void test_mx_fp4_to_fp32(float* p_test) +{ + /// Test vector conversions + // f4x2 -> f32x2 + f4x2_t f4x2{f4x2_t::data_v{0b00011100}}; // 0b0001(=0.5) and 0b1100(=-2.0) + auto scale2 = e8m0_bexp_t(2.0f); + + float2_t f32x2 = scaled_type_convert(scale2, f4x2); + p_test[0] = f32x2[0]; + p_test[1] = f32x2[1]; +} + +__global__ void run_test_mx_fp4_to_fp32(float* p_test) { test_mx_fp4_to_fp32(p_test); } + +__host__ __device__ void test_mx_fp32_to_fp4_rne(float* p_test) +{ + // f32x2 -> f4x2 + float2_t f32x2 = {1.0f, -4.0f}; + auto scale2 = e8m0_bexp_t(2.0f); + f4x2_t f4x2 = f4_convert_rne(f32x2, type_convert(scale2)); // expect {0.5, -2} + + p_test[0] = type_convert( + f4_t(f4x2.AsType()(ck::Number<0>{}).unpack<>(ck::Number<0>{}))); // 0.5f + p_test[1] = type_convert( + f4_t(f4x2.AsType()(ck::Number<0>{}).unpack<>(ck::Number<1>{}))); // -2.0f +} + +__global__ void run_test_mx_fp32_to_fp4_rne(float* p_test) { test_mx_fp32_to_fp4_rne(p_test); } + +__host__ __device__ void test_mx_fp32_to_fp4_sr(float* p_test) +{ + float2_t f32x2 = {1.0f, -4.0f}; + auto scale2 = e8m0_bexp_t(2.0f); + f4x2_t f4x2 = f4_convert_sr(f32x2, type_convert(scale2)); // expect {0.5, -2} + + p_test[0] = type_convert( + f4_t(f4x2.AsType()(ck::Number<0>{}).unpack<>(ck::Number<0>{}))); // 0.5f + p_test[1] = type_convert( + f4_t(f4x2.AsType()(ck::Number<0>{}).unpack<>(ck::Number<1>{}))); // -2.0f +} + +__global__ void run_test_mx_fp32_to_fp4_sr(float* p_test) { test_mx_fp32_to_fp4_sr(p_test); } + +TEST(MXFP4, FP4ToFP32) +{ + std::vector out(2, -1.0f); + + DeviceMem device_out(2 * sizeof(float)); + // DeviceMem device_completed(sizeof(uint64_t)); + + // device_out.SetValue(-21.0f); + // device_completed.SetValue(-21.0f); + + run_test_mx_fp4_to_fp32<<<1, 1>>>(static_cast(device_out.GetDeviceBuffer())); + + // uint64_t completed = 0; + // device_completed.FromDevice(&completed); + device_out.FromDevice(out.data()); + + // f4x2 -> f32x2 + EXPECT_EQ(out[0], 1.0f); + EXPECT_EQ(out[1], -4.0f); +} + +TEST(MXFP4, FP32ToFP4RNE) +{ + std::vector out(2, -1.0f); + + DeviceMem device_out(2 * sizeof(float)); + // DeviceMem device_completed(sizeof(uint64_t)); + + run_test_mx_fp32_to_fp4_rne<<<1, 1>>>(static_cast(device_out.GetDeviceBuffer())); + + // uint64_t completed = 0; + // device_completed.FromDevice(&completed); + device_out.FromDevice(out.data()); + + // f32x2 -> f4x2 + // RNE + EXPECT_EQ(out[0], 0.5f); + EXPECT_EQ(out[1], -2.0f); +} + +TEST(MXFP4, FP32ToFP4SR) +{ + std::vector out(2, -1.0f); + + DeviceMem device_out(2 * sizeof(float)); + // DeviceMem device_completed(sizeof(uint64_t)); + + run_test_mx_fp32_to_fp4_sr<<<1, 1>>>(static_cast(device_out.GetDeviceBuffer())); + + // uint64_t completed = 0; + // device_completed.FromDevice(&completed); + device_out.FromDevice(out.data()); + + // SR + EXPECT_EQ(out[0], 0.5f); + EXPECT_EQ(out[1], -2.0f); +}