Add fp16/fp8 support into Grouped gemm FixedNK (#874)

* move all arguments into device * add b2c_tile_map * add examples * add SetDeviceKernelArgs * dedicated fixed_nk solution * init client api * add grouped_gemm_bias example * add a instance * add instances * formatting * fixed cmake * Update EnableCompilerWarnings.cmake * Update cmake-ck-dev.sh * clean; fixed comments * fixed comment * add instances for fp32 output * add instances for fp32 output * add fp32 out client example * fixed CI * init commit for kbatch * add splitk gridwise * format * fixed * clean deviceop * clean code * finish splitk * fixed instances * change m_loops to tile_loops * add setkbatch * clean code * add splitK+bias * add instances * opt mk_nk instances * clean examples * fixed CI * remove zero * finished non-zero * clean * clean code * optimized global_barrier * fixed ci * fixed CI * instance and client * removed AddBias * format * fixed CI * fixed CI * move 20_grouped_gemm to 21_grouped_gemm * clean * formatting * clean * clean * fixed computeType --------- Co-authored-by: Jing Zhang <jizha@amd.com>
2026-05-03 05:01:25 +00:00 · 2023-09-14 21:04:10 -05:00
parent 0d8efaa13d
commit f9d0eddb90
17 changed files with 1749 additions and 28 deletions
--- a/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
+++ b/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
@@ -75,12 +75,24 @@ struct PassThrough
        y = type_convert<bhalf_t>(x);
    }

+    template <> // Explicit specialization of operator(): float output y from half_t input x.
+    __host__ __device__ void operator()<float, half_t>(float& y, const half_t& x) const
+    {
+        y = type_convert<float>(x); // store x converted to float by type_convert into y
+    }
+
    template <>
    __host__ __device__ void operator()<int8_t, int8_t>(int8_t& y, const int8_t& x) const
    {
        y = x;
    }

+    template <> // Explicit specialization of operator(): half_t output y from int8_t input x.
+    __host__ __device__ void operator()<half_t, int8_t>(half_t& y, const int8_t& x) const
+    {
+        y = type_convert<half_t>(x); // store x converted to half_t by type_convert into y
+    }
+
    template <>
    __host__ __device__ void operator()<int8_t, int32_t>(int8_t& y, const int32_t& x) const
    {