mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 05:01:25 +00:00
Multi-kernel CGEMM (#230)
* Reference CGEMM + test stub * Format. * Incomplete simple implementation * Library instances * Sketch of tests * Test fixes. * Example added * Cosmetics * Add elementwise operation kernel and example * Add comment * Add template argument of dim. Prepare to support multiple dimension * Rename example * Support 1 dimension * Add static assert * Add comment * Second auxiliary buffer added * Extract pad * Remove redundant argument * Support any dimension for elementwise operation * Remove line * Let it be the multiple number of CU * Move thread per block to the parameter of constructor * Consuming binary ops to do A+B / A-B * Fix + cosmetics + bf16 test commented out temporarily * Format * Enabling bf16 test * Revert "Enabling bf16 test" This reverts commit f497e2ba44. * Fix + test reenabled * fix build * Revert "fix build" This reverts commit d73102384b. * post PR #235 merge fix * amend * Single workspace for cgemm + helper * Perf calc fix * Review remarks: static_cast * Review remarks: binary ops templated * Cleaning * Removal of instances and their tests * Review remarks from aosew addressed * Review remark: unnecessary attribute * Post-merge fixes * Restrict 4gemm to PassThrough + bug fix * Review remarks * update licence * change cgemm example to fp16 Co-authored-by: rocking <chunylai@amd.com> Co-authored-by: Chao Liu <chao.liu2@amd.com> Co-authored-by: Anthony Chang <ac.chang@outlook.com>
This commit is contained in:
@@ -1,3 +1,28 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#pragma once
|
||||
#include "data_type.hpp"
|
||||
|
||||
@@ -5,14 +30,22 @@ namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace binary_element_wise {
|
||||
|
||||
struct Add
|
||||
// Primary template for the element-wise addition functor.
//   Y  - type of the destination value
//   X1 - type of the first operand
//   X2 - type of the second operand
// Only declared here; usable behaviour comes from explicit specializations.
template <class Y, class X1, class X2>
struct Add;
|
||||
|
||||
template <>
|
||||
struct Add<double, double, double>
|
||||
{
|
||||
__host__ __device__ constexpr void
|
||||
operator()(double& dst, const double& src1, const double& src2) const
|
||||
{
|
||||
dst = src1 + src2;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Add<float, float, float>
|
||||
{
|
||||
__host__ __device__ constexpr void
|
||||
operator()(float& dst, const float& src1, const float& src2) const
|
||||
{
|
||||
@@ -20,6 +53,75 @@ struct Add
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Add<half_t, half_t, half_t>
|
||||
{
|
||||
__host__ __device__ constexpr void
|
||||
operator()(half_t& dst, const half_t& src1, const half_t& src2) const
|
||||
{
|
||||
dst = src1 + src2;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Add<bhalf_t, bhalf_t, bhalf_t>
|
||||
{
|
||||
__host__ __device__ constexpr void
|
||||
operator()(bhalf_t& dst, const bhalf_t& src1, const bhalf_t& src2) const
|
||||
{
|
||||
const float x1 = ck::type_convert<float>(src1);
|
||||
const float x2 = ck::type_convert<float>(src2);
|
||||
const float y = x1 + x2;
|
||||
dst = ck::type_convert<bhalf_t>(y);
|
||||
}
|
||||
};
|
||||
|
||||
// Primary template for the element-wise subtraction functor.
//   Y  - type of the destination value
//   X1 - type of the first operand (minuend)
//   X2 - type of the second operand (subtrahend)
// Only declared here; usable behaviour comes from explicit specializations.
// NOTE: the identifier is spelled "Substract"; the spelling is kept because
// renaming it would change the public name.
template <class Y, class X1, class X2>
struct Substract;
|
||||
|
||||
template <>
|
||||
struct Substract<double, double, double>
|
||||
{
|
||||
__host__ __device__ constexpr void
|
||||
operator()(double& dst, const double& src1, const double& src2) const
|
||||
{
|
||||
dst = src1 - src2;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Substract<float, float, float>
|
||||
{
|
||||
__host__ __device__ constexpr void
|
||||
operator()(float& dst, const float& src1, const float& src2) const
|
||||
{
|
||||
dst = src1 - src2;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Substract<half_t, half_t, half_t>
|
||||
{
|
||||
__host__ __device__ constexpr void
|
||||
operator()(half_t& dst, const half_t& src1, const half_t& src2) const
|
||||
{
|
||||
dst = src1 - src2;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Substract<bhalf_t, bhalf_t, bhalf_t>
|
||||
{
|
||||
__host__ __device__ constexpr void
|
||||
operator()(bhalf_t& dst, const bhalf_t& src1, const bhalf_t& src2) const
|
||||
{
|
||||
const float x1 = ck::type_convert<float>(src1);
|
||||
const float x2 = ck::type_convert<float>(src2);
|
||||
const float y = x1 - x2;
|
||||
dst = ck::type_convert<bhalf_t>(y);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace binary_element_wise
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
|
||||
Reference in New Issue
Block a user