mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 14:59:17 +00:00
Optimized GEMMs for MX FP4/8 (#2294)
- Adds V3 GEMM pipeline for MX FP4 and MX FP8
- Adds V3 GEMM pipeline for MX FP4 with preshuffling
- Adds MXFP4 GEMM tests (#2275)
- Adds MXFP4 GEMM examples
- Adds MXFP4 GEMMs to ckProfiler

Co-authored-by: Andriy Roshchenko <107577548+andriy-ca@users.noreply.github.com>
Co-authored-by: Andriy Roshchenko <andriy.roshchenko@amd.com>
Co-authored-by: aska-0096 <haocwang@amd.com>
Co-authored-by: lalala-sh <Jiaxing.Wen@amd.com>
Co-authored-by: OscarXu <huaiguxu@amd.com>
Co-authored-by: mtgu0705 <mtgu@amd.com>
Co-authored-by: Ding, Yi <yi.ding@amd.com>
Co-authored-by: feifei14119 <feiw@amd.com>
Co-authored-by: Lin, Qun <qlin@amd.com>
Co-authored-by: joye <joye@amd.com>
Co-authored-by: Rostyslav Geyyer <46627076+geyyer@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
233e274077
commit
00247e3c29
@@ -1,10 +1,11 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ck/utility/functional.hpp"
|
||||
#include "ck/utility/sequence.hpp"
|
||||
#include "ck/utility/tuple.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -70,4 +71,44 @@ struct static_for<0, N, 1> : detail::make_applier<N>
|
||||
using detail::make_applier<N>::operator();
|
||||
};
|
||||
|
||||
/// Compile-time loop over an explicit list of index types.
///
/// Calling an instance invokes the supplied callable once per type in `Is...`,
/// in the order the types are listed, passing a value-initialized object
/// (`Is{}`) of each type. With an empty list the callable is never invoked.
template <typename... Is>
struct static_for_range
{
    /// @param f Callable invoked as `f(Is{})` for every type in `Is...`.
    ///          Whatever it returns is discarded.
    template <typename F>
    __host__ __device__ constexpr void operator()(F f) const
    {
        // tweak -fbracket-depth if compilation fails. Clang default limit is 256
        //
        // Each call result is cast to void before being folded so that a
        // user-defined `operator,` on f's return type can never be picked up by
        // the fold; only the built-in comma operator is used.
        (static_cast<void>(f(Is{})), ...);
    }
};
|
||||
|
||||
template <typename... Ts>
|
||||
struct static_for_product;
|
||||
template <typename... Is>
|
||||
struct static_for_product<Tuple<Is...>> : public static_for_range<Is...>
|
||||
{
|
||||
};
|
||||
template <typename... Is, typename... Rest>
|
||||
struct static_for_product<Tuple<Is...>, Rest...>
|
||||
{
|
||||
template <typename F>
|
||||
__host__ __device__ constexpr void operator()(F f) const
|
||||
{
|
||||
static_for_product<Tuple<Is...>>{}([&](auto i0) { //
|
||||
static_for_product<Rest...>{}([&](auto... is) { //
|
||||
f(i0, is...);
|
||||
});
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
/// Function object that hands its argument back unchanged.
///
/// The argument is taken by forwarding reference and returned as `T&&`, so an
/// lvalue comes back as an lvalue reference to the same object and an rvalue
/// comes back as an rvalue reference.
struct identity
{
    /// @param arg Value to pass through.
    /// @return `arg`, with its original value category preserved.
    template <typename T>
    __host__ __device__ constexpr T&& operator()(T&& arg) const noexcept
    {
        // Forward with a direct cast instead of an unqualified `forward<T>(arg)`
        // call: an unqualified call is subject to argument-dependent lookup and
        // could collide with another namespace's `forward` (e.g. std::forward
        // when the argument type lives in namespace std).
        return static_cast<T&&>(arg);
    }
};
|
||||
|
||||
} // namespace ck
|
||||
|
||||
Reference in New Issue
Block a user