mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
Fused GEMM+GEMM (#351)
* initial stub for gemm_gemm_xdl_cshuffle * set up example code * compiles * prevent integer overflow * harmonize interface between ref_gemm and ref_batched_gemm * batched_gemm_gemm * fix example * host tensor gen: diagonal pattern in lowest two-dimensions only * make c descriptors containing only integral constants * clean up * add BlockwiseGemmXdlops_v2 while exploring an unified approach * implement proper interface * tidy up example * fix compilation warnings * coarsely controlled 2nd gemm padding * remove rocm-cmake's hard requirement for certain revision * clang-format * resolve merge conflict * fix compilation error on gfx10 * adds acc0 elementwise op to interface * add gemm_gemm instances and tests * avoid LDS data hazard * fix build Co-authored-by: Chao Liu <chao.liu2@amd.com>
This commit is contained in:
@@ -1,8 +1,7 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#ifndef CK_STATIC_BUFFER_HPP
|
||||
#define CK_STATIC_BUFFER_HPP
|
||||
#pragma once
|
||||
|
||||
#include "statically_indexed_array.hpp"
|
||||
|
||||
@@ -20,13 +19,6 @@ struct StaticBuffer : public StaticallyIndexedArray<T, N>
|
||||
|
||||
__host__ __device__ constexpr StaticBuffer() : base{} {}
|
||||
|
||||
__host__ __device__ constexpr StaticBuffer& operator=(StaticBuffer& y)
|
||||
{
|
||||
StaticBuffer& x = *this;
|
||||
static_for<0, base::Size(), 1>{}([&](auto i) { x(i) = y[i]; });
|
||||
return x;
|
||||
}
|
||||
|
||||
template <typename... Ys>
|
||||
__host__ __device__ constexpr StaticBuffer& operator=(const Tuple<Ys...>& y)
|
||||
{
|
||||
@@ -201,4 +193,3 @@ __host__ __device__ constexpr auto make_static_buffer(LongNumber<N>)
|
||||
}
|
||||
|
||||
} // namespace ck
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user