mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-19 12:30:16 +00:00
Fused GEMM+GEMM (#351)
* initial stub for gemm_gemm_xdl_cshuffle
* set up example code
* compiles
* prevent integer overflow
* harmonize interface between ref_gemm and ref_batched_gemm
* batched_gemm_gemm
* fix example
* host tensor gen: diagonal pattern in lowest two dimensions only
* make c descriptors containing only integral constants
* clean up
* add BlockwiseGemmXdlops_v2 while exploring a unified approach
* implement proper interface
* tidy up example
* fix compilation warnings
* coarsely controlled 2nd gemm padding
* remove rocm-cmake's hard requirement for a certain revision
* clang-format
* resolve merge conflict
* fix compilation error on gfx10
* adds acc0 elementwise op to interface
* add gemm_gemm instances and tests
* avoid LDS data hazard
* fix build
Co-authored-by: Chao Liu <chao.liu2@amd.com>
[ROCm/composable_kernel commit: c20a75b07d]
This commit is contained in:
@@ -1,8 +1,7 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#ifndef CK_STATIC_BUFFER_HPP
|
||||
#define CK_STATIC_BUFFER_HPP
|
||||
#pragma once
|
||||
|
||||
#include "statically_indexed_array.hpp"
|
||||
|
||||
@@ -20,13 +19,6 @@ struct StaticBuffer : public StaticallyIndexedArray<T, N>
|
||||
|
||||
__host__ __device__ constexpr StaticBuffer() : base{} {}
|
||||
|
||||
__host__ __device__ constexpr StaticBuffer& operator=(StaticBuffer& y)
|
||||
{
|
||||
StaticBuffer& x = *this;
|
||||
static_for<0, base::Size(), 1>{}([&](auto i) { x(i) = y[i]; });
|
||||
return x;
|
||||
}
|
||||
|
||||
template <typename... Ys>
|
||||
__host__ __device__ constexpr StaticBuffer& operator=(const Tuple<Ys...>& y)
|
||||
{
|
||||
@@ -201,4 +193,3 @@ __host__ __device__ constexpr auto make_static_buffer(LongNumber<N>)
|
||||
}
|
||||
|
||||
} // namespace ck
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user