mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 22:39:03 +00:00
WMMA grouped conv fwd large tensor extra flavors (#3582)
* Additional flavors for WMMA conv fwd large tensor - added F16/BF16 clamp operation - added F16/BF16 bias_clamp operation - small modification to the device code to accommodate extra tensors * changed strategy to handle GemmArgs array * Adding generic instance * Added generic instance to clamp and bias_clamp ops
This commit is contained in:
committed by
GitHub
parent
7b3db1a878
commit
81ee19bd2c
@@ -6,6 +6,8 @@
|
||||
|
||||
#include "functional2.hpp"
|
||||
#include "sequence.hpp"
|
||||
#include <type_traits>
|
||||
#include <cassert>
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -27,6 +29,15 @@ struct Array
|
||||
|
||||
__host__ __device__ constexpr TData& operator()(index_t i) { return At(i); }
|
||||
|
||||
template <typename... Args>
|
||||
__host__ constexpr auto Emplace(index_t i, Args&&... args)
|
||||
-> std::enable_if_t<std::is_nothrow_constructible_v<TData, Args&&...>>
|
||||
{
|
||||
assert(i >= 0 && i < NSize);
|
||||
mData[i].~TData();
|
||||
new(mData + i) TData(ck::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ __device__ constexpr auto operator=(const T& a)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user