mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
Merge commit '069500464de6a55b80e8341c79239b13ac8ef379' into develop
This commit is contained in:
@@ -68,6 +68,8 @@ set(GTEST_CXX_FLAGS
|
||||
-Wno-deprecated
|
||||
-Wno-unsafe-buffer-usage
|
||||
-Wno-float-equal
|
||||
-Wno-lifetime-safety-intra-tu-suggestions
|
||||
-Wno-lifetime-safety-cross-tu-suggestions
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
|
||||
@@ -106,7 +106,7 @@ struct bias_info
|
||||
return info;
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const bias_info& bi)
|
||||
friend std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, const bias_info& bi)
|
||||
{
|
||||
bi.serialize(os);
|
||||
return os;
|
||||
|
||||
@@ -191,7 +191,7 @@ struct mask_info
|
||||
return area;
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const mask_info& mi)
|
||||
friend std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, const mask_info& mi)
|
||||
{
|
||||
mi.serialize(os);
|
||||
return os;
|
||||
|
||||
@@ -8,6 +8,9 @@
|
||||
#include "ck_tile/core.hpp"
|
||||
#include "ck_tile/ops/fmha.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
// keep sync with BlockAttentionQuantScaleEnum
|
||||
enum class quant_scale_enum
|
||||
{
|
||||
@@ -58,3 +61,4 @@ struct quant_scale_info
|
||||
return os;
|
||||
}
|
||||
};
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
namespace ck {
|
||||
|
||||
template <typename T>
|
||||
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
|
||||
std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, const std::vector<T>& v)
|
||||
{
|
||||
std::copy(std::begin(v), std::end(v), std::ostream_iterator<T>(os, " "));
|
||||
return os;
|
||||
@@ -27,7 +27,8 @@ std::ostream& operator<<(std::ostream& os, const std::array<T, N>& v)
|
||||
}
|
||||
|
||||
template <typename... Ts>
|
||||
std::ostream& operator<<(std::ostream& os, const TensorDescriptor<Ts...>& desc)
|
||||
std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os,
|
||||
const TensorDescriptor<Ts...>& desc)
|
||||
{
|
||||
constexpr index_t nDim = remove_cvref_t<decltype(desc)>::GetNumOfDimension();
|
||||
|
||||
|
||||
@@ -110,4 +110,5 @@ ConvParam parse_conv_param(int num_dim_spatial, int arg_idx, char* const argv[])
|
||||
} // namespace utils
|
||||
} // namespace ck
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const ck::utils::conv::ConvParam& p);
|
||||
std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os,
|
||||
const ck::utils::conv::ConvParam& p);
|
||||
|
||||
@@ -23,10 +23,14 @@
|
||||
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-cross-tu-suggestions"
|
||||
|
||||
namespace ck {
|
||||
|
||||
template <typename Range>
|
||||
std::ostream& LogRange(std::ostream& os, Range&& range, std::string delim)
|
||||
std::ostream& LogRange([[clang::lifetimebound]] std::ostream& os, Range&& range, std::string delim)
|
||||
{
|
||||
bool first = true;
|
||||
for(auto&& v : range)
|
||||
@@ -580,8 +584,9 @@ struct HostTensorDescriptor
|
||||
return std::inner_product(iss.begin(), iss.end(), mStrides.begin(), std::size_t{0});
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const HostTensorDescriptor& desc);
|
||||
friend std::ostream& operator<<(std::ostream& os, ChosenLayout tag);
|
||||
friend std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os,
|
||||
const HostTensorDescriptor& desc);
|
||||
friend std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, ChosenLayout tag);
|
||||
|
||||
private:
|
||||
std::vector<std::size_t> mLens;
|
||||
@@ -1171,3 +1176,4 @@ struct Tensor
|
||||
};
|
||||
|
||||
} // namespace ck
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
#ifndef CK_STATIC_TENSOR_HPP
|
||||
#define CK_STATIC_TENSOR_HPP
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
namespace ck {
|
||||
|
||||
// StaticTensor for Scalar
|
||||
@@ -270,4 +272,5 @@ __host__ __device__ constexpr auto make_static_tensor(TensorDesc, X invalid_elem
|
||||
}
|
||||
|
||||
} // namespace ck
|
||||
#pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
@@ -6,6 +6,9 @@
|
||||
#include "ck/utility/common_header.hpp"
|
||||
#include "ck/utility/multi_index.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
namespace ck {
|
||||
|
||||
template <typename LowLength>
|
||||
@@ -29,7 +32,10 @@ struct PassThrough
|
||||
|
||||
// Number of upper dimensions reported by this transform: the constant 1.
__host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; }
|
||||
|
||||
// Read-only accessor returning a const reference to the member up_lengths_.
// This listing is a diff, so the accessor appears twice: first the one-line form
// (apparently the removed text), then the annotated form (apparently its replacement).
__host__ __device__ constexpr const auto& GetUpperLengths() const { return up_lengths_; }
|
||||
// Same accessor with [[clang::lifetimebound]] written after the cv-qualifier, i.e. applied to
// the implicit object parameter; the body still only returns up_lengths_.
__host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]]
|
||||
{
|
||||
return up_lengths_;
|
||||
}
|
||||
|
||||
template <typename LowIdx, typename UpIdx>
|
||||
__host__ __device__ static constexpr void CalculateLowerIndex(LowIdx& idx_low,
|
||||
@@ -305,7 +311,10 @@ struct RightPad
|
||||
|
||||
// Number of upper dimensions reported by this transform: the constant 1.
__host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; }
|
||||
|
||||
// Read-only accessor returning a const reference to the member up_lengths_.
// This listing is a diff, so the accessor appears twice: first the one-line form
// (apparently the removed text), then the annotated form (apparently its replacement).
__host__ __device__ constexpr const auto& GetUpperLengths() const { return up_lengths_; }
|
||||
// Same accessor with [[clang::lifetimebound]] written after the cv-qualifier, i.e. applied to
// the implicit object parameter; the body still only returns up_lengths_.
__host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]]
|
||||
{
|
||||
return up_lengths_;
|
||||
}
|
||||
|
||||
template <typename LowIdx, typename UpIdx>
|
||||
__host__ __device__ static constexpr void CalculateLowerIndex(LowIdx& idx_low,
|
||||
@@ -403,7 +412,10 @@ struct Embed
|
||||
|
||||
// Number of upper dimensions reported by this transform: the value NDimUp.
__host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return NDimUp; }
|
||||
|
||||
// Read-only accessor returning a const reference to the member up_lengths_.
// This listing is a diff, so the accessor appears twice: first the one-line form
// (apparently the removed text), then the annotated form (apparently its replacement).
__host__ __device__ constexpr const auto& GetUpperLengths() const { return up_lengths_; }
|
||||
// Same accessor with [[clang::lifetimebound]] written after the cv-qualifier, i.e. applied to
// the implicit object parameter; the body still only returns up_lengths_.
__host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]]
|
||||
{
|
||||
return up_lengths_;
|
||||
}
|
||||
|
||||
template <typename LowIdx, typename UpIdx>
|
||||
__host__ __device__ constexpr void CalculateLowerIndex(LowIdx& idx_low,
|
||||
@@ -1074,7 +1086,10 @@ struct Merge_v2_magic_division
|
||||
|
||||
// Number of upper dimensions reported by this transform: the constant 1.
__host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; }
|
||||
|
||||
// Read-only accessor returning a const reference to the member up_lengths_.
// This listing is a diff, so the accessor appears twice: first the one-line form
// (apparently the removed text), then the annotated form (apparently its replacement).
__host__ __device__ constexpr const auto& GetUpperLengths() const { return up_lengths_; }
|
||||
// Same accessor with [[clang::lifetimebound]] written after the cv-qualifier, i.e. applied to
// the implicit object parameter; the body still only returns up_lengths_.
__host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]]
|
||||
{
|
||||
return up_lengths_;
|
||||
}
|
||||
|
||||
template <typename LowIdx, typename UpIdx>
|
||||
__host__ __device__ constexpr void CalculateLowerIndex(LowIdx& idx_low,
|
||||
@@ -1366,7 +1381,10 @@ struct Merge_v3_division_mod
|
||||
|
||||
// Number of upper dimensions reported by this transform: the constant 1.
__host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; }
|
||||
|
||||
// Read-only accessor returning a const reference to the member up_lengths_.
// This listing is a diff, so the accessor appears twice: first the one-line form
// (apparently the removed text), then the annotated form (apparently its replacement).
__host__ __device__ constexpr const auto& GetUpperLengths() const { return up_lengths_; }
|
||||
// Same accessor with [[clang::lifetimebound]] written after the cv-qualifier, i.e. applied to
// the implicit object parameter; the body still only returns up_lengths_.
__host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]]
|
||||
{
|
||||
return up_lengths_;
|
||||
}
|
||||
|
||||
template <typename LowIdx, typename UpIdx>
|
||||
__host__ __device__ constexpr void CalculateLowerIndex(LowIdx& idx_low,
|
||||
@@ -1480,7 +1498,10 @@ struct UnMerge
|
||||
|
||||
// Number of upper dimensions reported by this transform: the value NDimUp.
__host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return NDimUp; }
|
||||
|
||||
// Read-only accessor returning a const reference to the member up_lengths_.
// This listing is a diff, so the accessor appears twice: first the one-line form
// (apparently the removed text), then the annotated form (apparently its replacement).
__host__ __device__ constexpr const auto& GetUpperLengths() const { return up_lengths_; }
|
||||
// Same accessor with [[clang::lifetimebound]] written after the cv-qualifier, i.e. applied to
// the implicit object parameter; the body still only returns up_lengths_.
__host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]]
|
||||
{
|
||||
return up_lengths_;
|
||||
}
|
||||
|
||||
template <typename LowIdx, typename UpIdx>
|
||||
__host__ __device__ constexpr void CalculateLowerIndex(LowIdx& idx_low,
|
||||
@@ -1640,7 +1661,10 @@ struct ConvBwdDataImplicitGemmOutTransform
|
||||
|
||||
// Number of upper dimensions reported by this transform: the constant 3.
__host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 3; }
|
||||
|
||||
// Read-only accessor returning a const reference to the member up_lengths_.
// This listing is a diff, so the accessor appears twice: first the one-line form
// (apparently the removed text), then the annotated form (apparently its replacement).
__host__ __device__ constexpr const auto& GetUpperLengths() const { return up_lengths_; }
|
||||
// Same accessor with [[clang::lifetimebound]] written after the cv-qualifier, i.e. applied to
// the implicit object parameter; the body still only returns up_lengths_.
__host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]]
|
||||
{
|
||||
return up_lengths_;
|
||||
}
|
||||
|
||||
template <typename UpIdx>
|
||||
__host__ __device__ constexpr auto CalculateLowerIndexN(const UpIdx& idx_up) const
|
||||
@@ -2236,3 +2260,4 @@ struct Xor
|
||||
}
|
||||
};
|
||||
} // namespace ck
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -23,7 +23,10 @@ struct TensorAdaptor
|
||||
{
|
||||
// Number of transforms in this adaptor, obtained from Transforms::Size().
__host__ __device__ static constexpr index_t GetNumOfTransform() { return Transforms::Size(); }
|
||||
|
||||
// Read-only accessor returning a const reference to the member transforms_.
// This listing is a diff, so the accessor appears twice: first the one-line form
// (apparently the removed text), then the annotated form (apparently its replacement).
__host__ __device__ constexpr const auto& GetTransforms() const { return transforms_; }
|
||||
// Same accessor with [[clang::lifetimebound]] written after the cv-qualifier, i.e. applied to
// the implicit object parameter; the body still only returns transforms_.
__host__ __device__ constexpr const auto& GetTransforms() const [[clang::lifetimebound]]
|
||||
{
|
||||
return transforms_;
|
||||
}
|
||||
|
||||
__host__ __device__ static constexpr auto GetLowerDimensionHiddenIdss()
|
||||
{
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
#include "ck/utility/sequence_helper.hpp"
|
||||
#include "ck/tensor_description/multi_index_transform.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
namespace ck {
|
||||
|
||||
template <index_t NDimHidden, typename VisibleDimensionIds>
|
||||
@@ -179,7 +181,10 @@ struct TensorDescriptor
|
||||
}
|
||||
|
||||
// TODO make these private
|
||||
// Read-only accessor returning a const reference to the member transforms_.
// This listing is a diff, so the accessor appears twice: first the one-line form
// (apparently the removed text), then the annotated form (apparently its replacement).
__host__ __device__ constexpr const auto& GetTransforms() const { return transforms_; }
|
||||
// Same accessor with [[clang::lifetimebound]] written after the cv-qualifier, i.e. applied to
// the implicit object parameter; the body still only returns transforms_.
__host__ __device__ constexpr const auto& GetTransforms() const [[clang::lifetimebound]]
|
||||
{
|
||||
return transforms_;
|
||||
}
|
||||
|
||||
__host__ __device__ static constexpr auto GetLowerDimensionIdss()
|
||||
{
|
||||
@@ -253,9 +258,12 @@ struct TensorCoordinate
|
||||
// Returns the element of idx_hidden_ at position Number<0>{} as the offset.
__host__ __device__ constexpr index_t GetOffset() const { return idx_hidden_[Number<0>{}]; }
|
||||
|
||||
// TODO make these private
|
||||
// Const accessor returning a const reference to idx_hidden_.
// This listing is a diff, so each accessor below appears twice: the un-annotated form
// (apparently the removed text) followed by the [[clang::lifetimebound]] form
// (apparently its replacement).
__host__ __device__ constexpr const auto& GetHiddenIndex() const { return idx_hidden_; }
|
||||
// Annotated const accessor; the attribute after `const` applies to the implicit object
// parameter, and the body is unchanged.
__host__ __device__ constexpr const auto& GetHiddenIndex() const [[clang::lifetimebound]]
|
||||
{
|
||||
return idx_hidden_;
|
||||
}
|
||||
|
||||
// Non-const accessor returning a mutable reference to idx_hidden_ (un-annotated form).
__host__ __device__ auto& GetHiddenIndex() { return idx_hidden_; }
|
||||
// Non-const accessor, annotated form; still returns idx_hidden_ by mutable reference.
__host__ __device__ auto& GetHiddenIndex() [[clang::lifetimebound]] { return idx_hidden_; }
|
||||
|
||||
__host__ __device__ constexpr auto GetVisibleIndex() const
|
||||
{
|
||||
@@ -284,7 +292,7 @@ struct TensorCoordinateStep
|
||||
// Forwards to GetVisibleIndexDiff() and returns its result by const reference.
__host__ __device__ constexpr const auto& GetIndexDiff() const { return GetVisibleIndexDiff(); }
|
||||
|
||||
// TODO make these private
|
||||
// Const accessor returning a const reference to idx_diff_visible_.
// This listing is a diff, so the signature appears twice: the un-annotated line
// (apparently the removed text) followed by the [[clang::lifetimebound]] line
// (apparently its replacement); both share the body below.
__host__ __device__ constexpr const auto& GetVisibleIndexDiff() const
|
||||
__host__ __device__ constexpr const auto& GetVisibleIndexDiff() const [[clang::lifetimebound]]
|
||||
{
|
||||
return idx_diff_visible_;
|
||||
}
|
||||
@@ -613,3 +621,4 @@ using TensorCoordinateStep_t = decltype(make_tensor_coordinate_step(
|
||||
TensorDesc{}, MultiIndex<remove_cvref_t<TensorDesc>::GetNumOfDimension()>{}));
|
||||
|
||||
} // namespace ck
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#include "ck/tensor_operation/gpu/warp/wmma_gemm.hpp"
|
||||
#include "ck/tensor_description/tensor_adaptor.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
namespace ck {
|
||||
|
||||
template <index_t BlockSize,
|
||||
@@ -485,3 +487,4 @@ struct BlockwiseGemmWmmaops_pipeline_base
|
||||
};
|
||||
|
||||
} // namespace ck
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -13,6 +13,8 @@
|
||||
// Prefetch 2 stage
|
||||
// Local prefetch 1 stage
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
namespace ck {
|
||||
|
||||
template <index_t BlockSize,
|
||||
@@ -992,3 +994,4 @@ struct BlockwiseGemmXdlops_pipeline_v4
|
||||
};
|
||||
|
||||
} // namespace ck
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -9,6 +9,9 @@
|
||||
#include "ck/tensor_operation/gpu/warp/xdlops_gemm.hpp"
|
||||
#include "ck/tensor_description/tensor_adaptor.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
namespace ck {
|
||||
|
||||
template <index_t BlockSize,
|
||||
@@ -404,3 +407,4 @@ struct BlockwiseGemmXdlops_pipeline_base
|
||||
};
|
||||
|
||||
} // namespace ck
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -11,6 +11,9 @@
|
||||
|
||||
#define CK_MNK_LOOP
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
namespace ck {
|
||||
|
||||
#ifdef __gfx12__
|
||||
@@ -1028,3 +1031,4 @@ struct BlockwiseGemmWMMA
|
||||
#endif
|
||||
|
||||
} // namespace ck
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
#include "ck/tensor_operation/gpu/warp/xdlops_gemm.hpp"
|
||||
#include "ck/tensor_description/tensor_adaptor.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
namespace ck {
|
||||
|
||||
template <index_t MNXdlPerWave, index_t MNWaves, index_t MNPerXdl, typename TileDesc_K0_MN_K1>
|
||||
@@ -1031,3 +1033,4 @@ struct BlockwiseGemmXdlops_v2
|
||||
};
|
||||
|
||||
} // namespace ck
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -8,6 +8,9 @@
|
||||
#include "ck/tensor_operation/gpu/warp/xdlops_gemm.hpp"
|
||||
#include "ck/tensor_description/tensor_adaptor.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
namespace ck {
|
||||
|
||||
template <index_t BlockSize,
|
||||
@@ -317,3 +320,4 @@ struct BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1
|
||||
};
|
||||
|
||||
} // namespace ck
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -455,7 +455,7 @@ struct G_NDHW : public BaseConvolutionLayout
|
||||
template <
|
||||
typename Layout,
|
||||
typename std::enable_if<std::is_base_of<BaseTensorLayout, Layout>::value, bool>::type = false>
|
||||
std::ostream& operator<<(std::ostream& os, const Layout&)
|
||||
std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, const Layout&)
|
||||
{
|
||||
os << Layout::name;
|
||||
return os;
|
||||
|
||||
@@ -17,6 +17,9 @@
|
||||
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_common.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/device_base.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
namespace ck {
|
||||
|
||||
// Implementation of "Merge" transformation primitive that uses division and mod. It is supposed to
|
||||
@@ -1132,3 +1135,4 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_bwd_weight
|
||||
}; // struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_bwd_weight
|
||||
|
||||
} // namespace ck
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -44,7 +44,8 @@ struct get_carrier<3>
|
||||
|
||||
// replacement of host std::copy_n()
|
||||
template <typename InputIterator, typename Size, typename OutputIterator>
|
||||
__device__ static OutputIterator copy_n(InputIterator from, Size size, OutputIterator to)
|
||||
__device__ static OutputIterator
|
||||
copy_n(InputIterator from, Size size, [[clang::lifetimebound]] OutputIterator to)
|
||||
{
|
||||
if(0 < size)
|
||||
{
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
#pragma once
|
||||
#include "ck/utility/data_type.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
namespace ck {
|
||||
|
||||
// vector_type
|
||||
@@ -116,7 +118,7 @@ struct vector_type<T, 2, typename ck::enable_if_t<is_native_type<T>()>>
|
||||
__host__ __device__ constexpr vector_type(type v) : data_{v} {}
|
||||
|
||||
template <typename X>
|
||||
__host__ __device__ constexpr const auto& AsType() const
|
||||
__host__ __device__ constexpr const auto& AsType() const [[clang::lifetimebound]]
|
||||
{
|
||||
static_assert(is_same<X, d1_t>::value || is_same<X, d2_t>::value,
|
||||
"Something went wrong, please check src and dst types.");
|
||||
@@ -136,7 +138,7 @@ struct vector_type<T, 2, typename ck::enable_if_t<is_native_type<T>()>>
|
||||
}
|
||||
|
||||
template <typename X>
|
||||
__host__ __device__ constexpr auto& AsType()
|
||||
__host__ __device__ constexpr auto& AsType() [[clang::lifetimebound]]
|
||||
{
|
||||
static_assert(is_same<X, d1_t>::value || is_same<X, d2_t>::value,
|
||||
"Something went wrong, please check src and dst types.");
|
||||
@@ -248,7 +250,7 @@ struct vector_type<T, 4, typename ck::enable_if_t<is_native_type<T>()>>
|
||||
__host__ __device__ constexpr vector_type(type v) : data_{v} {}
|
||||
|
||||
template <typename X>
|
||||
__host__ __device__ constexpr const auto& AsType() const
|
||||
__host__ __device__ constexpr const auto& AsType() const [[clang::lifetimebound]]
|
||||
{
|
||||
static_assert(is_same<X, d1_t>::value || is_same<X, d2_t>::value || is_same<X, d4_t>::value,
|
||||
"Something went wrong, please check src and dst types.");
|
||||
@@ -272,7 +274,7 @@ struct vector_type<T, 4, typename ck::enable_if_t<is_native_type<T>()>>
|
||||
}
|
||||
|
||||
template <typename X>
|
||||
__host__ __device__ constexpr auto& AsType()
|
||||
__host__ __device__ constexpr auto& AsType() [[clang::lifetimebound]]
|
||||
{
|
||||
static_assert(is_same<X, d1_t>::value || is_same<X, d2_t>::value || is_same<X, d4_t>::value,
|
||||
"Something went wrong, please check src and dst types.");
|
||||
@@ -583,7 +585,7 @@ struct vector_type<T, 8, typename ck::enable_if_t<is_native_type<T>()>>
|
||||
}
|
||||
|
||||
template <typename X>
|
||||
__host__ __device__ constexpr auto& AsType()
|
||||
__host__ __device__ constexpr auto& AsType() [[clang::lifetimebound]]
|
||||
{
|
||||
static_assert(is_same<X, d1_t>::value || is_same<X, d2_t>::value ||
|
||||
is_same<X, d4_t>::value || is_same<X, d8_t>::value,
|
||||
@@ -754,7 +756,7 @@ struct vector_type<T, 16, typename ck::enable_if_t<is_native_type<T>()>>
|
||||
}
|
||||
|
||||
template <typename X>
|
||||
__host__ __device__ constexpr auto& AsType()
|
||||
__host__ __device__ constexpr auto& AsType() [[clang::lifetimebound]]
|
||||
{
|
||||
static_assert(is_same<X, d1_t>::value || is_same<X, d2_t>::value ||
|
||||
is_same<X, d4_t>::value || is_same<X, d8_t>::value ||
|
||||
@@ -1427,7 +1429,7 @@ struct non_native_vector_base<
|
||||
}
|
||||
|
||||
template <typename X>
|
||||
__host__ __device__ constexpr auto& AsType()
|
||||
__host__ __device__ constexpr auto& AsType() [[clang::lifetimebound]]
|
||||
{
|
||||
static_assert(is_same_v<X, data_t> || is_same_v<X, T> || is_same_v<X, data_v>,
|
||||
"Something went wrong, please check src and dst types.");
|
||||
@@ -1627,7 +1629,7 @@ struct vector_type<T, 2, typename ck::enable_if_t<!is_native_type<T>()>>
|
||||
__host__ __device__ constexpr vector_type(type v) : data_{v} {}
|
||||
|
||||
template <typename X>
|
||||
__host__ __device__ constexpr const auto& AsType() const
|
||||
__host__ __device__ constexpr const auto& AsType() const [[clang::lifetimebound]]
|
||||
{
|
||||
static_assert(is_same<X, d1_t>::value || is_same<X, d1_nnv_t>::value ||
|
||||
is_same<X, d2_t>::value,
|
||||
@@ -1797,7 +1799,7 @@ struct vector_type<T, 8, typename ck::enable_if_t<!is_native_type<T>()>>
|
||||
}
|
||||
|
||||
template <typename X>
|
||||
__host__ __device__ constexpr auto& AsType()
|
||||
__host__ __device__ constexpr auto& AsType() [[clang::lifetimebound]]
|
||||
{
|
||||
static_assert(is_same<X, d1_t>::value || is_same<X, d1_nnv_t>::value ||
|
||||
is_same<X, d2_t>::value || is_same<X, d4_t>::value ||
|
||||
@@ -2284,3 +2286,4 @@ using pk_i4x4_t = typename vector_type<pk_i4_t, 4>::type;
|
||||
using pk_i4x8_t = typename vector_type<pk_i4_t, 8>::type;
|
||||
|
||||
} // namespace ck
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -9,6 +9,9 @@
|
||||
#include <string_view>
|
||||
#include <map>
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
namespace ck {
|
||||
namespace internal {
|
||||
template <typename T>
|
||||
@@ -188,5 +191,5 @@ void UpdateEnvVar(EnvVar, const std::string_view& val)
|
||||
// environment variable to enable logging:
|
||||
// export CK_LOGGING=ON or CK_LOGGING=1 or CK_LOGGING=ENABLED
|
||||
CK_DECLARE_ENV_VAR_BOOL(CK_LOGGING)
|
||||
|
||||
#pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
@@ -25,7 +25,8 @@ enum struct PipelineVersion
|
||||
} // namespace ck
|
||||
|
||||
#if !defined(__HIPCC_RTC__) || !defined(CK_CODE_GEN_RTC)
|
||||
inline std::ostream& operator<<(std::ostream& os, const ck::PipelineVersion& p)
|
||||
inline std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os,
|
||||
const ck::PipelineVersion& p)
|
||||
{
|
||||
switch(p)
|
||||
{
|
||||
|
||||
@@ -70,7 +70,8 @@ enum struct TailNumber
|
||||
} // namespace ck
|
||||
|
||||
#if !defined(__HIPCC_RTC__) || !defined(CK_CODE_GEN_RTC)
|
||||
inline std::ostream& operator<<(std::ostream& os, const ck::LoopScheduler& s)
|
||||
inline std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os,
|
||||
const ck::LoopScheduler& s)
|
||||
{
|
||||
switch(s)
|
||||
{
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
|
||||
#include "statically_indexed_array.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
namespace ck {
|
||||
|
||||
// static buffer for scalar
|
||||
@@ -104,7 +106,7 @@ struct StaticBufferTupleOfVector
|
||||
// Set S
|
||||
// i is offset of S
|
||||
template <index_t I>
|
||||
__host__ __device__ constexpr S& operator()(Number<I> i)
|
||||
__host__ __device__ constexpr S& operator()(Number<I> i) [[clang::lifetimebound]]
|
||||
{
|
||||
constexpr auto i_v = i / s_per_v;
|
||||
constexpr auto i_s = i % s_per_v;
|
||||
@@ -195,3 +197,4 @@ __host__ __device__ constexpr auto make_static_buffer(LongNumber<N>)
|
||||
}
|
||||
|
||||
} // namespace ck
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -51,7 +51,7 @@ get_tuple_element_data_reference(const TupleElementKeyData<Key, Data>& x)
|
||||
// for write access of tuple element
|
||||
// Mutable overload: returns a non-const reference to x.mData.
// This listing is a diff, so the parameter list appears twice: the un-annotated line
// (apparently the removed text) followed by the line that marks `x` with
// [[clang::lifetimebound]] (apparently its replacement); both share the body below.
template <typename Key, typename Data>
|
||||
__host__ __device__ constexpr Data&
|
||||
get_tuple_element_data_reference(TupleElementKeyData<Key, Data>& x)
|
||||
get_tuple_element_data_reference([[clang::lifetimebound]] TupleElementKeyData<Key, Data>& x)
|
||||
{
|
||||
return x.mData;
|
||||
}
|
||||
@@ -106,6 +106,7 @@ struct TupleImpl<Sequence<Is...>, Xs...> : TupleElementKeyData<TupleElementKey<I
|
||||
|
||||
// Mutable element lookup by key: passes *this to
// get_tuple_element_data_reference<TupleElementKey<I>> and returns the resulting reference.
// The [[clang::lifetimebound]] line sits after the parameter list, so it applies to the
// implicit object parameter.
template <index_t I>
|
||||
__host__ __device__ constexpr auto& GetElementDataByKey(TupleElementKey<I>)
|
||||
[[clang::lifetimebound]]
|
||||
{
|
||||
return get_tuple_element_data_reference<TupleElementKey<I>>(*this);
|
||||
}
|
||||
@@ -147,7 +148,7 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
|
||||
|
||||
// write access
|
||||
template <index_t I>
|
||||
__host__ __device__ constexpr auto& At(Number<I>)
|
||||
__host__ __device__ constexpr auto& At(Number<I>) [[clang::lifetimebound]]
|
||||
{
|
||||
static_assert(I < base::Size(), "wrong! out of range");
|
||||
return base::GetElementDataByKey(detail::TupleElementKey<I>{});
|
||||
@@ -162,7 +163,7 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
|
||||
|
||||
// write access
|
||||
// Call-operator form of mutable element access: forwards `i` to At(i) and returns its result.
// This listing is a diff, so the signature appears twice: the un-annotated line
// (apparently the removed text) followed by the [[clang::lifetimebound]] line
// (apparently its replacement); both share the body below.
template <index_t I>
|
||||
__host__ __device__ constexpr auto& operator()(Number<I> i)
|
||||
__host__ __device__ constexpr auto& operator()(Number<I> i) [[clang::lifetimebound]]
|
||||
{
|
||||
return At(i);
|
||||
}
|
||||
|
||||
@@ -5,6 +5,9 @@
|
||||
|
||||
#include "ck/wrapper/utils/layout_utils.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
// Disable from doxygen docs generation
|
||||
/// @cond INTERNAL
|
||||
namespace ck {
|
||||
@@ -482,3 +485,4 @@ struct Layout
|
||||
|
||||
} // namespace wrapper
|
||||
} // namespace ck
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -7,6 +7,9 @@
|
||||
#include "utils/tensor_partition.hpp"
|
||||
#include "utils/layout_utils.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
// Disable from doxygen docs generation
|
||||
/// @cond INTERNAL
|
||||
namespace ck {
|
||||
@@ -441,3 +444,4 @@ struct Tensor
|
||||
|
||||
} // namespace wrapper
|
||||
} // namespace ck
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -11,6 +11,9 @@
|
||||
#include "ck_tile/core/utility/magic_div.hpp"
|
||||
#include "ck_tile/core/utility/print.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
namespace ck_tile {
|
||||
|
||||
enum struct coord_transform_enum
|
||||
@@ -1776,3 +1779,4 @@ make_indexing_transform_with_adaptor(const UpLength& up_lengths, const IndexingA
|
||||
}
|
||||
|
||||
} // namespace ck_tile
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -7,6 +7,9 @@
|
||||
#include "ck_tile/core/numeric/vector_type.hpp"
|
||||
#include "ck_tile/core/utility/ignore.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
namespace ck_tile::core::arch::mma {
|
||||
|
||||
/**
|
||||
@@ -112,6 +115,7 @@ struct amdgcn_mma
|
||||
};
|
||||
|
||||
} // namespace ck_tile::core::arch::mma
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
// Include the implementations
|
||||
#include "wmma/wmma.hpp"
|
||||
|
||||
@@ -8,6 +8,9 @@
|
||||
#include "ck_tile/core/container/sequence.hpp"
|
||||
#include "ck_tile/core/container/tuple.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
namespace ck_tile {
|
||||
|
||||
// naive map
|
||||
@@ -157,3 +160,4 @@ CK_TILE_HOST_DEVICE static void print(const map<key, data, max_size>& m)
|
||||
}
|
||||
|
||||
} // namespace ck_tile
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -13,6 +13,9 @@
|
||||
#include <utility>
|
||||
#include <initializer_list>
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
#ifndef CK_TILE_TUPLE_IMPL
|
||||
#define CK_TILE_TUPLE_IMPL 1
|
||||
#endif
|
||||
@@ -98,13 +101,14 @@ CK_TILE_HOST_DEVICE constexpr T getv(const tuple_object<I, T, true>&)
|
||||
}
|
||||
|
||||
// Const overload for tuple_object<I, T, false>: returns a const reference to x.element.
// This listing is a diff, so the signature appears twice: the one-line form
// (apparently the removed text) followed by a two-line form that marks `x` with
// [[clang::lifetimebound]] (apparently its replacement); both share the body below.
template <index_t I, class T>
|
||||
CK_TILE_HOST_DEVICE constexpr const T& getv(const tuple_object<I, T, false>& x)
|
||||
CK_TILE_HOST_DEVICE constexpr const T&
|
||||
getv([[clang::lifetimebound]] const tuple_object<I, T, false>& x)
|
||||
{
|
||||
return x.element;
|
||||
}
|
||||
|
||||
// Mutable overload for tuple_object<I, T, false>: returns a non-const reference to x.element.
// This listing is a diff, so the signature appears twice: the un-annotated line
// (apparently the removed text) followed by the line that marks `x` with
// [[clang::lifetimebound]] (apparently its replacement); both share the body below.
template <index_t I, class T>
|
||||
CK_TILE_HOST_DEVICE constexpr T& getv(tuple_object<I, T, false>& x)
|
||||
CK_TILE_HOST_DEVICE constexpr T& getv([[clang::lifetimebound]] tuple_object<I, T, false>& x)
|
||||
{
|
||||
return x.element;
|
||||
}
|
||||
@@ -292,7 +296,7 @@ struct tuple : impl::tuple_base<make_index_sequence<sizeof...(T)>, T...>
|
||||
//template <typename Tx> CK_TILE_HOST_DEVICE constexpr decltype(auto) get_as(index_t i) const { TP_COM_(); return reinterpret_cast<const tuple_array<Tx, size()>&>(*this).at(i); }
|
||||
// Mutable typed read: after TP_COM_(), views *this as tuple_array<Tx, size()>& via
// reinterpret_cast and returns its element at number<I>{}.
template <typename Tx, index_t I> CK_TILE_HOST_DEVICE constexpr decltype(auto) get_as(number<I>) { TP_COM_(); return reinterpret_cast<tuple_array<Tx, size()>&>(*this).at(number<I>{}); }
|
||||
// Const counterpart: same lookup through a const tuple_array<Tx, size()>& view of *this.
template <typename Tx, index_t I> CK_TILE_HOST_DEVICE constexpr decltype(auto) get_as(number<I>) const { TP_COM_(); return reinterpret_cast<const tuple_array<Tx, size()>&>(*this).at(number<I>{}); }
|
||||
|
||||
|
||||
// Disabled variant of set_as that takes a runtime index_t instead of number<I>:
// template <typename Tx> CK_TILE_HOST_DEVICE constexpr void set_as(index_t i, const Tx & x) { TP_COM_(); reinterpret_cast<tuple_array<Tx, size()>&>(*this).at(i) = x; }
|
||||
// Typed write: after TP_COM_(), views *this as tuple_array<Tx, size()>& via reinterpret_cast
// and assigns x to its element at number<I>{}.
template <typename Tx, index_t I> CK_TILE_HOST_DEVICE constexpr void set_as(number<I>, const Tx & x) { TP_COM_(); reinterpret_cast<tuple_array<Tx, size()>&>(*this).at(number<I>{}) = x; }
|
||||
|
||||
@@ -864,3 +868,4 @@ struct tuple_element<I, const ck_tile::tuple<Ts...>>
|
||||
} \
|
||||
}()
|
||||
#endif
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -6,6 +6,9 @@
|
||||
#include "ck_tile/core/config.hpp"
|
||||
#include "ck_tile/core/numeric/mxfp_convert.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
namespace ck_tile {
|
||||
|
||||
/**
|
||||
@@ -100,3 +103,4 @@ CK_TILE_HOST_DEVICE constexpr e8m0_bexp_t::operator float() const
|
||||
}
|
||||
|
||||
} // namespace ck_tile
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -9,6 +9,9 @@
|
||||
#include "ck_tile/core/numeric/float8.hpp"
|
||||
#include "ck_tile/core/numeric/mxfp_convert.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
#if defined(__gfx950__)
|
||||
#define CK_TILE_FP4_CVT_DEVICE 1
|
||||
#else
|
||||
@@ -517,3 +520,4 @@ CK_TILE_HOST_DEVICE constexpr fp8x2_t pk_fp4_t::to_fp8x2(float scale) const
|
||||
#endif
|
||||
|
||||
} // namespace ck_tile
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -14,6 +14,9 @@
|
||||
#include "ck_tile/core/tensor/tile_distribution.hpp"
|
||||
#include "ck_tile/core/container/thread_buffer.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
namespace ck_tile {
|
||||
|
||||
template <typename DataType_, typename StaticTileDistribution_>
|
||||
@@ -266,3 +269,4 @@ inline constexpr bool is_similiar_distributed_tensor_v =
|
||||
} // namespace detail
|
||||
|
||||
} // namespace ck_tile
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -12,6 +12,9 @@
|
||||
#include "ck_tile/core/utility/type_traits.hpp"
|
||||
#include "ck_tile/core/numeric/numeric.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
namespace ck_tile {
|
||||
|
||||
// Transforms: Tuple<transforms...>
|
||||
@@ -950,3 +953,4 @@ CK_TILE_HOST_DEVICE constexpr auto chain_tensor_adaptors(const X& x, const Xs&..
|
||||
remove_cvref_t<decltype(bottom_dim_ids)>, \
|
||||
remove_cvref_t<decltype(top_dim_ids)>>{trans}; \
|
||||
}()
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -14,6 +14,9 @@
|
||||
#include "ck_tile/core/utility/type_traits.hpp"
|
||||
#include "ck_tile/core/utility/print.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
namespace ck_tile {
|
||||
|
||||
template <index_t NDimHidden, typename BottomDimensionHiddenIds, typename TopDimensionHiddenIds>
|
||||
@@ -367,3 +370,4 @@ CK_TILE_HOST_DEVICE void print(const tensor_adaptor_coordinate<N, B, T>& coord)
|
||||
detail::CK_PRINT_X_<>{}(coord);
|
||||
}
|
||||
} // namespace ck_tile
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -14,6 +14,9 @@
|
||||
#include "ck_tile/core/utility/functional.hpp"
|
||||
#include "ck_tile/core/utility/type_traits.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
namespace ck_tile {
|
||||
|
||||
/*
|
||||
@@ -582,3 +585,4 @@ pad_tensor_view(const TensorView& tensor_view, const TileLengths& tile_lengths,
|
||||
}
|
||||
|
||||
} // namespace ck_tile
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -15,6 +15,9 @@
|
||||
#include "ck_tile/core/utility/functional.hpp"
|
||||
#include "ck_tile/core/utility/type_traits.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
namespace ck_tile {
|
||||
|
||||
template <typename Distribution>
|
||||
@@ -731,3 +734,4 @@ CK_TILE_HOST_DEVICE void print(const tile_distribution<PsYs2XsAdaptor_,
|
||||
}
|
||||
|
||||
} // namespace ck_tile
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -6,6 +6,9 @@
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
namespace ck_tile {
|
||||
|
||||
template <typename... Args>
|
||||
@@ -206,3 +209,4 @@ void UpdateEnvVar(EnvVar, const std::string_view& val)
|
||||
// environment variable to enable logging:
|
||||
// export CK_TILE_LOGGING=ON or CK_TILE_LOGGING=1 or CK_TILE_LOGGING=ENABLED
|
||||
CK_TILE_DECLARE_ENV_VAR_BOOL(CK_TILE_LOGGING)
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#include <stdint.h>
|
||||
#include <utility>
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
namespace ck_tile {
|
||||
|
||||
namespace detail {
|
||||
@@ -270,3 +272,4 @@ constexpr auto conditional_expr(X&& x, Y&& y)
|
||||
}
|
||||
|
||||
} // namespace ck_tile
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -13,6 +13,9 @@
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
namespace ck_tile {
|
||||
/*
|
||||
* a host side utility, arg parser for, either
|
||||
@@ -234,3 +237,4 @@ class ArgParser
|
||||
std::vector<std::string> keys;
|
||||
};
|
||||
} // namespace ck_tile
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -17,6 +17,9 @@
|
||||
#include "ck_tile/host/joinable_thread.hpp"
|
||||
#include "ck_tile/host/ranges.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
namespace ck_tile {
|
||||
|
||||
template <typename Range>
|
||||
@@ -859,3 +862,4 @@ auto get_default_stride(std::size_t row,
|
||||
return stride;
|
||||
}
|
||||
} // namespace ck_tile
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -41,7 +41,8 @@ enum struct TailNumber
|
||||
|
||||
} // namespace ck_tile
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& os, const ck_tile::GemmPipelineScheduler& s)
|
||||
inline std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os,
|
||||
const ck_tile::GemmPipelineScheduler& s)
|
||||
{
|
||||
switch(s)
|
||||
{
|
||||
@@ -53,7 +54,8 @@ inline std::ostream& operator<<(std::ostream& os, const ck_tile::GemmPipelineSch
|
||||
return os;
|
||||
}
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& os, const ck_tile::TailNumber& s)
|
||||
inline std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os,
|
||||
const ck_tile::TailNumber& s)
|
||||
{
|
||||
switch(s)
|
||||
{
|
||||
|
||||
@@ -9,6 +9,9 @@
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
class ProfilerOperationRegistry final
|
||||
{
|
||||
ProfilerOperationRegistry() = default;
|
||||
@@ -83,3 +86,4 @@ class ProfilerOperationRegistry final
|
||||
::ProfilerOperationRegistry::GetInstance().Add(name, description, operation) \
|
||||
_Pragma("clang diagnostic pop")
|
||||
// clang-format on
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -9,6 +9,9 @@
|
||||
#include "ck_tile/core.hpp"
|
||||
#include "ck_tile/ops/fmha.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
#ifndef TEST_ALIBI_VERBOSE
|
||||
#define TEST_ALIBI_VERBOSE 0
|
||||
#endif
|
||||
@@ -213,3 +216,4 @@ int main()
|
||||
// clang-format on
|
||||
return rtn ? 0 : -1;
|
||||
}
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -13,6 +13,9 @@
|
||||
#include "ck_tile/host.hpp"
|
||||
#include "gemm_multi_d_common.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
// Data types and Layouts are defined by the generated kernel headers
|
||||
// No hardcoded type definitions here to avoid conflicts
|
||||
|
||||
@@ -230,3 +233,4 @@ void gemm_multi_d_host_reference(int verify,
|
||||
a_m_k, b_k_n, {d0_m_n, d1_m_n}, c_m_n_host_result);
|
||||
}
|
||||
}
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -7,6 +7,9 @@
|
||||
#include "ck_tile/host.hpp"
|
||||
#include "gemm_preshuffle_common.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
//[TODO] Move parts of this File to commons
|
||||
enum class Metric
|
||||
{
|
||||
@@ -234,3 +237,4 @@ void gemm_host_reference(int verify,
|
||||
c_m_n_gpu_buf_ref.FromDevice(c_m_n_ref.data());
|
||||
}
|
||||
}
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -13,6 +13,8 @@
|
||||
#include "ck_tile/host.hpp"
|
||||
#include "gemm_common.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
// Data types and Layouts are defined by the generated kernel headers
|
||||
// No hardcoded type definitions here to avoid conflicts
|
||||
|
||||
@@ -240,3 +242,4 @@ void gemm_host_reference(int verify,
|
||||
c_m_n_gpu_buf_ref.FromDevice(c_m_n_host_result.data());
|
||||
}
|
||||
}
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -17,6 +17,9 @@
|
||||
// Data types and Layouts are defined by the generated kernel headers
|
||||
// No hardcoded type definitions here to avoid conflicts
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions"
|
||||
|
||||
enum class Metric
|
||||
{
|
||||
LATENCY = 0,
|
||||
@@ -199,3 +202,4 @@ void gemm_host_reference(int verify,
|
||||
c_m_n_gpu_buf_ref.FromDevice(c_m_n_host_result.data());
|
||||
}
|
||||
}
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
Reference in New Issue
Block a user