mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-21 13:29:20 +00:00
Merge commit '57e0f5df29abefd919c334c994628a994ba2868c' into develop
This commit is contained in:
@@ -360,10 +360,9 @@ struct Tensor
|
||||
|
||||
std::size_t GetElementSpaceSize() const
|
||||
{
|
||||
if constexpr(ck::is_same_v<ck::remove_cvref_t<T>, ck::pk_i4_t> ||
|
||||
ck::is_same_v<ck::remove_cvref_t<T>, ck::f4x2_pk_t>)
|
||||
if constexpr(ck::is_packed_type_v<ck::remove_cvref_t<T>>)
|
||||
{
|
||||
return (mDesc.GetElementSpaceSize() + 1) / 2;
|
||||
return (mDesc.GetElementSpaceSize() + 1) / ck::packed_size_v<ck::remove_cvref_t<T>>;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -516,69 +515,31 @@ struct Tensor
|
||||
template <typename... Is>
|
||||
std::size_t GetOffsetFromMultiIndex(Is... is) const
|
||||
{
|
||||
if constexpr(ck::is_same_v<ck::remove_cvref_t<T>, ck::pk_i4_t> ||
|
||||
ck::is_same_v<ck::remove_cvref_t<T>, ck::f4x2_pk_t>)
|
||||
{
|
||||
return mDesc.GetOffsetFromMultiIndex(is...) / 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
return mDesc.GetOffsetFromMultiIndex(is...);
|
||||
}
|
||||
return mDesc.GetOffsetFromMultiIndex(is...) / ck::packed_size_v<ck::remove_cvref_t<T>>;
|
||||
}
|
||||
|
||||
template <typename... Is>
|
||||
T& operator()(Is... is)
|
||||
{
|
||||
if constexpr(ck::is_same_v<ck::remove_cvref_t<T>, ck::pk_i4_t> ||
|
||||
ck::is_same_v<ck::remove_cvref_t<T>, ck::f4x2_pk_t>)
|
||||
{
|
||||
return mData[mDesc.GetOffsetFromMultiIndex(is...) / 2];
|
||||
}
|
||||
else
|
||||
{
|
||||
return mData[mDesc.GetOffsetFromMultiIndex(is...)];
|
||||
}
|
||||
return mData[mDesc.GetOffsetFromMultiIndex(is...) /
|
||||
ck::packed_size_v<ck::remove_cvref_t<T>>];
|
||||
}
|
||||
|
||||
template <typename... Is>
|
||||
const T& operator()(Is... is) const
|
||||
{
|
||||
if constexpr(ck::is_same_v<ck::remove_cvref_t<T>, ck::pk_i4_t> ||
|
||||
ck::is_same_v<ck::remove_cvref_t<T>, ck::f4x2_pk_t>)
|
||||
{
|
||||
return mData[mDesc.GetOffsetFromMultiIndex(is...) / 2];
|
||||
}
|
||||
else
|
||||
{
|
||||
return mData[mDesc.GetOffsetFromMultiIndex(is...)];
|
||||
}
|
||||
return mData[mDesc.GetOffsetFromMultiIndex(is...) /
|
||||
ck::packed_size_v<ck::remove_cvref_t<T>>];
|
||||
}
|
||||
|
||||
// Mutable access to the stored element addressed by the index vector `idx`.
//
// The descriptor offset for `idx` is divided by ck::packed_size_v of the element
// type before indexing mData. No bounds checking is performed here.
// NOTE(review): assumes ck::packed_size_v is 1 for unpacked element types -
// confirm against the trait definition.
T& operator()(std::vector<std::size_t> idx)
{
    return mData[mDesc.GetOffsetFromMultiIndex(idx) / ck::packed_size_v<ck::remove_cvref_t<T>>];
}
|
||||
|
||||
// Read-only access to the stored element addressed by the index vector `idx`.
//
// The descriptor offset for `idx` is divided by ck::packed_size_v of the element
// type before indexing mData. No bounds checking is performed here.
// NOTE(review): assumes ck::packed_size_v is 1 for unpacked element types -
// confirm against the trait definition.
const T& operator()(std::vector<std::size_t> idx) const
{
    return mData[mDesc.GetOffsetFromMultiIndex(idx) / ck::packed_size_v<ck::remove_cvref_t<T>>];
}
|
||||
|
||||
// Returns mData.begin(), i.e. a mutable iterator positioned at the start of mData.
typename Data::iterator begin()
{
    return mData.begin();
}
|
||||
|
||||
@@ -67,6 +67,18 @@ struct GeneratorTensor_1<ck::f8_t>
|
||||
return ck::type_convert<ck::f8_t>(value);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct GeneratorTensor_1<ck::bf8_t>
|
||||
{
|
||||
float value = 1.0;
|
||||
|
||||
template <typename... Is>
|
||||
ck::bf8_t operator()(Is...)
|
||||
{
|
||||
return ck::type_convert<ck::bf8_t>(value);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
template <>
|
||||
@@ -93,6 +105,38 @@ struct GeneratorTensor_1<ck::f4x2_pk_t>
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct GeneratorTensor_1<ck::f6x32_pk_t>
|
||||
{
|
||||
float value = 1.0;
|
||||
|
||||
template <typename... Is>
|
||||
ck::f6x32_pk_t operator()(Is...)
|
||||
{
|
||||
ck::f6x32_pk_t r;
|
||||
ck::static_for<0, 32, 1>{}([&](auto i) {
|
||||
r.pack(ck::type_convert<ck::f6_t>(value), static_cast<ck::index_t>(i));
|
||||
});
|
||||
return r;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct GeneratorTensor_1<ck::bf6x32_pk_t>
|
||||
{
|
||||
float value = 1.0;
|
||||
|
||||
template <typename... Is>
|
||||
ck::bf6x32_pk_t operator()(Is...)
|
||||
{
|
||||
ck::bf6x32_pk_t r;
|
||||
ck::static_for<0, 32, 1>{}([&](auto i) {
|
||||
r.pack(ck::type_convert<ck::bf6_t>(value), static_cast<ck::index_t>(i));
|
||||
});
|
||||
return r;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct GeneratorTensor_1<int8_t>
|
||||
{
|
||||
@@ -132,6 +176,44 @@ struct GeneratorTensor_2
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct GeneratorTensor_2<ck::f6x32_pk_t>
|
||||
{
|
||||
int min_value = 0;
|
||||
int max_value = 1;
|
||||
|
||||
template <typename... Is>
|
||||
ck::f6x32_pk_t operator()(Is...)
|
||||
{
|
||||
ck::f6x32_pk_t r;
|
||||
ck::static_for<0, 32, 1>{}([&](auto i) {
|
||||
float tmp = (std::rand() % (max_value - min_value)) + min_value;
|
||||
r.pack(ck::type_convert<ck::f6_t>(tmp), static_cast<ck::index_t>(i));
|
||||
});
|
||||
|
||||
return r;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct GeneratorTensor_2<ck::bf6x32_pk_t>
|
||||
{
|
||||
int min_value = 0;
|
||||
int max_value = 1;
|
||||
|
||||
template <typename... Is>
|
||||
ck::bf6x32_pk_t operator()(Is...)
|
||||
{
|
||||
ck::bf6x32_pk_t r;
|
||||
ck::static_for<0, 32, 1>{}([&](auto i) {
|
||||
float tmp = (std::rand() % (max_value - min_value)) + min_value;
|
||||
r.pack(ck::type_convert<ck::bf6_t>(tmp), static_cast<ck::index_t>(i));
|
||||
});
|
||||
|
||||
return r;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct GeneratorTensor_2<ck::bhalf_t>
|
||||
{
|
||||
@@ -342,6 +424,46 @@ struct GeneratorTensor_3<ck::f4x2_pk_t>
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct GeneratorTensor_3<ck::f6x32_pk_t>
|
||||
{
|
||||
float min_value = 0;
|
||||
float max_value = 1;
|
||||
|
||||
template <typename... Is>
|
||||
ck::f6x32_pk_t operator()(Is...)
|
||||
{
|
||||
ck::f6x32_pk_t r;
|
||||
ck::static_for<0, 32, 1>{}([&](auto i) {
|
||||
float rnd = float(std::rand()) / float(RAND_MAX);
|
||||
float fp32 = min_value + rnd * (max_value - min_value);
|
||||
r.pack(ck::type_convert<ck::f6_t>(fp32), static_cast<ck::index_t>(i));
|
||||
});
|
||||
|
||||
return r;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct GeneratorTensor_3<ck::bf6x32_pk_t>
|
||||
{
|
||||
float min_value = 0;
|
||||
float max_value = 1;
|
||||
|
||||
template <typename... Is>
|
||||
ck::bf6x32_pk_t operator()(Is...)
|
||||
{
|
||||
ck::bf6x32_pk_t r;
|
||||
ck::static_for<0, 32, 1>{}([&](auto i) {
|
||||
float rnd = float(std::rand()) / float(RAND_MAX);
|
||||
float fp32 = min_value + rnd * (max_value - min_value);
|
||||
r.pack(ck::type_convert<ck::bf6_t>(fp32), static_cast<ck::index_t>(i));
|
||||
});
|
||||
|
||||
return r;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct GeneratorTensor_4
|
||||
{
|
||||
@@ -360,6 +482,69 @@ struct GeneratorTensor_4
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct GeneratorTensor_4<ck::f4x2_pk_t>
|
||||
{
|
||||
std::mt19937 generator;
|
||||
std::normal_distribution<float> distribution;
|
||||
|
||||
GeneratorTensor_4(float mean, float stddev, unsigned int seed = 1)
|
||||
: generator(seed), distribution(mean, stddev){};
|
||||
|
||||
template <typename... Is>
|
||||
ck::f4x2_pk_t operator()(Is...)
|
||||
{
|
||||
float fp32_tmp0 = distribution(generator);
|
||||
float fp32_tmp1 = distribution(generator);
|
||||
|
||||
return ck::f4x2_pk_t{ck::type_convert<ck::f4x2_t>(ck::float2_t{fp32_tmp0, fp32_tmp1})};
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct GeneratorTensor_4<ck::f6x32_pk_t>
|
||||
{
|
||||
std::mt19937 generator;
|
||||
std::normal_distribution<float> distribution;
|
||||
|
||||
GeneratorTensor_4(float mean, float stddev, unsigned int seed = 1)
|
||||
: generator(seed), distribution(mean, stddev){};
|
||||
|
||||
template <typename... Is>
|
||||
ck::f6x32_pk_t operator()(Is...)
|
||||
{
|
||||
ck::f6x32_pk_t r;
|
||||
ck::static_for<0, 32, 1>{}([&](auto i) {
|
||||
r.pack(ck::type_convert<ck::f6_t>(distribution(generator)),
|
||||
static_cast<ck::index_t>(i));
|
||||
});
|
||||
|
||||
return r;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct GeneratorTensor_4<ck::bf6x32_pk_t>
|
||||
{
|
||||
std::mt19937 generator;
|
||||
std::normal_distribution<float> distribution;
|
||||
|
||||
GeneratorTensor_4(float mean, float stddev, unsigned int seed = 1)
|
||||
: generator(seed), distribution(mean, stddev){};
|
||||
|
||||
template <typename... Is>
|
||||
ck::bf6x32_pk_t operator()(Is...)
|
||||
{
|
||||
ck::bf6x32_pk_t r;
|
||||
ck::static_for<0, 32, 1>{}([&](auto i) {
|
||||
r.pack(ck::type_convert<ck::bf6_t>(distribution(generator)),
|
||||
static_cast<ck::index_t>(i));
|
||||
});
|
||||
|
||||
return r;
|
||||
}
|
||||
};
|
||||
|
||||
struct GeneratorTensor_Checkboard
|
||||
{
|
||||
template <typename... Ts>
|
||||
@@ -405,6 +590,53 @@ struct GeneratorTensor_Sequential
|
||||
}
|
||||
};
|
||||
|
||||
template <ck::index_t Dim>
|
||||
struct GeneratorTensor_Sequential<ck::f4x2_pk_t, Dim>
|
||||
{
|
||||
template <typename... Ts>
|
||||
ck::f4x2_pk_t operator()(Ts... Xs) const
|
||||
{
|
||||
std::array<ck::index_t, sizeof...(Ts)> dims = {{static_cast<ck::index_t>(Xs)...}};
|
||||
|
||||
float tmp = dims[Dim];
|
||||
return ck::type_convert<ck::f4x2_t>(ck::float2_t(tmp));
|
||||
}
|
||||
};
|
||||
|
||||
template <ck::index_t Dim>
|
||||
struct GeneratorTensor_Sequential<ck::f6x32_pk_t, Dim>
|
||||
{
|
||||
template <typename... Ts>
|
||||
ck::f6x32_pk_t operator()(Ts... Xs) const
|
||||
{
|
||||
std::array<ck::index_t, sizeof...(Ts)> dims = {{static_cast<ck::index_t>(Xs)...}};
|
||||
|
||||
float tmp = dims[Dim];
|
||||
|
||||
ck::f6x32_pk_t r;
|
||||
ck::static_for<0, 32, 1>{}(
|
||||
[&](auto i) { r.pack(ck::type_convert<ck::f6_t>(tmp), static_cast<ck::index_t>(i)); });
|
||||
return r;
|
||||
}
|
||||
};
|
||||
|
||||
template <ck::index_t Dim>
|
||||
struct GeneratorTensor_Sequential<ck::bf6x32_pk_t, Dim>
|
||||
{
|
||||
template <typename... Ts>
|
||||
ck::bf6x32_pk_t operator()(Ts... Xs) const
|
||||
{
|
||||
std::array<ck::index_t, sizeof...(Ts)> dims = {{static_cast<ck::index_t>(Xs)...}};
|
||||
|
||||
float tmp = dims[Dim];
|
||||
|
||||
ck::bf6x32_pk_t r;
|
||||
ck::static_for<0, 32, 1>{}(
|
||||
[&](auto i) { r.pack(ck::type_convert<ck::bf6_t>(tmp), static_cast<ck::index_t>(i)); });
|
||||
return r;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, size_t NumEffectiveDim = 2>
|
||||
struct GeneratorTensor_Diagonal
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user