Merge commit '57e0f5df29abefd919c334c994628a994ba2868c' into develop

This commit is contained in:
assistant-librarian[bot]
2025-05-19 22:06:56 +00:00
parent 0b87df9c4a
commit 9d088bc569
15 changed files with 1602 additions and 588 deletions

View File

@@ -360,10 +360,9 @@ struct Tensor
std::size_t GetElementSpaceSize() const
{
if constexpr(ck::is_same_v<ck::remove_cvref_t<T>, ck::pk_i4_t> ||
ck::is_same_v<ck::remove_cvref_t<T>, ck::f4x2_pk_t>)
if constexpr(ck::is_packed_type_v<ck::remove_cvref_t<T>>)
{
return (mDesc.GetElementSpaceSize() + 1) / 2;
return (mDesc.GetElementSpaceSize() + 1) / ck::packed_size_v<ck::remove_cvref_t<T>>;
}
else
{
@@ -516,69 +515,31 @@ struct Tensor
// Translates a multi-index into a storage offset by delegating to
// mDesc.GetOffsetFromMultiIndex and dividing by a per-type factor.
// NOTE(review): this span appears to contain both sides of a diff: the
// explicit pk_i4_t / f4x2_pk_t branch pair and the packed_size_v-based
// return. As written, both if-constexpr branches return, so the final
// return statement is never reached. TODO confirm which variant is intended.
template <typename... Is>
std::size_t GetOffsetFromMultiIndex(Is... is) const
{
if constexpr(ck::is_same_v<ck::remove_cvref_t<T>, ck::pk_i4_t> ||
ck::is_same_v<ck::remove_cvref_t<T>, ck::f4x2_pk_t>)
{
return mDesc.GetOffsetFromMultiIndex(is...) / 2;
}
else
{
return mDesc.GetOffsetFromMultiIndex(is...);
}
return mDesc.GetOffsetFromMultiIndex(is...) / ck::packed_size_v<ck::remove_cvref_t<T>>;
}
// Mutable element access by variadic multi-index: the offset from
// mDesc.GetOffsetFromMultiIndex is scaled down and used to index mData.
// NOTE(review): this span appears to contain both sides of a diff: the
// explicit pk_i4_t / f4x2_pk_t branch pair and the packed_size_v-based
// return. As written, both if-constexpr branches return, so the final
// return statement is never reached. TODO confirm which variant is intended.
template <typename... Is>
T& operator()(Is... is)
{
if constexpr(ck::is_same_v<ck::remove_cvref_t<T>, ck::pk_i4_t> ||
ck::is_same_v<ck::remove_cvref_t<T>, ck::f4x2_pk_t>)
{
return mData[mDesc.GetOffsetFromMultiIndex(is...) / 2];
}
else
{
return mData[mDesc.GetOffsetFromMultiIndex(is...)];
}
return mData[mDesc.GetOffsetFromMultiIndex(is...) /
ck::packed_size_v<ck::remove_cvref_t<T>>];
}
// Read-only element access by variadic multi-index: the offset from
// mDesc.GetOffsetFromMultiIndex is scaled down and used to index mData.
// NOTE(review): this span appears to contain both sides of a diff: the
// explicit pk_i4_t / f4x2_pk_t branch pair and the packed_size_v-based
// return. As written, both if-constexpr branches return, so the final
// return statement is never reached. TODO confirm which variant is intended.
template <typename... Is>
const T& operator()(Is... is) const
{
if constexpr(ck::is_same_v<ck::remove_cvref_t<T>, ck::pk_i4_t> ||
ck::is_same_v<ck::remove_cvref_t<T>, ck::f4x2_pk_t>)
{
return mData[mDesc.GetOffsetFromMultiIndex(is...) / 2];
}
else
{
return mData[mDesc.GetOffsetFromMultiIndex(is...)];
}
return mData[mDesc.GetOffsetFromMultiIndex(is...) /
ck::packed_size_v<ck::remove_cvref_t<T>>];
}
// Mutable element access by an index vector: the offset from
// mDesc.GetOffsetFromMultiIndex(idx) is scaled down and used to index mData.
// NOTE(review): this span appears to contain both sides of a diff: the
// explicit pk_i4_t / f4x2_pk_t branch pair and the packed_size_v-based
// return. As written, both if-constexpr branches return, so the final
// return statement is never reached. TODO confirm which variant is intended.
T& operator()(std::vector<std::size_t> idx)
{
if constexpr(ck::is_same_v<ck::remove_cvref_t<T>, ck::pk_i4_t> ||
ck::is_same_v<ck::remove_cvref_t<T>, ck::f4x2_pk_t>)
{
return mData[mDesc.GetOffsetFromMultiIndex(idx) / 2];
}
else
{
return mData[mDesc.GetOffsetFromMultiIndex(idx)];
}
return mData[mDesc.GetOffsetFromMultiIndex(idx) / ck::packed_size_v<ck::remove_cvref_t<T>>];
}
// Read-only element access by an index vector: the offset from
// mDesc.GetOffsetFromMultiIndex(idx) is scaled down and used to index mData.
// NOTE(review): this span appears to contain both sides of a diff: the
// explicit pk_i4_t / f4x2_pk_t branch pair and the packed_size_v-based
// return. As written, both if-constexpr branches return, so the final
// return statement is never reached. TODO confirm which variant is intended.
const T& operator()(std::vector<std::size_t> idx) const
{
if constexpr(ck::is_same_v<ck::remove_cvref_t<T>, ck::pk_i4_t> ||
ck::is_same_v<ck::remove_cvref_t<T>, ck::f4x2_pk_t>)
{
return mData[mDesc.GetOffsetFromMultiIndex(idx) / 2];
}
else
{
return mData[mDesc.GetOffsetFromMultiIndex(idx)];
}
return mData[mDesc.GetOffsetFromMultiIndex(idx) / ck::packed_size_v<ck::remove_cvref_t<T>>];
}
// Mutable begin iterator; forwards directly to mData.begin().
typename Data::iterator begin()
{
    return mData.begin();
}

View File

@@ -67,6 +67,18 @@ struct GeneratorTensor_1<ck::f8_t>
return ck::type_convert<ck::f8_t>(value);
}
};
/// Constant generator for ck::bf8_t: every call yields `value` converted to
/// ck::bf8_t, regardless of the coordinates passed in.
template <>
struct GeneratorTensor_1<ck::bf8_t>
{
    float value = 1.0;

    /// The coordinate arguments are accepted but not used.
    template <typename... Is>
    ck::bf8_t operator()(Is...)
    {
        auto converted = ck::type_convert<ck::bf8_t>(value);
        return converted;
    }
};
#endif
template <>
@@ -93,6 +105,38 @@ struct GeneratorTensor_1<ck::f4x2_pk_t>
}
};
/// Constant generator for ck::f6x32_pk_t: builds a pack in which every index
/// visited by static_for<0, 32, 1> holds `value` converted to ck::f6_t.
template <>
struct GeneratorTensor_1<ck::f6x32_pk_t>
{
    float value = 1.0;

    /// The coordinate arguments are accepted but not used.
    template <typename... Is>
    ck::f6x32_pk_t operator()(Is...)
    {
        ck::f6x32_pk_t packed;
        ck::static_for<0, 32, 1>{}([&](auto slot) {
            const auto element = ck::type_convert<ck::f6_t>(value);
            packed.pack(element, static_cast<ck::index_t>(slot));
        });
        return packed;
    }
};
/// Constant generator for ck::bf6x32_pk_t: builds a pack in which every index
/// visited by static_for<0, 32, 1> holds `value` converted to ck::bf6_t.
template <>
struct GeneratorTensor_1<ck::bf6x32_pk_t>
{
    float value = 1.0;

    /// The coordinate arguments are accepted but not used.
    template <typename... Is>
    ck::bf6x32_pk_t operator()(Is...)
    {
        ck::bf6x32_pk_t packed;
        ck::static_for<0, 32, 1>{}([&](auto slot) {
            const auto element = ck::type_convert<ck::bf6_t>(value);
            packed.pack(element, static_cast<ck::index_t>(slot));
        });
        return packed;
    }
};
template <>
struct GeneratorTensor_1<int8_t>
{
@@ -132,6 +176,44 @@ struct GeneratorTensor_2
}
};
/// Random-integer generator for ck::f6x32_pk_t packs.
///
/// Each index visited by static_for<0, 32, 1> receives
/// (std::rand() % (max_value - min_value)) + min_value, converted to float and
/// then to ck::f6_t. When max_value == min_value every entry is min_value.
template <>
struct GeneratorTensor_2<ck::f6x32_pk_t>
{
    int min_value = 0;
    int max_value = 1;

    /// The coordinate arguments are accepted but not used.
    template <typename... Is>
    ck::f6x32_pk_t operator()(Is...)
    {
        // A zero-width range would make the modulo below divide by zero, which
        // is undefined behaviour; fall back to a fixed offset of 0 instead.
        const int span = max_value - min_value;

        ck::f6x32_pk_t packed;
        ck::static_for<0, 32, 1>{}([&](auto slot) {
            const int offset   = (span != 0) ? (std::rand() % span) : 0;
            const float sample = static_cast<float>(offset + min_value);
            packed.pack(ck::type_convert<ck::f6_t>(sample), static_cast<ck::index_t>(slot));
        });
        return packed;
    }
};
/// Random-integer generator for ck::bf6x32_pk_t packs.
///
/// Each index visited by static_for<0, 32, 1> receives
/// (std::rand() % (max_value - min_value)) + min_value, converted to float and
/// then to ck::bf6_t. When max_value == min_value every entry is min_value.
template <>
struct GeneratorTensor_2<ck::bf6x32_pk_t>
{
    int min_value = 0;
    int max_value = 1;

    /// The coordinate arguments are accepted but not used.
    template <typename... Is>
    ck::bf6x32_pk_t operator()(Is...)
    {
        // A zero-width range would make the modulo below divide by zero, which
        // is undefined behaviour; fall back to a fixed offset of 0 instead.
        const int span = max_value - min_value;

        ck::bf6x32_pk_t packed;
        ck::static_for<0, 32, 1>{}([&](auto slot) {
            const int offset   = (span != 0) ? (std::rand() % span) : 0;
            const float sample = static_cast<float>(offset + min_value);
            packed.pack(ck::type_convert<ck::bf6_t>(sample), static_cast<ck::index_t>(slot));
        });
        return packed;
    }
};
template <>
struct GeneratorTensor_2<ck::bhalf_t>
{
@@ -342,6 +424,46 @@ struct GeneratorTensor_3<ck::f4x2_pk_t>
}
};
/// Random-float generator for ck::f6x32_pk_t packs.
///
/// For each index visited by static_for<0, 32, 1>, std::rand() is scaled by
/// RAND_MAX, mapped onto [min_value, max_value] and converted to ck::f6_t.
template <>
struct GeneratorTensor_3<ck::f6x32_pk_t>
{
    float min_value = 0;
    float max_value = 1;

    /// The coordinate arguments are accepted but not used.
    template <typename... Is>
    ck::f6x32_pk_t operator()(Is...)
    {
        ck::f6x32_pk_t packed;
        ck::static_for<0, 32, 1>{}([&](auto slot) {
            const float unit   = static_cast<float>(std::rand()) / static_cast<float>(RAND_MAX);
            const float scaled = min_value + unit * (max_value - min_value);
            packed.pack(ck::type_convert<ck::f6_t>(scaled), static_cast<ck::index_t>(slot));
        });
        return packed;
    }
};
/// Random-float generator for ck::bf6x32_pk_t packs.
///
/// For each index visited by static_for<0, 32, 1>, std::rand() is scaled by
/// RAND_MAX, mapped onto [min_value, max_value] and converted to ck::bf6_t.
template <>
struct GeneratorTensor_3<ck::bf6x32_pk_t>
{
    float min_value = 0;
    float max_value = 1;

    /// The coordinate arguments are accepted but not used.
    template <typename... Is>
    ck::bf6x32_pk_t operator()(Is...)
    {
        ck::bf6x32_pk_t packed;
        ck::static_for<0, 32, 1>{}([&](auto slot) {
            const float unit   = static_cast<float>(std::rand()) / static_cast<float>(RAND_MAX);
            const float scaled = min_value + unit * (max_value - min_value);
            packed.pack(ck::type_convert<ck::bf6_t>(scaled), static_cast<ck::index_t>(slot));
        });
        return packed;
    }
};
template <typename T>
struct GeneratorTensor_4
{
@@ -360,6 +482,69 @@ struct GeneratorTensor_4
}
};
/// Normal-distribution generator for ck::f4x2_pk_t.
///
/// Holds its own std::mt19937 engine (seeded at construction) and a
/// std::normal_distribution<float> configured with the given mean / stddev.
template <>
struct GeneratorTensor_4<ck::f4x2_pk_t>
{
    std::mt19937 generator;
    std::normal_distribution<float> distribution;

    GeneratorTensor_4(float mean, float stddev, unsigned int seed = 1)
        : generator(seed), distribution(mean, stddev)
    {
    }

    /// Draws two samples (in order) and converts the pair into one packed value.
    /// The coordinate arguments are accepted but not used.
    template <typename... Is>
    ck::f4x2_pk_t operator()(Is...)
    {
        const float first  = distribution(generator);
        const float second = distribution(generator);
        return ck::f4x2_pk_t{ck::type_convert<ck::f4x2_t>(ck::float2_t{first, second})};
    }
};
/// Normal-distribution generator for ck::f6x32_pk_t packs.
///
/// Holds its own std::mt19937 engine (seeded at construction) and a
/// std::normal_distribution<float> configured with the given mean / stddev.
template <>
struct GeneratorTensor_4<ck::f6x32_pk_t>
{
    std::mt19937 generator;
    std::normal_distribution<float> distribution;

    GeneratorTensor_4(float mean, float stddev, unsigned int seed = 1)
        : generator(seed), distribution(mean, stddev)
    {
    }

    /// Draws one sample per index visited by static_for<0, 32, 1>, converts it
    /// to ck::f6_t and packs it at that index. Coordinates are not used.
    template <typename... Is>
    ck::f6x32_pk_t operator()(Is...)
    {
        ck::f6x32_pk_t packed;
        ck::static_for<0, 32, 1>{}([&](auto slot) {
            const float sample = distribution(generator);
            packed.pack(ck::type_convert<ck::f6_t>(sample), static_cast<ck::index_t>(slot));
        });
        return packed;
    }
};
/// Normal-distribution generator for ck::bf6x32_pk_t packs.
///
/// Holds its own std::mt19937 engine (seeded at construction) and a
/// std::normal_distribution<float> configured with the given mean / stddev.
template <>
struct GeneratorTensor_4<ck::bf6x32_pk_t>
{
    std::mt19937 generator;
    std::normal_distribution<float> distribution;

    GeneratorTensor_4(float mean, float stddev, unsigned int seed = 1)
        : generator(seed), distribution(mean, stddev)
    {
    }

    /// Draws one sample per index visited by static_for<0, 32, 1>, converts it
    /// to ck::bf6_t and packs it at that index. Coordinates are not used.
    template <typename... Is>
    ck::bf6x32_pk_t operator()(Is...)
    {
        ck::bf6x32_pk_t packed;
        ck::static_for<0, 32, 1>{}([&](auto slot) {
            const float sample = distribution(generator);
            packed.pack(ck::type_convert<ck::bf6_t>(sample), static_cast<ck::index_t>(slot));
        });
        return packed;
    }
};
struct GeneratorTensor_Checkboard
{
template <typename... Ts>
@@ -405,6 +590,53 @@ struct GeneratorTensor_Sequential
}
};
/// Sequential generator for ck::f4x2_pk_t: the produced value is the
/// coordinate along dimension `Dim`, converted through ck::float2_t to
/// ck::f4x2_t.
template <ck::index_t Dim>
struct GeneratorTensor_Sequential<ck::f4x2_pk_t, Dim>
{
    /// @param Xs element coordinates; only the entry at position `Dim` is read.
    template <typename... Ts>
    ck::f4x2_pk_t operator()(Ts... Xs) const
    {
        const std::array<ck::index_t, sizeof...(Ts)> coords = {
            {static_cast<ck::index_t>(Xs)...}};
        const float selected = static_cast<float>(coords[Dim]);
        return ck::type_convert<ck::f4x2_t>(ck::float2_t(selected));
    }
};
/// Sequential generator for ck::f6x32_pk_t: every index visited by
/// static_for<0, 32, 1> holds the coordinate along dimension `Dim`,
/// converted to ck::f6_t.
template <ck::index_t Dim>
struct GeneratorTensor_Sequential<ck::f6x32_pk_t, Dim>
{
    /// @param Xs element coordinates; only the entry at position `Dim` is read.
    template <typename... Ts>
    ck::f6x32_pk_t operator()(Ts... Xs) const
    {
        const std::array<ck::index_t, sizeof...(Ts)> coords = {
            {static_cast<ck::index_t>(Xs)...}};
        const float selected = static_cast<float>(coords[Dim]);

        ck::f6x32_pk_t packed;
        ck::static_for<0, 32, 1>{}([&](auto slot) {
            packed.pack(ck::type_convert<ck::f6_t>(selected), static_cast<ck::index_t>(slot));
        });
        return packed;
    }
};
/// Sequential generator for ck::bf6x32_pk_t: every index visited by
/// static_for<0, 32, 1> holds the coordinate along dimension `Dim`,
/// converted to ck::bf6_t.
template <ck::index_t Dim>
struct GeneratorTensor_Sequential<ck::bf6x32_pk_t, Dim>
{
    /// @param Xs element coordinates; only the entry at position `Dim` is read.
    template <typename... Ts>
    ck::bf6x32_pk_t operator()(Ts... Xs) const
    {
        const std::array<ck::index_t, sizeof...(Ts)> coords = {
            {static_cast<ck::index_t>(Xs)...}};
        const float selected = static_cast<float>(coords[Dim]);

        ck::bf6x32_pk_t packed;
        ck::static_for<0, 32, 1>{}([&](auto slot) {
            packed.pack(ck::type_convert<ck::bf6_t>(selected), static_cast<ck::index_t>(slot));
        });
        return packed;
    }
};
template <typename T, size_t NumEffectiveDim = 2>
struct GeneratorTensor_Diagonal
{