mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-24 14:54:47 +00:00
Merge commit 'e77a7ca2bc65651b5e87a0127e0335733aca2f35' into develop
This commit is contained in:
@@ -286,8 +286,25 @@ struct ThreadwiseTensorSliceTransfer_v7r3
|
||||
|
||||
static_for<0, nDst, 1>{}([&](auto i) {
|
||||
using elm_vector_t = typename remove_cvref_t<decltype(elm_vectors[i])>::type;
|
||||
elm_vectors(i).template AsType<elm_vector_t>()(I0) =
|
||||
oob_val ? elm_vectors(i).template AsType<elm_vector_t>()[I0] : elm_vector_t{0};
|
||||
using scalar_t =
|
||||
remove_cvref_t<decltype(elm_vectors(i).template AsType<elm_vector_t>()[I0])>;
|
||||
|
||||
// This is a bit ugly but necessary to be able to compile f8 instances for grouped
|
||||
// convolution forward. For some reason for that specific type there is an ambiguity
|
||||
// in the type resolution for the ternary expression. I added an explicit cast to
|
||||
// disambiguate and only use it for f8 just in case it affects performance.
|
||||
if constexpr(std::is_same_v<scalar_t, ck::f8_ocp_t>)
|
||||
{
|
||||
elm_vectors(i).template AsType<elm_vector_t>()(I0) =
|
||||
oob_val ? elm_vector_t{elm_vectors(i).template AsType<elm_vector_t>()[I0]}
|
||||
: elm_vector_t{0};
|
||||
}
|
||||
else
|
||||
{
|
||||
elm_vectors(i).template AsType<elm_vector_t>()(I0) =
|
||||
oob_val ? elm_vectors(i).template AsType<elm_vector_t>()[I0]
|
||||
: elm_vector_t{0};
|
||||
}
|
||||
});
|
||||
|
||||
elm_vectors_tuple_(thread_scratch_id)(iAccess) = elm_vectors;
|
||||
|
||||
Reference in New Issue
Block a user