mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-13 01:36:06 +00:00
Allow distinct K0/K1 values for A/B block descriptor (#98)
* add gitignore * host tensor: allow generating sequentially increasing value in a given dimension * gridwise gemm v3r1: allow distinct K0/K1 values for A/B block descriptor - remove dangling header include - modify example gemm_xdl accordingly - infer KPack value from M/NPerXdl - device conv2d fwd: update parameters accordingly for the underlying gridwise gemm v3r1 (API for conv2d fwd stays the same for now until we decide to expose individual K0s for activation and weight) * add LDS data dump utility * profiler: reflect API change for distinct K0/K1 for A/B matrices * profiler: add conflict-free LDS write FP16 kernel instances * fix accidental perf regression * address feedback; cosmetic changes * clang-format for new files * format Co-authored-by: Chao Liu <chao.liu2@amd.com>
This commit is contained in:
@@ -154,4 +154,15 @@ struct GeneratorTensor_Checkboard
|
||||
}
|
||||
};
|
||||
|
||||
template <ck::index_t Dim>
|
||||
struct GeneratorTensor_Sequential
|
||||
{
|
||||
template <typename... Ts>
|
||||
float operator()(Ts... Xs) const
|
||||
{
|
||||
std::array<ck::index_t, sizeof...(Ts)> dims = {{static_cast<ck::index_t>(Xs)...}};
|
||||
return dims[Dim];
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user