Merge commit 'ffe9775e7071d524caa70cf13017ebe80fe6272b' into develop

This commit is contained in:
assistant-librarian[bot]
2025-09-11 20:11:33 +00:00
parent bc3e5d8191
commit ed41349e19
52 changed files with 633 additions and 122 deletions

View File

@@ -44,11 +44,27 @@ using DeviceElementwisePermuteInstance = ck::tensor_operation::device::DeviceEle
ck::Sequence<8, 8>, // InScalarPerVectorSeq
ck::Sequence<8>>; // OutScalarPerVectorSeq
int main()
int main(int argc, char* argv[])
{
bool do_verification = true;
bool time_kernel = true;
if(argc == 1)
{
// use default
}
else if(argc == 3)
{
do_verification = std::stoi(argv[1]);
time_kernel = std::stoi(argv[2]);
}
else
{
printf("arg1: verification (0=no, 1=yes)\n");
printf("arg2: time kernel (0=no, 1=yes)\n");
exit(0);
}
std::vector<std::size_t> nchw = {16, 128, 32, 64};
std::array<ck::index_t, 4> ab_lengths;
std::array<ck::index_t, 4> ab_strides = {static_cast<int>(nchw[1] * nchw[2] * nchw[3]),