mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-26 08:00:13 +00:00
Transpose 3d (#984)
* added working example for 5D input using 1D kernel
* example with 5D input tensor and 2d kernel - not working: issues with arguments
* added updated version of 3d device op - changed descriptors/dims
* added example file to check kernel
* fixed descriptor and isSupportedArgument stride problem
* added and modified kernel for 3d - updated tids/loop
* adding some more 5d example files
* fixed some issues
* changes made for testing
* working version: fixed error in stride for A, still a bit inefficient
* cleaned up formatting/comments
* updating formatting
* more formatting fixes
* fixing cmake, adding back gpu targets in cmake script
* adding client example
* added instances for client example
* fixed errors in client example
* implemented client ex with device_elementwise.hpp and device_elementwise_3d_impl.hpp
* removed extra files
* minor formatting and naming fixes
* adding test files and profiler
* fixing minor error
* minor fix
* removed unnecessary comments, renamed files
* updated instance list for client example, added different layout example
* removing instances
* fixed error in instance generation
* remove comments
* update profiler and client example tensor layouts
* fixed errors in test/profiler
* updated vector dim access to enable vector load
* updated test/profiler files
* updated example with 1d kernel
* updating profiler
* renamed files
---------
Co-authored-by: Jing Zhang <jizha@amd.com>
[ROCm/composable_kernel commit: 3af8c81a72]
This commit is contained in:
@@ -148,6 +148,7 @@ add_subdirectory(pool)
|
||||
add_subdirectory(batched_gemm_multi_d)
|
||||
add_subdirectory(grouped_convnd_bwd_data)
|
||||
add_subdirectory(conv_tensor_rearrange)
|
||||
add_subdirectory(transpose)
|
||||
if(GPU_TARGETS MATCHES "gfx11")
|
||||
add_subdirectory(wmma_op)
|
||||
endif()
|
||||
|
||||
9
test/transpose/CMakeLists.txt
Normal file
9
test/transpose/CMakeLists.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
# Build test_transpose once, and only when at least one of the requested
# GPU_TARGETS appears in the architecture list below.
list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
    # `target` flips to 1 after the first match so the executable is added a single time.
    if(target EQUAL 0 AND gpu IN_LIST gpu_list)
        add_gtest_executable(test_transpose test_transpose.cpp)
        target_link_libraries(test_transpose PRIVATE utility device_transpose_instance)
        set(target 1)
    endif()
endforeach()
|
||||
27
test/transpose/test_transpose.cpp
Normal file
27
test/transpose/test_transpose.cpp
Normal file
@@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <tuple>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
#include "test_transpose_util.hpp"
|
||||
|
||||
using F16 = ck::half_t;
|
||||
using F32 = float;
|
||||
|
||||
template <typename Tuple>
|
||||
class TestTranspose : public ::testing::Test
|
||||
{
|
||||
};
|
||||
|
||||
// clang-format off
|
||||
using KernelTypes = ::testing::Types<
|
||||
std::tuple< F16, F16>,
|
||||
std::tuple< F32, F32>
|
||||
>;
|
||||
// clang-format on
|
||||
|
||||
TYPED_TEST_SUITE(TestTranspose, KernelTypes);
|
||||
|
||||
//#include "test_transpose_ut_cases.inc"
|
||||
30
test/transpose/test_transpose_ut_cases.inc
Normal file
30
test/transpose/test_transpose_ut_cases.inc
Normal file
@@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
// Runs the fixture's Run() entry point.
// The draft of this case listed the 5-D shape N=16, C=8, D=16, H=32, W=8, but Run() is
// called without arguments, so no per-test shape is passed from here.
TYPED_TEST(TestTranspose, Test1)
{
    this->Run();
}
|
||||
|
||||
|
||||
// Runs the fixture's Run() entry point.
// The fixture name must match the suite registered with TYPED_TEST_SUITE (TestTranspose).
// The draft of this case listed the lengths {16, 8, 16, 32, 16}, but Run() is called
// without arguments, so no per-test shape is passed from here.
TYPED_TEST(TestTranspose, Test2)
{
    this->Run();
}
|
||||
|
||||
54
test/transpose/test_transpose_util.hpp
Normal file
54
test/transpose/test_transpose_util.hpp
Normal file
@@ -0,0 +1,54 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
#include "include/ck/utility/data_type.hpp"
|
||||
#include "profiler/profile_transpose_impl.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace test {
|
||||
|
||||
template <typename Tuple>
|
||||
class TestTranspose : public testing::Test
|
||||
{
|
||||
using F32 = float;
|
||||
|
||||
protected:
|
||||
using ADataType = std::tuple_element_t<0, Tuple>;
|
||||
using BDataType = std::tuple_element_t<1, Tuple>;
|
||||
|
||||
public:
|
||||
static constexpr bool verify_ = true;
|
||||
static constexpr int init_method_ = 1; // decimal value initialization
|
||||
static constexpr bool log_ = false;
|
||||
static constexpr bool bench_ = false; // measure kernel performance
|
||||
std::vector<std::vector<index_t>> lengths_ = {{16, 32, 16, 32, 16}, {16, 8, 16, 32, 8}};
|
||||
|
||||
void Run()
|
||||
{
|
||||
for(auto length : this->lengths_)
|
||||
{
|
||||
this->RunSingle(length);
|
||||
}
|
||||
}
|
||||
|
||||
void RunSingle()
|
||||
{
|
||||
bool pass = ck::profiler::profile_transpose_impl<ADataType, BDataType, 5>(
|
||||
verify_, init_method_, log_, bench_, lengths_);
|
||||
EXPECT_TRUE(pass);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace test
|
||||
} // namespace ck
|
||||
Reference in New Issue
Block a user