Mirror of https://github.com/ROCm/composable_kernel.git (synced 2026-05-11 08:50:17 +00:00)
update build
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
cmake_minimum_required(VERSION 2.8.3)
|
||||
#cmake_minimum_required(VERSION 2.8.3)
|
||||
cmake_minimum_required(VERSION 3.9)
|
||||
project(modular_convolution)
|
||||
|
||||
#c++
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
set(DRIVER_SOURCE driver.cpp)
|
||||
if(DEVICE_BACKEND STREQUAL "HIP")
|
||||
set(DRIVER_SOURCE driver.hip.cpp)
|
||||
elseif(DEVICE_BACKEND STREQUAL "CUDA")
|
||||
set(DRIVER_SOURCE driver.cu)
|
||||
endif()
|
||||
|
||||
add_executable(driver ${DRIVER_SOURCE})
|
||||
target_link_libraries(driver PRIVATE tensor)
|
||||
|
||||
1
driver/driver.cu
Symbolic link
@@ -0,0 +1 @@
|
||||
driver.hip.cpp
|
||||
@@ -50,7 +50,7 @@ struct GeneratorTensor_3
|
||||
std::initializer_list<std::size_t> ids = {static_cast<std::size_t>(is)...};
|
||||
std::vector<std::size_t> lens(sizeof...(Is), 100);
|
||||
std::vector<std::size_t> strides(sizeof...(Is), 1);
|
||||
std::partial_sum(lens.rbegin(), lens.rbegin() + (sizeof...(Is)-1), strides.rbegin() + 1);
|
||||
std::partial_sum(lens.rbegin(), lens.rbegin() + (sizeof...(Is) - 1), strides.rbegin() + 1);
|
||||
return std::inner_product(ids.begin(), ids.end(), strides.begin(), std::size_t(0)) + 1;
|
||||
#endif
|
||||
}
|
||||
@@ -340,7 +340,7 @@ void host_winograd_3x3_convolution(
|
||||
std::size_t ho = OutTileSizeH * y + j;
|
||||
for(int i = 0; i < OutTileSizeW; ++i)
|
||||
{
|
||||
std::size_t wo = OutTileSizeW * x + i;
|
||||
std::size_t wo = OutTileSizeW * x + i;
|
||||
out(n, k, ho, wo) = out_hold(n, k, y, x, j, i);
|
||||
}
|
||||
}
|
||||
@@ -393,13 +393,13 @@ int main()
|
||||
constexpr unsigned WPad = 0;
|
||||
#elif 0
|
||||
// 3x3, 34x34
|
||||
constexpr unsigned N = 64;
|
||||
constexpr unsigned C = 256;
|
||||
constexpr unsigned N = 64;
|
||||
constexpr unsigned C = 256;
|
||||
constexpr unsigned HI = 34;
|
||||
constexpr unsigned WI = 34;
|
||||
constexpr unsigned K = 64;
|
||||
constexpr unsigned S = 3;
|
||||
constexpr unsigned R = 3;
|
||||
constexpr unsigned K = 64;
|
||||
constexpr unsigned S = 3;
|
||||
constexpr unsigned R = 3;
|
||||
|
||||
constexpr unsigned HPad = 0;
|
||||
constexpr unsigned WPad = 0;
|
||||
@@ -11,7 +11,7 @@ set_target_properties(tensor PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
|
||||
if(DEVICE_BACKEND STREQUAL "CUDA")
|
||||
target_link_libraries(device nvToolsExt cudart)
|
||||
target_link_libraries(tensor nvToolsExt cudart)
|
||||
endif()
|
||||
|
||||
install(TARGETS tensor LIBRARY DESTINATION lib)
|
||||
|
||||
@@ -43,7 +43,7 @@ float launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, Args... args)
|
||||
hipGetErrorString(hipGetLastError());
|
||||
#elif DEVICE_BACKEND_CUDA
|
||||
const void* f = reinterpret_cast<const void*>(kernel);
|
||||
void* p_args = {&args...};
|
||||
void* p_args[] = {&args...};
|
||||
|
||||
timer.Start();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user