mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 22:39:03 +00:00
* Implement hiprtc for codegen tests
* Introduce gemm_softmax_gemm to codegen.
* Fix codegen build issues.
* Address PR comments.
* Separate ck_host lib and gemm_softmax_gemm into different PR.
* Fix cmake.
* Replace ENV variable with CMake option for toggling hipRTC in codegen
tests.
* Address PR comments.
* fix clang format
* Add missing header in magic_division.hpp
* - Workaround for hipRTC content wrapper
- Move descriptor for gemm_softmax_gemm to different branch
* Fix formatting.
* Revert "Fix formatting."
This reverts commit b5209eaef4.
* formatting fix
* fixed header guard issues
* updated header guards
* updated data_type for new types
* fixed redefinition error
* Add codegen test for batched_gemm_softmax_gemm.
Signed-off-by: Mirza Halilcevic <mirza.halilcevic@amd.com>
* formatting fix
---------
Signed-off-by: Mirza Halilcevic <mirza.halilcevic@amd.com>
Co-authored-by: Dino Musić <dino.music@htecgroup.com>
Co-authored-by: Mirza Halilcevic <mirza.halilcevic@htecgroup.com>
Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com>
Co-authored-by: arai713 <67439843+arai713@users.noreply.github.com>
Co-authored-by: Astha Rai <astha.rai713@gmail.com>
Co-authored-by: Mirza Halilcevic <mirza.halilcevic@amd.com>
170 lines
5.2 KiB
C++
170 lines
5.2 KiB
C++
// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#pragma once
|
|
#ifndef __HIPCC_RTC__
|
|
#include <hip/hip_runtime.h>
|
|
|
|
#include "ck/ck.hpp"
|
|
#include "ck/stream_config.hpp"
|
|
#include "ck/host_utility/hip_check_error.hpp"
|
|
|
|
template <typename... Args, typename F>
|
|
float launch_and_time_kernel(const StreamConfig& stream_config,
|
|
F kernel,
|
|
dim3 grid_dim,
|
|
dim3 block_dim,
|
|
std::size_t lds_byte,
|
|
Args... args)
|
|
{
|
|
#if CK_TIME_KERNEL
|
|
if(stream_config.time_kernel_)
|
|
{
|
|
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
|
|
{
|
|
printf("%s: grid_dim {%u, %u, %u}, block_dim {%u, %u, %u} \n",
|
|
__func__,
|
|
grid_dim.x,
|
|
grid_dim.y,
|
|
grid_dim.z,
|
|
block_dim.x,
|
|
block_dim.y,
|
|
block_dim.z);
|
|
|
|
printf("Warm up %d times\n", stream_config.cold_niters_);
|
|
}
|
|
// warm up
|
|
for(int i = 0; i < stream_config.cold_niters_; ++i)
|
|
{
|
|
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
|
|
hip_check_error(hipGetLastError());
|
|
}
|
|
|
|
const int nrepeat = stream_config.nrepeat_;
|
|
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
|
|
{
|
|
printf("Start running %d times...\n", nrepeat);
|
|
}
|
|
hipEvent_t start, stop;
|
|
|
|
hip_check_error(hipEventCreate(&start));
|
|
hip_check_error(hipEventCreate(&stop));
|
|
|
|
hip_check_error(hipDeviceSynchronize());
|
|
hip_check_error(hipEventRecord(start, stream_config.stream_id_));
|
|
|
|
for(int i = 0; i < nrepeat; ++i)
|
|
{
|
|
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
|
|
hip_check_error(hipGetLastError());
|
|
}
|
|
|
|
hip_check_error(hipEventRecord(stop, stream_config.stream_id_));
|
|
hip_check_error(hipEventSynchronize(stop));
|
|
|
|
float total_time = 0;
|
|
|
|
hip_check_error(hipEventElapsedTime(&total_time, start, stop));
|
|
|
|
hip_check_error(hipEventDestroy(start));
|
|
hip_check_error(hipEventDestroy(stop));
|
|
|
|
return total_time / nrepeat;
|
|
}
|
|
else
|
|
{
|
|
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
|
|
hip_check_error(hipGetLastError());
|
|
|
|
return 0;
|
|
}
|
|
#else
|
|
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
|
|
hip_check_error(hipGetLastError());
|
|
|
|
return 0;
|
|
#endif
|
|
}
|
|
|
|
template <typename... Args, typename F, typename PreProcessFunc>
|
|
float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
|
|
PreProcessFunc preprocess,
|
|
F kernel,
|
|
dim3 grid_dim,
|
|
dim3 block_dim,
|
|
std::size_t lds_byte,
|
|
Args... args)
|
|
{
|
|
#if CK_TIME_KERNEL
|
|
if(stream_config.time_kernel_)
|
|
{
|
|
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
|
|
{
|
|
printf("%s: grid_dim {%u, %u, %u}, block_dim {%u, %u, %u} \n",
|
|
__func__,
|
|
grid_dim.x,
|
|
grid_dim.y,
|
|
grid_dim.z,
|
|
block_dim.x,
|
|
block_dim.y,
|
|
block_dim.z);
|
|
|
|
printf("Warm up %d times\n", stream_config.cold_niters_);
|
|
}
|
|
// warm up
|
|
preprocess();
|
|
for(int i = 0; i < stream_config.cold_niters_; ++i)
|
|
{
|
|
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
|
|
hip_check_error(hipGetLastError());
|
|
}
|
|
|
|
const int nrepeat = stream_config.nrepeat_;
|
|
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
|
|
{
|
|
printf("Start running %d times...\n", nrepeat);
|
|
}
|
|
hipEvent_t start, stop;
|
|
|
|
hip_check_error(hipEventCreate(&start));
|
|
hip_check_error(hipEventCreate(&stop));
|
|
|
|
hip_check_error(hipDeviceSynchronize());
|
|
hip_check_error(hipEventRecord(start, stream_config.stream_id_));
|
|
|
|
for(int i = 0; i < nrepeat; ++i)
|
|
{
|
|
preprocess();
|
|
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
|
|
hip_check_error(hipGetLastError());
|
|
}
|
|
|
|
hip_check_error(hipEventRecord(stop, stream_config.stream_id_));
|
|
hip_check_error(hipEventSynchronize(stop));
|
|
|
|
float total_time = 0;
|
|
|
|
hip_check_error(hipEventElapsedTime(&total_time, start, stop));
|
|
|
|
hip_check_error(hipEventDestroy(start));
|
|
hip_check_error(hipEventDestroy(stop));
|
|
|
|
return total_time / nrepeat;
|
|
}
|
|
else
|
|
{
|
|
preprocess();
|
|
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
|
|
hip_check_error(hipGetLastError());
|
|
|
|
return 0;
|
|
}
|
|
#else
|
|
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
|
|
hip_check_error(hipGetLastError());
|
|
|
|
return 0;
|
|
#endif
|
|
}
|
|
#endif
|