mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-21 21:39:15 +00:00
Add host lib (#1134)
* Format
* Format
* Format
* Remove const
* Use the right template
* Format
* Format
* add row/col instances
* Add missing file
* fixed
* Format
* Updates
* Format
* fixed rrr layout
* Format
* Update test and embed modules
* Restore older version
* Update year
* Set -fPIC
* Format
* Use double for isnan
* rename host folder to codegen + minor fix
* add codegen CI test
* add option to build components without building CK
* fix the groovy syntax
* fix typo
* use the correct function for the codegen stage
---------
Co-authored-by: Jing Zhang <jizha@amd.com>
Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
Co-authored-by: illsilin <Illia.Silin@amd.com>
[ROCm/composable_kernel commit: 8eff4d62b6]
This commit is contained in:
95
codegen/test/rtc/src/compile_kernel.cpp
Normal file
95
codegen/test/rtc/src/compile_kernel.cpp
Normal file
@@ -0,0 +1,95 @@
|
||||
#include "rtc/hip.hpp"
|
||||
#include <rtc/compile_kernel.hpp>
|
||||
#include <rtc/tmp_dir.hpp>
|
||||
#include <stdexcept>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cassert>
|
||||
|
||||
namespace rtc {
|
||||
|
||||
template <class T>
|
||||
T generic_read_file(const std::string& filename, size_t offset = 0, size_t nbytes = 0)
|
||||
{
|
||||
std::ifstream is(filename, std::ios::binary | std::ios::ate);
|
||||
if(nbytes == 0)
|
||||
{
|
||||
// if there is a non-zero offset and nbytes is not set,
|
||||
// calculate size of remaining bytes to read
|
||||
nbytes = is.tellg();
|
||||
if(offset > nbytes)
|
||||
throw std::runtime_error("offset is larger than file size");
|
||||
nbytes -= offset;
|
||||
}
|
||||
if(nbytes < 1)
|
||||
throw std::runtime_error("Invalid size for: " + filename);
|
||||
is.seekg(offset, std::ios::beg);
|
||||
|
||||
T buffer(nbytes, 0);
|
||||
if(not is.read(&buffer[0], nbytes))
|
||||
throw std::runtime_error("Error reading file: " + filename);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
std::vector<char> read_buffer(const std::string& filename, size_t offset = 0, size_t nbytes = 0)
|
||||
{
|
||||
return generic_read_file<std::vector<char>>(filename, offset, nbytes);
|
||||
}
|
||||
|
||||
std::string read_string(const std::string& filename)
|
||||
{
|
||||
return generic_read_file<std::string>(filename);
|
||||
}
|
||||
|
||||
void write_buffer(const std::string& filename, const char* buffer, std::size_t size)
|
||||
{
|
||||
std::ofstream os(filename);
|
||||
os.write(buffer, size);
|
||||
}
|
||||
void write_buffer(const std::string& filename, const std::vector<char>& buffer)
|
||||
{
|
||||
write_buffer(filename, buffer.data(), buffer.size());
|
||||
}
|
||||
void write_string(const std::string& filename, const std::string_view& buffer)
|
||||
{
|
||||
write_buffer(filename, buffer.data(), buffer.size());
|
||||
}
|
||||
|
||||
std::string compiler() { return "/opt/rocm/llvm/bin/clang++ -x hip --cuda-device-only"; }
|
||||
|
||||
kernel compile_kernel(const std::vector<src_file>& srcs, compile_options options)
|
||||
{
|
||||
assert(not srcs.empty());
|
||||
tmp_dir td{"compile"};
|
||||
options.flags += " -I. -O3";
|
||||
options.flags += " -std=c++17";
|
||||
options.flags += " --offload-arch=" + get_device_name();
|
||||
std::string out;
|
||||
|
||||
for(const auto& src : srcs)
|
||||
{
|
||||
std::filesystem::path full_path = td.path / src.path;
|
||||
std::filesystem::path parent_path = full_path.parent_path();
|
||||
std::filesystem::create_directories(parent_path);
|
||||
write_string(full_path.string(), src.content);
|
||||
if(src.path.extension().string() == ".cpp")
|
||||
{
|
||||
options.flags += " -c " + src.path.filename().string();
|
||||
if(out.empty())
|
||||
out = src.path.stem().string() + ".o";
|
||||
}
|
||||
}
|
||||
|
||||
options.flags += " -o " + out;
|
||||
td.execute(compiler() + options.flags);
|
||||
|
||||
auto out_path = td.path / out;
|
||||
if(not std::filesystem::exists(out_path))
|
||||
throw std::runtime_error("Output file missing: " + out);
|
||||
|
||||
auto obj = read_buffer(out_path.string());
|
||||
|
||||
return kernel{obj.data(), options.kernel_name};
|
||||
}
|
||||
|
||||
} // namespace rtc
|
||||
102
codegen/test/rtc/src/hip.cpp
Normal file
102
codegen/test/rtc/src/hip.cpp
Normal file
@@ -0,0 +1,102 @@
|
||||
#include <rtc/hip.hpp>
|
||||
#include <rtc/manage_ptr.hpp>
|
||||
#include <stdexcept>
|
||||
#include <cassert>
|
||||
|
||||
namespace rtc {
|
||||
|
||||
using hip_ptr = RTC_MANAGE_PTR(void, hipFree);
|
||||
|
||||
std::string hip_error(int error) { return hipGetErrorString(static_cast<hipError_t>(error)); }
|
||||
|
||||
int get_device_id()
|
||||
{
|
||||
int device;
|
||||
auto status = hipGetDevice(&device);
|
||||
if(status != hipSuccess)
|
||||
throw std::runtime_error("No device");
|
||||
return device;
|
||||
}
|
||||
|
||||
std::string get_device_name()
|
||||
{
|
||||
hipDeviceProp_t props{};
|
||||
auto status = hipGetDeviceProperties(&props, get_device_id());
|
||||
if(status != hipSuccess)
|
||||
throw std::runtime_error("Failed to get device properties");
|
||||
return props.gcnArchName;
|
||||
}
|
||||
|
||||
bool is_device_ptr(const void* ptr)
|
||||
{
|
||||
hipPointerAttribute_t attr;
|
||||
auto status = hipPointerGetAttributes(&attr, ptr);
|
||||
if(status != hipSuccess)
|
||||
return false;
|
||||
return attr.type == hipMemoryTypeDevice;
|
||||
}
|
||||
|
||||
void gpu_sync()
|
||||
{
|
||||
auto status = hipDeviceSynchronize();
|
||||
if(status != hipSuccess)
|
||||
throw std::runtime_error("hip device synchronization failed: " + hip_error(status));
|
||||
}
|
||||
|
||||
std::size_t get_available_gpu_memory()
|
||||
{
|
||||
size_t free;
|
||||
size_t total;
|
||||
auto status = hipMemGetInfo(&free, &total);
|
||||
if(status != hipSuccess)
|
||||
throw std::runtime_error("Failed getting available memory: " + hip_error(status));
|
||||
return free;
|
||||
}
|
||||
|
||||
std::shared_ptr<void> allocate_gpu(std::size_t sz, bool host)
|
||||
{
|
||||
if(sz > get_available_gpu_memory())
|
||||
throw std::runtime_error("Memory not available to allocate buffer: " + std::to_string(sz));
|
||||
void* alloc_ptr = nullptr;
|
||||
auto status = host ? hipHostMalloc(&alloc_ptr, sz) : hipMalloc(&alloc_ptr, sz);
|
||||
if(status != hipSuccess)
|
||||
{
|
||||
if(host)
|
||||
throw std::runtime_error("Gpu allocation failed: " + hip_error(status));
|
||||
else
|
||||
return allocate_gpu(sz, true);
|
||||
}
|
||||
assert(alloc_ptr != nullptr);
|
||||
std::shared_ptr<void> result = share(hip_ptr{alloc_ptr});
|
||||
return result;
|
||||
}
|
||||
|
||||
std::shared_ptr<void> write_to_gpu(const void* x, std::size_t sz, bool host)
|
||||
{
|
||||
gpu_sync();
|
||||
auto result = allocate_gpu(sz, host);
|
||||
assert(is_device_ptr(result.get()));
|
||||
assert(not is_device_ptr(x));
|
||||
auto status = hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice);
|
||||
if(status != hipSuccess)
|
||||
throw std::runtime_error("Copy to gpu failed: " + hip_error(status));
|
||||
return result;
|
||||
}
|
||||
|
||||
std::shared_ptr<void> read_from_gpu(const void* x, std::size_t sz)
|
||||
{
|
||||
gpu_sync();
|
||||
std::shared_ptr<char> result(new char[sz]);
|
||||
assert(not is_device_ptr(result.get()));
|
||||
if(not is_device_ptr(x))
|
||||
{
|
||||
throw std::runtime_error(
|
||||
"read_from_gpu() requires Src buffer to be on the GPU, Copy from gpu failed\n");
|
||||
}
|
||||
auto status = hipMemcpy(result.get(), x, sz, hipMemcpyDeviceToHost);
|
||||
if(status != hipSuccess)
|
||||
throw std::runtime_error("Copy from gpu failed: " + hip_error(status)); // NOLINT
|
||||
return std::static_pointer_cast<void>(result);
|
||||
}
|
||||
|
||||
} // namespace rtc
|
||||
121
codegen/test/rtc/src/kernel.cpp
Normal file
121
codegen/test/rtc/src/kernel.cpp
Normal file
@@ -0,0 +1,121 @@
|
||||
#include <rtc/kernel.hpp>
|
||||
#include <rtc/manage_ptr.hpp>
|
||||
#include <rtc/hip.hpp>
|
||||
#include <cassert>
|
||||
|
||||
// extern declare the function since hip/hip_ext.h header is broken
|
||||
extern hipError_t hipExtModuleLaunchKernel(hipFunction_t, // NOLINT
|
||||
uint32_t,
|
||||
uint32_t,
|
||||
uint32_t,
|
||||
uint32_t,
|
||||
uint32_t,
|
||||
uint32_t,
|
||||
size_t,
|
||||
hipStream_t,
|
||||
void**,
|
||||
void**,
|
||||
hipEvent_t = nullptr,
|
||||
hipEvent_t = nullptr,
|
||||
uint32_t = 0);
|
||||
|
||||
namespace rtc {
|
||||
|
||||
std::vector<char> pack_args(const std::vector<kernel_argument>& args)
|
||||
{
|
||||
std::vector<char> kernargs;
|
||||
for(auto&& arg : args)
|
||||
{
|
||||
std::size_t n = arg.size;
|
||||
const auto* p = static_cast<const char*>(arg.data);
|
||||
// Insert padding
|
||||
std::size_t padding = (arg.align - (kernargs.size() % arg.align)) % arg.align;
|
||||
kernargs.insert(kernargs.end(), padding, 0);
|
||||
kernargs.insert(kernargs.end(), p, p + n);
|
||||
}
|
||||
return kernargs;
|
||||
}
|
||||
|
||||
using hip_module_ptr = RTC_MANAGE_PTR(hipModule_t, hipModuleUnload);
|
||||
|
||||
struct kernel_impl
|
||||
{
|
||||
hip_module_ptr module = nullptr;
|
||||
hipFunction_t fun = nullptr;
|
||||
};
|
||||
|
||||
hip_module_ptr load_module(const char* image)
|
||||
{
|
||||
hipModule_t raw_m;
|
||||
auto status = hipModuleLoadData(&raw_m, image);
|
||||
hip_module_ptr m{raw_m};
|
||||
if(status != hipSuccess)
|
||||
throw std::runtime_error("Failed to load module: " + hip_error(status));
|
||||
return m;
|
||||
}
|
||||
|
||||
kernel::kernel(const char* image, const std::string& name) : impl(std::make_shared<kernel_impl>())
|
||||
{
|
||||
impl->module = load_module(image);
|
||||
auto status = hipModuleGetFunction(&impl->fun, impl->module.get(), name.c_str());
|
||||
if(hipSuccess != status)
|
||||
throw std::runtime_error("Failed to get function: " + name + ": " + hip_error(status));
|
||||
}
|
||||
|
||||
void launch_kernel(hipFunction_t fun,
|
||||
hipStream_t stream,
|
||||
std::size_t global,
|
||||
std::size_t local,
|
||||
void* kernargs,
|
||||
std::size_t size)
|
||||
{
|
||||
assert(global > 0);
|
||||
assert(local > 0);
|
||||
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER,
|
||||
kernargs,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE,
|
||||
&size,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
|
||||
auto status = hipExtModuleLaunchKernel(fun,
|
||||
global,
|
||||
1,
|
||||
1,
|
||||
local,
|
||||
1,
|
||||
1,
|
||||
0,
|
||||
stream,
|
||||
nullptr,
|
||||
reinterpret_cast<void**>(&config),
|
||||
nullptr,
|
||||
nullptr);
|
||||
if(status != hipSuccess)
|
||||
throw std::runtime_error("Failed to launch kernel: " + hip_error(status));
|
||||
}
|
||||
|
||||
void kernel::launch(hipStream_t stream,
|
||||
std::size_t global,
|
||||
std::size_t local,
|
||||
std::vector<void*> args) const
|
||||
{
|
||||
assert(impl != nullptr);
|
||||
void* kernargs = args.data();
|
||||
std::size_t size = args.size() * sizeof(void*);
|
||||
|
||||
launch_kernel(impl->fun, stream, global, local, kernargs, size);
|
||||
}
|
||||
|
||||
void kernel::launch(hipStream_t stream,
|
||||
std::size_t global,
|
||||
std::size_t local,
|
||||
const std::vector<kernel_argument>& args) const
|
||||
{
|
||||
assert(impl != nullptr);
|
||||
std::vector<char> kernargs = pack_args(args);
|
||||
std::size_t size = kernargs.size();
|
||||
|
||||
launch_kernel(impl->fun, stream, global, local, kernargs.data(), size);
|
||||
}
|
||||
|
||||
} // namespace rtc
|
||||
48
codegen/test/rtc/src/tmp_dir.cpp
Normal file
48
codegen/test/rtc/src/tmp_dir.cpp
Normal file
@@ -0,0 +1,48 @@
|
||||
#include <rtc/tmp_dir.hpp>
|
||||
#include <algorithm>
|
||||
#include <random>
|
||||
#include <thread>
|
||||
#include <unistd.h>
|
||||
|
||||
namespace rtc {
|
||||
std::string random_string(std::string::size_type length)
|
||||
{
|
||||
static const std::string& chars = "0123456789"
|
||||
"abcdefghijklmnopqrstuvwxyz"
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
|
||||
std::mt19937 rg{std::random_device{}()};
|
||||
std::uniform_int_distribution<std::string::size_type> pick(0, chars.length() - 1);
|
||||
|
||||
std::string str(length, 0);
|
||||
std::generate(str.begin(), str.end(), [&] { return chars[pick(rg)]; });
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
std::string unique_string(const std::string& prefix)
|
||||
{
|
||||
auto pid = getpid();
|
||||
auto tid = std::this_thread::get_id();
|
||||
auto clk = std::chrono::steady_clock::now().time_since_epoch().count();
|
||||
std::stringstream ss;
|
||||
ss << std::hex << prefix << "-" << pid << "-" << tid << "-" << clk << "-" << random_string(16);
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
tmp_dir::tmp_dir(const std::string& prefix)
|
||||
: path(std::filesystem::temp_directory_path() /
|
||||
unique_string(prefix.empty() ? "ck-rtc" : "ck-rtc-" + prefix))
|
||||
{
|
||||
std::filesystem::create_directories(this->path);
|
||||
}
|
||||
|
||||
void tmp_dir::execute(const std::string& cmd) const
|
||||
{
|
||||
std::string s = "cd " + path.string() + "; " + cmd;
|
||||
std::system(s.c_str());
|
||||
}
|
||||
|
||||
tmp_dir::~tmp_dir() { std::filesystem::remove_all(this->path); }
|
||||
|
||||
} // namespace rtc
|
||||
Reference in New Issue
Block a user