mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-06 15:54:31 +00:00
Introduce gemm_softmax_gemm to codegen.
This commit is contained in:
@@ -15,7 +15,8 @@ std::vector<rtc::src_file> get_headers_for_test()
|
||||
auto hs = ck::host::GetHeaders();
|
||||
std::transform(
|
||||
hs.begin(), hs.end(), std::back_inserter(result), [&](const auto& p) -> rtc::src_file {
|
||||
return {p.first, p.second};
|
||||
std::string sec(p.second.begin(), p.second.end());
|
||||
return {p.first, sec};
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
#include "ck/host/device_gemm_multiple_d/problem.hpp"
|
||||
#include "ck/host/device_gemm_multiple_d/operation.hpp"
|
||||
#include "ck/host/device_batched_gemm_softmax_gemm/problem.hpp"
|
||||
#include "ck/host/device_batched_gemm_softmax_gemm/operation.hpp"
|
||||
#include "ck/host/headers.hpp"
|
||||
#include "ck/host/stringutils.hpp"
|
||||
#include "ck/host/utils.hpp"
|
||||
@@ -15,13 +17,59 @@
|
||||
using half = _Float16;
|
||||
// using half = __fp16;
|
||||
|
||||
// NOLINTNEXTLINE
|
||||
const char* const disable_warning_pragma = R"__migraphx__(
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Weverything"
|
||||
${content}
|
||||
#pragma clang diagnostic pop
|
||||
)__migraphx__";
|
||||
|
||||
template <class P>
|
||||
std::string ck_disable_warnings(P p)
|
||||
{
|
||||
return ck::host::InterpolateString(disable_warning_pragma,
|
||||
{{"content", std::string{p.data(), p.size()}}});
|
||||
}
|
||||
|
||||
static std::unordered_map<std::string, std::string> create_ck_header_strings()
|
||||
{
|
||||
std::unordered_map<std::string, std::string> result;
|
||||
auto ck_headers = ck::host::GetHeaders();
|
||||
|
||||
std::transform(
|
||||
ck_headers.begin(), ck_headers.end(), std::inserter(result, result.begin()), [&](auto& p) {
|
||||
return std::pair<std::string, std::string>(p.first, ck_disable_warnings(p.second));
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::vector<rtc::src_file> create_ck_headers()
|
||||
{
|
||||
static const auto& header_strings = create_ck_header_strings();
|
||||
std::vector<rtc::src_file> srcs;
|
||||
std::transform(
|
||||
header_strings.begin(), header_strings.end(), std::back_inserter(srcs), [&](auto& p) -> rtc::src_file {
|
||||
std::string sec(p.second.begin(), p.second.end());
|
||||
return {p.first, sec};
|
||||
});
|
||||
return srcs;
|
||||
}
|
||||
|
||||
static inline const std::vector<rtc::src_file>& ck_headers()
|
||||
{
|
||||
static const auto& headers = create_ck_headers();
|
||||
return headers;
|
||||
}
|
||||
|
||||
std::vector<rtc::src_file> get_headers_for_test()
|
||||
{
|
||||
std::vector<rtc::src_file> result;
|
||||
auto hs = ck::host::GetHeaders();
|
||||
std::transform(
|
||||
hs.begin(), hs.end(), std::back_inserter(result), [&](const auto& p) -> rtc::src_file {
|
||||
return {p.first, p.second};
|
||||
std::string sec(p.second.begin(), p.second.end());
|
||||
return {p.first, sec};
|
||||
});
|
||||
return result;
|
||||
}
|
||||
@@ -130,10 +178,13 @@ const std::string gemm_compile_check = R"__ck__(
|
||||
|
||||
extern "C" __global__ void f(const ck::half_t* a, const ck::half_t* b, ck::half_t* c) {
|
||||
using G = ${template};
|
||||
constexpr auto desc = ${template}::make_descriptor(ck::make_naive_tensor_descriptor_packed(ck::make_tuple(${m}, ${k})),
|
||||
ck::make_naive_tensor_descriptor(ck::make_tuple(${n}, ${k}), ck::make_tuple(1, ${n})),
|
||||
ck::make_tuple(),
|
||||
ck::make_naive_tensor_descriptor_packed(ck::make_tuple(${m}, ${n})));
|
||||
constexpr auto desc =
|
||||
G::make_descriptor(ck::make_naive_tensor_descriptor_packed(ck::make_tuple(${m},
|
||||
${k})),
|
||||
ck::make_naive_tensor_descriptor(ck::make_tuple(${n},
|
||||
${k}), ck::make_tuple(1, ${n})), ck::make_tuple(),
|
||||
ck::make_naive_tensor_descriptor_packed(ck::make_tuple(${m},
|
||||
${n})));
|
||||
|
||||
static_assert(desc.IsValid(), "Invalid ck gemm.");
|
||||
|
||||
@@ -163,23 +214,32 @@ TEST_CASE(test_problem_kernel)
|
||||
std::string epilogue = "";
|
||||
std::string prologue = "";
|
||||
|
||||
for(auto solution : prob.GetSolutions("gfx90a", prologue, epilogue))
|
||||
auto solutions = prob.GetSolutions("gfx90a", prologue, epilogue);
|
||||
std::cout << "Num solutions: " << solutions.size() << std::endl;
|
||||
for(auto i = 0; i < solutions.size(); ++i)
|
||||
{
|
||||
auto src = ck::host::InterpolateString(gemm_compile_check,
|
||||
{{"include", prob.GetIncludeHeader()},
|
||||
{"template", solution.ToTemplateString()},
|
||||
{"m", std::to_string(prob.M)},
|
||||
{"n", std::to_string(prob.N)},
|
||||
{"k", std::to_string(prob.K)}});
|
||||
auto srcs = get_headers_for_test();
|
||||
srcs.push_back({"main.cpp", src});
|
||||
rtc::compile_options options;
|
||||
std::cout << "Testing solution " << std::to_string(i + 1) << std::endl;
|
||||
auto&& solution = solutions[i];
|
||||
auto src = ck::host::InterpolateString(gemm_compile_check,
|
||||
{{"include", prob.GetIncludeHeader()},
|
||||
{"template", solution.ToTemplateString()},
|
||||
{"m", std::to_string(prob.M)},
|
||||
{"n", std::to_string(prob.N)},
|
||||
{"k", std::to_string(prob.K)}});
|
||||
// auto srcs = get_headers_for_test();
|
||||
// srcs.push_back({"main.cpp", src});
|
||||
// rtc::compile_options options;
|
||||
// options.kernel_name = "f";
|
||||
rtc::hip_compile_options options;
|
||||
options.kernel_name = "f";
|
||||
auto k = rtc::compile_kernel(srcs, options);
|
||||
auto block_size = solution.GetTemplateParameter<std::size_t>("BlockSize");
|
||||
auto m_per_block = solution.GetTemplateParameter<std::size_t>("MPerBlock");
|
||||
auto n_per_block = solution.GetTemplateParameter<std::size_t>("NPerBlock");
|
||||
auto grid_size = ck::host::integer_divide_ceil(prob.M, m_per_block) *
|
||||
options.additional_src_files = ck_headers();
|
||||
// auto k = rtc::compile_kernel(srcs, options);
|
||||
std::cout << src << std::endl;
|
||||
auto k = rtc::compile_hip_code_object(src, options);
|
||||
auto block_size = solution.GetTemplateParameter<std::size_t>("BlockSize");
|
||||
auto m_per_block = solution.GetTemplateParameter<std::size_t>("MPerBlock");
|
||||
auto n_per_block = solution.GetTemplateParameter<std::size_t>("NPerBlock");
|
||||
auto grid_size = ck::host::integer_divide_ceil(prob.M, m_per_block) *
|
||||
ck::host::integer_divide_ceil(prob.N, n_per_block);
|
||||
k.launch(nullptr, grid_size * block_size, block_size)(a.data(), b.data(), c.data());
|
||||
|
||||
@@ -187,4 +247,34 @@ TEST_CASE(test_problem_kernel)
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a 1024x1024x1024x1024 batched gemm-softmax-gemm problem, asks it for the
// gfx90a solutions and prints each solution's template string. Nothing is
// compiled or launched here.
TEST_CASE(test_gemm_softmax_gemm)
{
    ck::host::device_batched_gemm_softmax_gemm::Problem prob;
    prob.TransA  = false;
    prob.TransB  = true;
    prob.TransB1 = false;
    prob.TransC  = false;
    prob.M       = 1024;
    prob.N       = 1024;
    prob.K       = 1024;
    prob.O       = 1024;
    check_all<half> check;
    auto a  = to_gpu(generate_buffer<half>(1024 * 1024, 0));
    auto b  = to_gpu(generate_buffer<half>(1024 * 1024, 1));
    auto b1 = to_gpu(generate_buffer<half>(1024 * 1024, 2));
    auto c  = to_gpu(generate_buffer<half>(1024 * 1024, 3));

    std::string epilogue = "";
    std::string prologue = "";

    auto solutions = prob.GetSolutions("gfx90a", prologue, epilogue);
    std::cout << "Num solutions: " << solutions.size() << std::endl;

    // Index with std::size_t so the comparison against size() is not signed/unsigned.
    for(std::size_t i = 0; i < solutions.size(); ++i)
    {
        std::cout << "Solution " << i << std::endl;
        std::cout << solutions[i].ToTemplateString() << std::endl;
        std::cout << std::endl;
    }
}
|
||||
|
||||
// Test-driver entry point: forwards the command line to test::run.
int main(int argc, const char* argv[])
{
    test::run(argc, argv);
}
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include <rtc/kernel.hpp>
|
||||
#include <ck/filesystem.hpp>
|
||||
#include <string>
|
||||
#include <functional>
|
||||
|
||||
namespace rtc {
|
||||
|
||||
@@ -19,9 +20,36 @@ struct compile_options
|
||||
std::string kernel_name = "main";
|
||||
};
|
||||
|
||||
// Options taken by compile_hip_code_object().
struct hip_compile_options
|
||||
{
|
||||
// Launch sizes. compile_hip_code_object() asserts both are > 0, checks
// global % local, and emits them as -DMIGRAPHX_NGLOBAL / -DMIGRAPHX_NLOCAL.
// NOTE(review): neither member has a default initializer, so a
// default-constructed object holds indeterminate values until they are set.
std::size_t global;
|
||||
std::size_t local;
|
||||
// Name passed to the kernel constructor together with the code object.
std::string kernel_name = "kernel";
|
||||
// Extra compiler flags; compile_hip_src_with_hiprtc() splits this on ' '.
std::string params = "";
|
||||
// Sources placed ahead of "main.cpp" in the list handed to the compiler.
std::vector<src_file> additional_src_files = {};
|
||||
|
||||
/**
 * @brief Set the launch parameters
 *
 * NOTE(review): the previous text described a `v` value parameter that could
 * override "global"/"local"; this overload has no such parameter.
 *
 * @param compute_global A function used to compute the global based on the local
 * @param default_local The default local to use
 */
|
||||
void set_launch_params(const std::function<std::size_t(std::size_t local)>& compute_global,
|
||||
std::size_t default_local = 1024);
|
||||
|
||||
// Convenience overload: forwards with a function that always returns default_global.
void set_launch_params(std::size_t default_global, std::size_t default_local = 1024)
|
||||
{
|
||||
set_launch_params([=](auto) { return default_global; }, default_local);
|
||||
}
|
||||
};
|
||||
|
||||
kernel compile_kernel(const std::vector<src_file>& src,
|
||||
compile_options options = compile_options{});
|
||||
|
||||
kernel compile_hip_code_object(const std::string& content, hip_compile_options options);
|
||||
|
||||
} // namespace rtc
|
||||
|
||||
#endif
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace rtc {
|
||||
|
||||
|
||||
@@ -1,10 +1,13 @@
|
||||
#include "rtc/hip.hpp"
|
||||
#include <rtc/compile_kernel.hpp>
|
||||
#include <hip/hiprtc.h>
|
||||
#include <rtc/tmp_dir.hpp>
|
||||
#include <stdexcept>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cassert>
|
||||
#include <deque>
|
||||
#include <numeric>
|
||||
|
||||
namespace rtc {
|
||||
|
||||
@@ -100,4 +103,345 @@ kernel compile_kernel(const std::vector<src_file>& srcs, compile_options options
|
||||
return kernel{obj.data(), options.kernel_name};
|
||||
}
|
||||
|
||||
struct hiprtc_src_file
|
||||
{
|
||||
hiprtc_src_file() = default;
|
||||
hiprtc_src_file(const src_file& s) : path(s.path.string()), content(s.content) {}
|
||||
std::string path;
|
||||
std::string content;
|
||||
template <class Self, class F>
|
||||
static auto reflect(Self& self, F f)
|
||||
{
|
||||
return pack(f(self.path, "path"), f(self.content, "content"));
|
||||
}
|
||||
};
|
||||
|
||||
std::string hiprtc_error(hiprtcResult err, const std::string& msg)
|
||||
{
|
||||
return "hiprtc: " + (hiprtcGetErrorString(err) + (": " + msg));
|
||||
}
|
||||
|
||||
// Throws std::runtime_error carrying hiprtc_error(err, msg) unless err is
// HIPRTC_SUCCESS. `ctx` is accepted but not used in the message.
void hiprtc_check_error(hiprtcResult err, const std::string& msg, const std::string& ctx)
{
    if(err == HIPRTC_SUCCESS)
        return;
    throw std::runtime_error(hiprtc_error(err, msg));
}
|
||||
|
||||
// Checks the result of a hiprtc call; the stringified call text becomes the
// error message passed to hiprtc_check_error.
// NOLINTNEXTLINE
#define MIGRAPHX_HIPRTC(...) hiprtc_check_error(__VA_ARGS__, #__VA_ARGS__, "Lorem ipsum dolor sit amet")

// Throws a std::runtime_error built from a hiprtc result code and a message.
#define MIGRAPHX_HIPRTC_THROW(error, msg) throw std::runtime_error(hiprtc_error(error, msg))
|
||||
|
||||
template <class F, F f> // NOLINT
|
||||
struct manage_deleter
|
||||
{
|
||||
template <class T>
|
||||
void operator()(T* x) const
|
||||
{
|
||||
if(x != nullptr)
|
||||
{
|
||||
(void)f(x);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <class T, class F, F f> // NOLINT
|
||||
using manage_ptr = std::unique_ptr<T, manage_deleter<F, f>>;
|
||||
|
||||
#define MIGRAPHX_MANAGE_PTR(T, F) manage_ptr<std::remove_pointer_t<T>, decltype(&F), &F> // NOLINT
|
||||
|
||||
// Workaround hiprtc's broken API
|
||||
void hiprtc_program_destroy(hiprtcProgram prog) { hiprtcDestroyProgram(&prog); }
|
||||
using hiprtc_program_ptr = MIGRAPHX_MANAGE_PTR(hiprtcProgram, hiprtc_program_destroy);
|
||||
|
||||
template <class... Ts>
|
||||
hiprtc_program_ptr hiprtc_program_create(Ts... xs)
|
||||
{
|
||||
hiprtcProgram prog = nullptr;
|
||||
auto result = hiprtcCreateProgram(&prog, xs...);
|
||||
hiprtc_program_ptr p{prog};
|
||||
if(result != HIPRTC_SUCCESS)
|
||||
MIGRAPHX_HIPRTC_THROW(result, "Create program failed.");
|
||||
return p;
|
||||
}
|
||||
|
||||
// True when `value` begins with `prefix` (an empty prefix always matches).
bool starts_with(const std::string& value, const std::string& prefix)
{
    // compare() clamps the length to value's size, so a prefix longer than
    // value compares unequal.
    return value.compare(0, prefix.size(), prefix) == 0;
}
|
||||
|
||||
// True when `value` ends with `suffix` (an empty suffix always matches).
bool ends_with(const std::string& value, const std::string& suffix)
{
    const std::size_t suffix_len = suffix.size();
    const std::size_t value_len  = value.size();
    if(suffix_len > value_len)
        return false;
    return value.compare(value_len - suffix_len, suffix_len, suffix) == 0;
}
|
||||
|
||||
// Splits `s` at every occurrence of `delim`. Empty fields are kept, so the
// result always has (number of delimiters + 1) elements; an empty input
// yields a single empty string.
std::vector<std::string> split_string(const std::string& s, char delim)
{
    std::vector<std::string> fields;
    std::string::size_type field_begin = 0;
    for(;;)
    {
        const std::string::size_type field_end = s.find(delim, field_begin);
        if(field_end == std::string::npos)
        {
            // Last field: everything after the final delimiter (possibly empty).
            fields.push_back(s.substr(field_begin));
            break;
        }
        fields.push_back(s.substr(field_begin, field_end - field_begin));
        field_begin = field_end + 1;
    }
    return fields;
}
|
||||
|
||||
// Concatenates the elements of `strings`, inserting `delim` between
// consecutive elements. Returns "" for an empty range.
//
// The range is taken by const reference and the result is grown in place, so
// neither the container nor its elements are copied per call/step.
template <class Strings>
inline std::string join_strings(const Strings& strings, const std::string& delim)
{
    auto it         = strings.begin();
    const auto last = strings.end();
    if(it == last)
        return "";

    std::string joined = *it;
    for(++it; it != last; ++it)
    {
        joined += delim;
        joined += *it;
    }
    return joined;
}
|
||||
|
||||
struct hiprtc_program
|
||||
{
|
||||
struct string_array
|
||||
{
|
||||
std::deque<std::string> strings{};
|
||||
std::vector<const char*> c_strs{};
|
||||
|
||||
string_array() {}
|
||||
string_array(const string_array&) = delete;
|
||||
|
||||
std::size_t size() const { return strings.size(); }
|
||||
|
||||
const char** data() { return c_strs.data(); }
|
||||
|
||||
void push_back(std::string s)
|
||||
{
|
||||
strings.push_back(std::move(s));
|
||||
c_strs.push_back(strings.back().c_str());
|
||||
}
|
||||
};
|
||||
|
||||
hiprtc_program_ptr prog = nullptr;
|
||||
string_array headers{};
|
||||
string_array include_names{};
|
||||
std::string cpp_src = "";
|
||||
std::string cpp_name = "";
|
||||
|
||||
hiprtc_program(const std::string& src, const std::string& name = "main.cpp")
|
||||
: cpp_src(src), cpp_name(name)
|
||||
{
|
||||
create_program();
|
||||
}
|
||||
|
||||
hiprtc_program(std::vector<src_file> srcs)
|
||||
{
|
||||
for(auto&& src : srcs)
|
||||
{
|
||||
if(ends_with(src.path, ".cpp"))
|
||||
{
|
||||
cpp_src = std::move(src.content);
|
||||
cpp_name = std::move(src.path);
|
||||
}
|
||||
else
|
||||
{
|
||||
headers.push_back(std::move(src.content));
|
||||
include_names.push_back(std::move(src.path));
|
||||
}
|
||||
}
|
||||
create_program();
|
||||
}
|
||||
|
||||
void create_program()
|
||||
{
|
||||
assert(not cpp_src.empty());
|
||||
assert(not cpp_name.empty());
|
||||
assert(headers.size() == include_names.size());
|
||||
prog = hiprtc_program_create(cpp_src.c_str(),
|
||||
cpp_name.c_str(),
|
||||
headers.size(),
|
||||
headers.data(),
|
||||
include_names.data());
|
||||
}
|
||||
|
||||
void compile(const std::vector<std::string>& options, bool quiet = false) const
|
||||
{
|
||||
// if(enabled(MIGRAPHX_TRACE_HIPRTC{}))
|
||||
// std::cout << "hiprtc " << join_strings(options, " ") << " " << cpp_name << std::endl;
|
||||
std::vector<const char*> c_options;
|
||||
std::transform(options.begin(),
|
||||
options.end(),
|
||||
std::back_inserter(c_options),
|
||||
[](const std::string& s) { return s.c_str(); });
|
||||
std::cout << "BEFORE HIPRTC COMPILE" << std::endl;
|
||||
auto result = hiprtcCompileProgram(prog.get(), c_options.size(), c_options.data());
|
||||
auto prog_log = log();
|
||||
if(not prog_log.empty() and not quiet)
|
||||
{
|
||||
std::cerr << prog_log << std::endl;
|
||||
}
|
||||
if(result != HIPRTC_SUCCESS)
|
||||
throw std::runtime_error("Compilation failed.");
|
||||
}
|
||||
|
||||
std::string log() const
|
||||
{
|
||||
std::size_t n = 0;
|
||||
MIGRAPHX_HIPRTC(hiprtcGetProgramLogSize(prog.get(), &n));
|
||||
if(n == 0)
|
||||
return {};
|
||||
std::string buffer(n, '\0');
|
||||
MIGRAPHX_HIPRTC(hiprtcGetProgramLog(prog.get(), buffer.data()));
|
||||
assert(buffer.back() != 0);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
std::vector<char> get_code_obj() const
|
||||
{
|
||||
std::size_t n = 0;
|
||||
MIGRAPHX_HIPRTC(hiprtcGetCodeSize(prog.get(), &n));
|
||||
std::vector<char> buffer(n);
|
||||
MIGRAPHX_HIPRTC(hiprtcGetCode(prog.get(), buffer.data()));
|
||||
return buffer;
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<std::vector<char>> compile_hip_src_with_hiprtc(std::vector<src_file> srcs,
|
||||
const std::string& params,
|
||||
const std::string& arch)
|
||||
{
|
||||
hiprtc_program prog(std::move(srcs));
|
||||
auto options = split_string(params, ' ');
|
||||
options.push_back("-DMIGRAPHX_USE_HIPRTC=1");
|
||||
if(true)
|
||||
{
|
||||
options.push_back("-DMIGRAPHX_HAS_DPP=0");
|
||||
options.push_back("-DMIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS=1");
|
||||
options.push_back("-Wno-reserved-identifier");
|
||||
options.push_back("-Wno-unused-parameter");
|
||||
options.push_back("-Wno-gnu-line-marker");
|
||||
options.push_back("-Wno-old-style-cast");
|
||||
}
|
||||
if(true)
|
||||
options.push_back("-DMIGRAPHX_DEBUG");
|
||||
if(std::none_of(options.begin(), options.end(), [](const std::string& s) {
|
||||
return starts_with(s, "--std=") or starts_with(s, "-std=");
|
||||
}))
|
||||
options.push_back("-std=c++17");
|
||||
options.push_back("-fno-gpu-rdc");
|
||||
options.push_back("-O3");
|
||||
options.push_back("-Wno-cuda-compat");
|
||||
options.push_back("--offload-arch=" + arch);
|
||||
prog.compile(options);
|
||||
return {prog.get_code_obj()};
|
||||
}
|
||||
|
||||
bool hip_has_flags(const std::vector<std::string>& flags)
|
||||
{
|
||||
hiprtc_program prog{" "};
|
||||
try
|
||||
{
|
||||
prog.compile(flags, true);
|
||||
return true;
|
||||
}
|
||||
catch(...)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool hip_accept_non_uniform_wg()
|
||||
{
|
||||
static bool non_uniform_wg = hip_has_flags({"-fno-offload-uniform-block"});
|
||||
return non_uniform_wg;
|
||||
}
|
||||
|
||||
static std::vector<std::string> get_compiler_warnings()
|
||||
{
|
||||
std::vector<std::string> warnings = {
|
||||
"-Weverything",
|
||||
"-Wno-c++98-compat",
|
||||
"-Wno-c++98-compat-pedantic",
|
||||
"-Wno-conversion",
|
||||
"-Wno-double-promotion",
|
||||
"-Wno-exit-time-destructors",
|
||||
"-Wno-extra-semi",
|
||||
"-Wno-extra-semi-stmt",
|
||||
"-Wno-float-conversion",
|
||||
"-Wno-gnu-anonymous-struct",
|
||||
"-Wno-gnu-zero-variadic-macro-arguments",
|
||||
"-Wno-missing-prototypes",
|
||||
"-Wno-nested-anon-types",
|
||||
"-Wno-padded",
|
||||
"-Wno-shorten-64-to-32",
|
||||
"-Wno-sign-conversion",
|
||||
"-Wno-sign-compare",
|
||||
"-Wno-unused-command-line-argument",
|
||||
"-Wno-weak-vtables",
|
||||
"-Wno-c99-extensions",
|
||||
};
|
||||
|
||||
if(hip_has_flags({"-Werror", "-Wunsafe-buffer-usage"}))
|
||||
warnings.push_back("-Wno-unsafe-buffer-usage");
|
||||
return warnings;
|
||||
}
|
||||
|
||||
const std::vector<std::string>& compiler_warnings()
|
||||
{
|
||||
static std::vector<std::string> warnings = get_compiler_warnings();
|
||||
return warnings;
|
||||
}
|
||||
|
||||
// Compiles `content` as "main.cpp", together with options.additional_src_files,
// into a code object and returns a kernel named options.kernel_name.
//
// options.global and options.local must be > 0 (asserted). When global is not
// a multiple of local and the compiler accepts it,
// "-fno-offload-uniform-block" is added; otherwise divisibility is asserted.
// Both sizes are passed as -DMIGRAPHX_NGLOBAL / -DMIGRAPHX_NLOCAL, followed by
// the compiler warning flags, -ftemplate-backtrace-limit=0 and -Werror.
//
// Throws std::runtime_error("No code object") unless exactly one code object
// is produced.
kernel compile_hip_code_object(const std::string& content, hip_compile_options options)
{
    assert(options.global > 0);
    assert(options.local > 0);

    // NOTE: the upstream steps that embedded ::migraphx_kernels() and generated
    // "args.hpp" are disabled in this port; only the caller-supplied files and
    // main.cpp are compiled.
    std::vector<src_file> srcs = options.additional_src_files;
    srcs.emplace_back("main.cpp", content);

    // Debug listing of every file handed to the compiler.
    for(const auto& src : srcs)
    {
        std::cout << src.path << std::endl;
    }

    if(options.global % options.local != 0 and hip_accept_non_uniform_wg())
        options.params += " -fno-offload-uniform-block";
    else
        assert(options.global % options.local == 0);

    options.params += " -DMIGRAPHX_NGLOBAL=" + std::to_string(options.global);
    options.params += " -DMIGRAPHX_NLOCAL=" + std::to_string(options.local);
    options.params += " " + join_strings(compiler_warnings(), " ");
    options.params += " -ftemplate-backtrace-limit=0";
    options.params += " -Werror";

    auto cos = compile_hip_src_with_hiprtc(std::move(srcs), options.params, get_device_name());
    // Previously the exception object was constructed but never thrown, so
    // front() could be called on an empty vector.
    if(cos.size() != 1)
        throw std::runtime_error("No code object");
    auto& obj = cos.front();

    return kernel{obj.data(), options.kernel_name};
}
|
||||
|
||||
} // namespace rtc
|
||||
|
||||
Reference in New Issue
Block a user