#include "gtest/gtest.h"

#include <chrono>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "kompute/Kompute.hpp"

// NOTE(review): every template argument in this file (the <chrono> include,
// container element types, Kompute operation types and the chrono unit) was
// reconstructed. Confirm the kp::Op* names and the duration unit against the
// Kompute version this test is built with.

/**
 * Runs the same accumulate-and-copy compute shader over three tensor pairs,
 * first through blocking evalOpDefault calls and then through
 * evalOpAsync / evalOpAwait on three named sequences, checking the output
 * tensors after each phase and logging how long each phase took.
 */
TEST(TestAsyncOperations, TestManagerAsync)
{
    const uint32_t size = 100000;

    std::vector<float> data(size, 0.0);
    // The shader adds 1.0 to every element 100000 times per dispatch, so the
    // first (sync) pass is expected to yield 100000 and the second (async)
    // pass, which runs on the same tensors again, 200000.
    std::vector<float> resultSync(size, 100000);
    std::vector<float> resultAsync(size, 200000);

    // Pairs (A,B), (C,D) and (E,F) are each bound as (pa, pb) in the shader.
    auto tensorA = std::make_shared<kp::Tensor>(data);
    auto tensorB = std::make_shared<kp::Tensor>(data);
    auto tensorC = std::make_shared<kp::Tensor>(data);
    auto tensorD = std::make_shared<kp::Tensor>(data);
    auto tensorE = std::make_shared<kp::Tensor>(data);
    auto tensorF = std::make_shared<kp::Tensor>(data);

    kp::Manager mgr;

    mgr.evalOpDefault<kp::OpTensorCreate>(
      { tensorA, tensorB, tensorC, tensorD, tensorE, tensorF });

    // The line breaks matter: "#version" is a preprocessor directive and must
    // sit on its own line for the GLSL source to be valid.
    std::string shader(R"(
        #version 450

        layout (local_size_x = 1) in;

        layout(set = 0, binding = 0) buffer a { float pa[]; };
        layout(set = 0, binding = 1) buffer b { float pb[]; };

        void main() {
            uint index = gl_GlobalInvocationID.x;

            for (int i = 0; i < 100000; i++)
            {
                pa[index] += 1.0;
            }

            pb[index] = pa[index];
        }
    )");

    // ---- Phase 1: three dispatches through evalOpDefault -------------------
    auto startSync = std::chrono::high_resolution_clock::now();

    mgr.evalOpDefault<kp::OpAlgoBase<>>(
      { tensorA, tensorB }, std::vector<char>(shader.begin(), shader.end()));

    mgr.evalOpDefault<kp::OpAlgoBase<>>(
      { tensorC, tensorD }, std::vector<char>(shader.begin(), shader.end()));

    mgr.evalOpDefault<kp::OpAlgoBase<>>(
      { tensorE, tensorF }, std::vector<char>(shader.begin(), shader.end()));

    auto endSync = std::chrono::high_resolution_clock::now();

    // Read the output tensors back before comparing; kept outside the timed
    // region, as in the original.
    mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorB, tensorD, tensorF });

    EXPECT_EQ(tensorB->data(), resultSync);
    EXPECT_EQ(tensorD->data(), resultSync);
    EXPECT_EQ(tensorF->data(), resultSync);

    auto durationSync =
      std::chrono::duration_cast<std::chrono::microseconds>(endSync -
                                                            startSync)
        .count();

    // ---- Phase 2: same dispatches via evalOpAsync on named sequences -------
    auto startAsync = std::chrono::high_resolution_clock::now();

    mgr.evalOpAsync<kp::OpAlgoBase<>>(
      { tensorA, tensorB },
      "asyncOne",
      std::vector<char>(shader.begin(), shader.end()));

    mgr.evalOpAsync<kp::OpAlgoBase<>>(
      { tensorC, tensorD },
      "asyncTwo",
      std::vector<char>(shader.begin(), shader.end()));

    mgr.evalOpAsync<kp::OpAlgoBase<>>(
      { tensorE, tensorF },
      "asyncThree",
      std::vector<char>(shader.begin(), shader.end()));

    // All three are submitted before any is awaited; the timed region ends
    // only after the last await returns.
    mgr.evalOpAwait("asyncOne");
    mgr.evalOpAwait("asyncTwo");
    mgr.evalOpAwait("asyncThree");

    auto endAsync = std::chrono::high_resolution_clock::now();

    auto durationAsync =
      std::chrono::duration_cast<std::chrono::microseconds>(endAsync -
                                                            startAsync)
        .count();

    mgr.evalOpDefault<kp::OpTensorSyncLocal>({ tensorB, tensorD, tensorF });

    EXPECT_EQ(tensorB->data(), resultAsync);
    EXPECT_EQ(tensorD->data(), resultAsync);
    EXPECT_EQ(tensorF->data(), resultAsync);

    // Timings are logged for information only; the test does not assert on
    // them.
    SPDLOG_DEBUG("Total Sync: {}", durationSync);
    SPDLOG_DEBUG("Total Async: {}", durationAsync);
}