#include "gtest/gtest.h"

#include <chrono>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "kompute/Kompute.hpp"

// NOTE(review): this file was received with every angle-bracketed span
// stripped (header names, template arguments) and with all line breaks
// collapsed. The names restored below -- <chrono>, std::vector<float>,
// std::vector<char>, std::shared_ptr<kp::Tensor>, kp::OpTensorCreate,
// kp::OpAlgoBase<>, kp::OpTensorSyncLocal and std::chrono::microseconds --
// are a best-effort reconstruction; confirm them against
// kompute/Kompute.hpp before merging.

/**
 * Runs the same compute shader on three independent tensor pairs through the
 * manager's default (blocking) evaluation path and checks each output tensor.
 *
 * The shader adds 1.0 to pa[index] 100000 times and then copies the value
 * into pb[index]; with inputs initialised to 0.0 every output element is
 * therefore expected to equal 100000.
 *
 * The asynchronous half of the test is currently disabled and is preserved
 * below as commented-out code.
 */
TEST(TestAsyncOperations, TestManagerAsync)
{
    uint32_t size = 100000;

    // Line breaks matter here: "#version" has to sit on its own line.
    std::string shader(R"(
        #version 450

        layout (local_size_x = 1) in;

        layout(set = 0, binding = 0) buffer a { float pa[]; };
        layout(set = 0, binding = 1) buffer b { float pb[]; };

        void main() {
            uint index = gl_GlobalInvocationID.x;

            for (int i = 0; i < 100000; i++)
            {
                pa[index] += 1.0;
            }

            pb[index] = pa[index];
        }
    )");

    std::vector<float> data(size, 0.0);

    // Expected value per element == number of shader loop iterations.
    std::vector<float> resultSync(size, 100000);
    // Only referenced by the disabled async section below.
    std::vector<float> resultAsync(size, 100000);

    // Every tensor starts from the same zero-filled host data.
    const auto makeTensor = [&data]() {
        return std::make_shared<kp::Tensor>(data);
    };

    std::shared_ptr<kp::Tensor> tensorSyncA = makeTensor();
    std::shared_ptr<kp::Tensor> tensorSyncB = makeTensor();
    std::shared_ptr<kp::Tensor> tensorSyncC = makeTensor();
    std::shared_ptr<kp::Tensor> tensorSyncD = makeTensor();
    std::shared_ptr<kp::Tensor> tensorSyncE = makeTensor();
    std::shared_ptr<kp::Tensor> tensorSyncF = makeTensor();

    kp::Manager mgr;

    mgr.evalOpDefault<kp::OpTensorCreate>({ tensorSyncA,
                                            tensorSyncB,
                                            tensorSyncC,
                                            tensorSyncD,
                                            tensorSyncE,
                                            tensorSyncF });

    auto startSync = std::chrono::high_resolution_clock::now();

    mgr.evalOpDefault<kp::OpAlgoBase<>>(
      { tensorSyncA, tensorSyncB },
      std::vector<char>(shader.begin(), shader.end()));

    mgr.evalOpDefault<kp::OpAlgoBase<>>(
      { tensorSyncC, tensorSyncD },
      std::vector<char>(shader.begin(), shader.end()));

    mgr.evalOpDefault<kp::OpAlgoBase<>>(
      { tensorSyncE, tensorSyncF },
      std::vector<char>(shader.begin(), shader.end()));

    mgr.evalOpDefault<kp::OpTensorSyncLocal>(
      { tensorSyncB, tensorSyncD, tensorSyncF });

    auto endSync = std::chrono::high_resolution_clock::now();
    // NOTE(review): the duration unit was lost with the template argument;
    // microseconds is assumed -- confirm.
    auto durationSync =
      std::chrono::duration_cast<std::chrono::microseconds>(endSync - startSync)
        .count();
    // Only consumed by the disabled logging at the end of the test; the cast
    // keeps the build free of unused-variable warnings until it is re-enabled.
    static_cast<void>(durationSync);

    EXPECT_EQ(tensorSyncB->data(), resultSync);
    EXPECT_EQ(tensorSyncD->data(), resultSync);
    EXPECT_EQ(tensorSyncF->data(), resultSync);

    // ---- Disabled asynchronous variant (kept verbatim, comment depth preserved) ----

    //std::shared_ptr<kp::Tensor> tensorAsyncA{ new kp::Tensor(data) };
    //std::shared_ptr<kp::Tensor> tensorAsyncB{ new kp::Tensor(data) };
    //std::shared_ptr<kp::Tensor> tensorAsyncC{ new kp::Tensor(data) };
    //std::shared_ptr<kp::Tensor> tensorAsyncD{ new kp::Tensor(data) };
    //std::shared_ptr<kp::Tensor> tensorAsyncE{ new kp::Tensor(data) };
    //std::shared_ptr<kp::Tensor> tensorAsyncF{ new kp::Tensor(data) };

    //kp::Manager mgrAsync(0, 1);

    //mgrAsync.evalOpDefault<kp::OpTensorCreate>({ tensorAsyncA, tensorAsyncB, tensorAsyncC, tensorAsyncD, tensorAsyncE, tensorAsyncF });

    //mgrAsync.createManagedSequence("async0", 0);
    ////mgrAsync.createManagedSequence("async1", 1);
    ////mgrAsync.createManagedSequence("async2", 2);

    //auto startAsync = std::chrono::high_resolution_clock::now();

    //mgrAsync.evalOpAsync<kp::OpAlgoBase<>>(
    //  { tensorAsyncA, tensorAsyncB }, "async0", std::vector<char>(shader.begin(), shader.end()));

    ////mgrAsync.evalOpAsync<kp::OpAlgoBase<>>(
    ////  { tensorAsyncC, tensorAsyncD }, "async1", std::vector<char>(shader.begin(), shader.end()));

    ////mgrAsync.evalOpAsync<kp::OpAlgoBase<>>(
    ////  { tensorAsyncE, tensorAsyncF }, "async2", std::vector<char>(shader.begin(), shader.end()));

    //mgrAsync.evalOpAwait("async0");
    ////mgrAsync.evalOpAwait("async1");
    ////mgrAsync.evalOpAwait("async2");

    //mgrAsync.evalOpDefault<kp::OpTensorSyncLocal>({ tensorAsyncB });
    ////mgrAsync.evalOpDefault<kp::OpTensorSyncLocal>({ tensorAsyncD });
    ////mgrAsync.evalOpDefault<kp::OpTensorSyncLocal>({ tensorAsyncF });

    //auto endAsync = std::chrono::high_resolution_clock::now();
    //auto durationAsync = std::chrono::duration_cast<std::chrono::microseconds>(endAsync - startAsync).count();

    //EXPECT_EQ(tensorAsyncB->data(), resultAsync);
    ////EXPECT_EQ(tensorAsyncD->data(), resultAsync);
    ////EXPECT_EQ(tensorAsyncF->data(), resultAsync);

    ////SPDLOG_DEBUG("Total Sync: {}", durationSync);
    //SPDLOG_DEBUG("Total Async: {}", durationAsync);
}