diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index 2956634..571d252 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -49,7 +49,7 @@ Algorithm::createParameters(std::vector>& tensorParams) // TODO: Explore design for having multiple descriptor pool sizes std::vector descriptorPoolSizes = { vk::DescriptorPoolSize(vk::DescriptorType::eStorageBuffer, - 1 // Descriptor count + static_cast(tensorParams.size()) // Descriptor count ) }; @@ -99,6 +99,11 @@ Algorithm::createParameters(std::vector>& tensorParams) this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo, this->mDescriptorSet.get()); + std::vector descriptorBufferInfos; + for (size_t i = 0; i < tensorParams.size(); i++) { + descriptorBufferInfos.push_back(tensorParams[i]->constructDescriptorBufferInfo()); + } + // TODO: Explore design exposing the destination array element std::vector computeWriteDescriptorSets; for (size_t i = 0; i < tensorParams.size(); i++) { diff --git a/src/OpMult.cpp b/src/OpMult.cpp index 43fcb7c..f596bb3 100644 --- a/src/OpMult.cpp +++ b/src/OpMult.cpp @@ -109,6 +109,7 @@ OpMult::record() { SPDLOG_DEBUG("Kompute OpMult record called"); + // Barrier to ensure the data is finished writing to buffer memory this->mTensorLHS->recordBufferMemoryBarrier( vk::AccessFlagBits::eHostWrite, vk::AccessFlagBits::eShaderRead, @@ -122,6 +123,7 @@ OpMult::record() this->mAlgorithm->recordDispatch(this->mX, this->mY, this->mZ); + // Barrier to ensure the shader code is executed before buffer read this->mTensorOutput->recordBufferMemoryBarrier( vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eTransferRead, @@ -140,6 +142,7 @@ OpMult::record() this->mTensorOutputStaging->recordCopyFrom(this->mTensorLHS); + // Barrier to ensure we wait until data is copied to the staging buffer this->mTensorOutput->recordBufferMemoryBarrier( vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eHostRead, diff --git a/src/OpMult.hpp b/src/OpMult.hpp index 28251c9..25a7bf2 100644 --- a/src/OpMult.hpp 
+++ b/src/OpMult.hpp @@ -52,5 +52,5 @@ class OpMult : public OpBase } // End namespace kp // Including implemenation for template class -#include "OpMult.tpp" +#include "OpMult.cpp" diff --git a/src/Sequence.cpp b/src/Sequence.cpp index 5c982f0..a06797f 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -97,11 +97,6 @@ Sequence::eval() { SPDLOG_DEBUG("Kompute sequence compute recording EVAL"); - bool toggleSingleRecording = !this->mRecording; - if (toggleSingleRecording) { - this->begin(); - } - const vk::PipelineStageFlags waitStageMask = vk::PipelineStageFlagBits::eTransfer; vk::SubmitInfo submitInfo( @@ -116,10 +111,6 @@ Sequence::eval() this->mDevice->waitForFences(1, &fence, VK_TRUE, UINT64_MAX); this->mDevice->destroy(fence); - if (toggleSingleRecording) { - this->end(); - } - // TODO: Explore whether moving postSubmit calls to a separate sequence // function that is explicitly called by the manager for (size_t i = 0; i < this->mOperations.size(); i++) { diff --git a/src/Tensor.cpp b/src/Tensor.cpp index d64f724..b4cc143 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -58,7 +58,7 @@ Tensor::init(std::shared_ptr physicalDevice, std::vector data) { SPDLOG_DEBUG( - "Kompute Tensor running init with physicalDevice and logical device"); + "Kompute Tensor running init with Vulkan params and data size: {}", data.size()); this->mPhysicalDevice = physicalDevice; this->mDevice = device;