#include #include "Algorithm.hpp" namespace kp { Algorithm::Algorithm() { SPDLOG_DEBUG("Kompute Algorithm base constructor"); } Algorithm::Algorithm(std::shared_ptr device, std::shared_ptr commandBuffer) { SPDLOG_DEBUG("Kompute Algorithm Constructor with device"); this->mDevice = device; this->mCommandBuffer = commandBuffer; } Algorithm::~Algorithm() { SPDLOG_DEBUG("Kompute Algorithm Destructor started"); if (!this->mDevice) { spdlog::error( "Kompute Algorithm destructor reached with null Device pointer"); return; } } void Algorithm::init(std::string shaderFilePath, std::vector> tensorParams) { SPDLOG_DEBUG("Kompute Algorithm init started"); // TODO: Move to util function this->createParameters(tensorParams); this->createShaderModule(shaderFilePath); this->createPipeline(); } void Algorithm::createParameters(std::vector>& tensorParams) { SPDLOG_DEBUG("Kompute Algorithm createParameters started"); // TODO: Explore design for having multiple descriptor pool sizes std::vector descriptorPoolSizes = { vk::DescriptorPoolSize( vk::DescriptorType::eStorageBuffer, 1 // Descriptor count ) }; // TODO: Explore design for having more than 1 set configurable vk::DescriptorPoolCreateInfo descriptorPoolInfo( vk::DescriptorPoolCreateFlags(), 1, // Max sets static_cast(descriptorPoolSizes.size()), descriptorPoolSizes.data()); SPDLOG_DEBUG("Kompute Algorithm creating descriptor pool"); this->mDescriptorPool = std::make_shared(); this->mDevice->createDescriptorPool(&descriptorPoolInfo, nullptr, this->mDescriptorPool.get()); std::vector descriptorSetBindings; // TODO: Explore allowing descriptor set bind index to be configurable by user to specify which tensors woudl go on each binding for (size_t i = 0; i < tensorParams.size(); i++) { descriptorSetBindings.push_back( vk::DescriptorSetLayoutBinding( i, // Binding index vk::DescriptorType::eStorageBuffer, 1, // Descriptor count vk::ShaderStageFlagBits::eCompute) ); } // This is the component that is fed into the pipeline vk::DescriptorSetLayoutCreateInfo descriptorSetLayoutInfo( vk::DescriptorSetLayoutCreateFlags(), static_cast(descriptorSetBindings.size()), descriptorSetBindings.data() ); SPDLOG_DEBUG("Kompute Algorithm creating descriptor set layout"); // TODO: We createa signle descriptor set layout which would have to be extended if multiple set layouts to be supported this->mDescriptorSetLayout = std::make_shared(); this->mDevice->createDescriptorSetLayout(&descriptorSetLayoutInfo, nullptr, this->mDescriptorSetLayout.get()); vk::DescriptorSetAllocateInfo descriptorSetAllocateInfo( *this->mDescriptorPool, 1, // Descriptor set layout count this->mDescriptorSetLayout.get()); SPDLOG_DEBUG("Kompute Algorithm allocating descriptor sets"); std::vector descriptorSets = this->mDevice->allocateDescriptorSets(descriptorSetAllocateInfo); this->mDescriptorSet = std::make_shared(); this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo, this->mDescriptorSet.get()); std::vector computeWriteDescriptorSets; for (size_t i = 0; i < tensorParams.size(); i++) { vk::DescriptorBufferInfo descriptorBufferInfo = tensorParams[i]->constructDescriptorBufferInfo(); // TODO: Explore design exposing the destination array element computeWriteDescriptorSets.push_back( vk::WriteDescriptorSet( *this->mDescriptorSet, i, // Destination binding 0, // Destination array element 1, // Descriptor count vk::DescriptorType::eStorageBuffer, nullptr, // Descriptor image info &descriptorBufferInfo)); } SPDLOG_DEBUG("Kompute Algorithm updating descriptor sets"); this->mDevice->updateDescriptorSets(computeWriteDescriptorSets, nullptr); SPDLOG_DEBUG("Kompue Algorithm successfully run init"); } void Algorithm::createShaderModule(std::string shaderFilePath) { SPDLOG_DEBUG("Kompute Algorithm createShaderModule started"); std::ifstream fileStream( shaderFilePath, std::ios::binary | std::ios::in | std::ios::ate); size_t shaderFileSize = fileStream.tellg(); fileStream.seekg(0, std::ios::beg); char* shaderFileData = new char[shaderFileSize]; fileStream.read(shaderFileData, shaderFileSize); fileStream.close(); vk::ShaderModuleCreateInfo shaderModuleInfo(vk::ShaderModuleCreateFlags(), shaderFileSize, (uint32_t*)shaderFileData); this->mFreeShaderModule = true; this->mShaderModule = std::shared_ptr(); this->mDevice->createShaderModule(&shaderModuleInfo, nullptr, this->mShaderModule.get()); } void Algorithm::createPipeline() { SPDLOG_DEBUG("Kompute Algorithm calling create Pipeline"); vk::PipelineLayoutCreateInfo pipelineLayoutInfo( vk::PipelineLayoutCreateFlags(), 1, // Set layout count this->mDescriptorSetLayout.get()); this->mPipelineLayout = std::make_shared(); this->mDevice->createPipelineLayout(&pipelineLayoutInfo, nullptr, this->mPipelineLayout.get()); vk::PipelineShaderStageCreateInfo shaderStage(vk::PipelineShaderStageCreateFlags(), vk::ShaderStageFlagBits::eCompute, *this->mShaderModule, "main", nullptr); vk::ComputePipelineCreateInfo pipelineInfo(vk::PipelineCreateFlags(), shaderStage, *this->mPipelineLayout, vk::Pipeline(), 0); // TODO: Confirm what the best structure is with pipeline cache this->mFreePipelineCache = true; vk::PipelineCacheCreateInfo pipelineCacheInfo = vk::PipelineCacheCreateInfo(); this->mPipelineCache = std::make_shared(); this->mDevice->createPipelineCache(&pipelineCacheInfo, nullptr, this->mPipelineCache.get()); vk::ResultValue pipelineResult = this->mDevice->createComputePipeline(*this->mPipelineCache, pipelineInfo); if (pipelineResult.result != vk::Result::eSuccess) { throw std::runtime_error("Failed to create pipeline result: " + vk::to_string(pipelineResult.result)); } this->mFreePipeline = true; this->mPipeline = std::make_shared(pipelineResult.value); } void Algorithm::recordDispatch(uint32_t x, uint32_t y, uint32_t z) { SPDLOG_DEBUG("Kompute Algorithm calling record dispatch"); this->mCommandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute, *this->mPipeline); this->mCommandBuffer->bindDescriptorSets( vk::PipelineBindPoint::eCompute, *this->mPipelineLayout, 0, // First set *this->mDescriptorSet, nullptr // Dispatcher ); this->mCommandBuffer->dispatch(x, y, z); } }