diff --git a/Makefile b/Makefile index ecb64ce..bb4d466 100755 --- a/Makefile +++ b/Makefile @@ -1,28 +1,26 @@ -####### SRC Build Params ####### - -CC="/c/Program Files (x86)/Microsoft Visual Studio/2019/Community/MSBuild/Current/Bin/MSBuild.exe" - - -####### Shader Build Params ####### - ifeq ($(OS),Windows_NT) # is Windows_NT on XP, 2000, 7, Vista, 10... - SCMP=C:\VulkanSDK\1.2.141.2\Bin32\glslangValidator.exe + CMAKE_BIN ?= "C:\Program Files\CMake\bin\cmake.exe" + SCMP_BIN="C:\\VulkanSDK\\1.2.141.2\\Bin32\\glslangValidator.exe" + MSBUILD_BIN ?= "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\MSBuild\\Current\\Bin\\MSBuild.exe" + VCPKG_CMAKE ?= "C:\\Users\\axsau\\Programming\\lib\\vcpkg\\scripts\\buildsystems\\vcpkg.cmake" else - SCMP=/c/VulkanSDK/1.2.141.2/Bin32/glslangValidator.exe + CLANG_FORMAT_BIN ?= "/home/alejandro/Programming/lib/clang+llvm-10.0.0-x86_64-linux-gnu-ubuntu-18.04/bin/clang-format" + CMAKE_BIN ?= "/c/Program Files/CMake/bin/cmake.exe" + SCMP_BIN ?= "/c/VulkanSDK/1.2.141.2/Bin32/glslangValidator.exe" + MSBUILD_BIN ?= "/c/Program Files (x86)/Microsoft Visual Studio/2019/Community/MSBuild/Current/Bin/MSBuild.exe" + VCPKG_CMAKE ?= "C:\\Users\\axsau\\Programming\\lib\\vcpkg\\scripts\\buildsystems\\vcpkg.cmake" endif -####### Package manager ####### - -VCPKG=/c/Users/axsau/Programming/lib/vcpkg/vcpkg ####### Main Target Rules ####### run_cmake: - cmake \ + $(CMAKE_BIN) \ -Bbuild \ - -DCMAKE_TOOLCHAIN_FILE=C:\\Users\\axsau\\Programming\\lib\\vcpkg\\scripts\\buildsystems\\vcpkg.cmake \ + -DCMAKE_TOOLCHAIN_FILE=$(VCPKG_CMAKE) \ -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ -G "Visual Studio 16 2019" push_docs_to_ghpages: @@ -31,10 +29,25 @@ push_docs_to_ghpages: GIT_DEPLOY_REPO="origin" \ ./scripts/push_folder_to_branch.sh -build_vs: - $(CC) build/kompute.sln +####### Visual studio build shortcut commands ####### -run_tests: +build_all: + $(MSBUILD_BIN) build/kompute.sln + +build_docs: + $(MSBUILD_BIN) 
build/docs/gendoxygen.vcxproj + $(MSBUILD_BIN) build/docs/gensphinx.vcxproj + +build_kompute: + $(MSBUILD_BIN) build/src/kompute.vcxproj + +build_tests: + $(MSBUILD_BIN) build/test/test_kompute.vcxproj + +run_docs: build_docs + (cd build/docs/sphinx && python2.7 -m SimpleHTTPServer) + +run_tests: build_tests ./build/test/Debug/test_kompute.exe clean_cmake: @@ -46,7 +59,7 @@ install_python_reqs: build_shaders: python scripts/convert_shaders.py \ --shader-path shaders/glsl \ - --shader-binary $(SCMP) \ + --shader-binary $(SCMP_BIN) \ --header-path src/include/kompute/shaders/ \ -v @@ -85,7 +98,7 @@ build_single_header: "single_include/kompute/Kompute.hpp" format: - clang-format -i -style="{BasedOnStyle: mozilla, IndentWidth: 4}" src/*.cpp src/include/kompute/*.hpp + $(CLANG_FORMAT_BIN) -i -style="{BasedOnStyle: mozilla, IndentWidth: 4}" src/*.cpp src/include/kompute/*.hpp clean: find src -name "*gch" -exec rm {} \; || "No ghc files" diff --git a/README.md b/README.md index 576d193..75ee6ed 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,9 @@ +🔋 [Documentation]() 💻 [Import to your project]() ⌨ [Tutorials]() 💾 + + ## Principles & Features * Single header easy to import static library diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index f31325c..9bee5df 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -150,6 +150,8 @@ static unsigned const int shaders_glsl_opmult_comp_spv_len = 1308; } #endif // define SHADEROP_SHADEROPMULT_HPP +#include + #define KP_MAX_DIM_SIZE 1 namespace kp { @@ -234,42 +236,81 @@ class Tensor namespace kp { /** - Base Operation -*/ + * Base Operation which provides the high level interface that Kompute + * operations implement in order to perform a set of actions in the GPU. + * + * Operations can perform actions on tensors, and optionally can also own an + * Algorithm with respective parameters. 
kp::Operations with kp::Algorithms + * would inherit from kp::OpBaseAlgo. + */ class OpBase { - private: public: /** - Constructor - */ + * Base constructor, should not be used unless explicitly intended. + */ OpBase() { SPDLOG_DEBUG("Compute OpBase base constructor"); } + /** + * Default constructor with parameters that provides the bare minimum + * requirements for the operations to be able to create and manage their + * sub-components. + */ OpBase(std::shared_ptr physicalDevice, std::shared_ptr device, - std::shared_ptr commandBuffer) + std::shared_ptr commandBuffer, + std::vector>& tensors, + bool freeTensors) { SPDLOG_DEBUG("Compute OpBase constructor with params"); this->mPhysicalDevice = physicalDevice; this->mDevice = device; this->mCommandBuffer = commandBuffer; + this->mTensors = tensors; + // Honour the caller's ownership choice so the destructor can free mTensors + this->mFreeTensors = freeTensors; } - ~OpBase() { - SPDLOG_DEBUG("Compute OpBase destructor started"); - } - - virtual void init(std::vector> tensors) + /** + * Default destructor for OpBase class. This OpBase destructor class should + * always be called to destroy and free owned resources unless it is + * intended to destroy the resources in the parent class. This can be done + * by passing the mFreeTensors=false. 
+ */ + virtual ~OpBase() { - SPDLOG_DEBUG("Kompute OpBase init called"); + SPDLOG_DEBUG("Kompute OpBase destructor started"); + + if (!this->mDevice) { + spdlog::warn("Kompute OpBase destructor called with empty device"); + return; + } + + if (this->mFreeTensors) { + SPDLOG_DEBUG("Kompute OpBase freeing tensors"); + for (std::shared_ptr tensor : this->mTensors) { + if (tensor && tensor->isInit()) { + tensor->freeMemoryDestroyGPUResources(); + } else { + spdlog::error("Kompute OpBase expected to free " + "tensor but has already been freed."); + } + } + } } - virtual void record() { SPDLOG_DEBUG("Kompute OpBase record called"); } + virtual void init() = 0; - virtual void postSubmit() { SPDLOG_DEBUG("Kompute OpBase init called"); } + virtual void record() = 0; + + virtual void postSubmit() = 0; protected: + // Sometimes owned resources + std::vector> mTensors; + bool mFreeTensors = + false; // TODO: Provide granularity to specify which to free + + // Always external resources std::shared_ptr mPhysicalDevice; std::shared_ptr mDevice; std::shared_ptr mCommandBuffer; @@ -295,6 +336,9 @@ class Sequence uint32_t queueIndex); ~Sequence(); + // Initialiser + void init(); + // Record command functions void begin(); void end(); @@ -310,15 +354,15 @@ class Sequence SPDLOG_DEBUG("Kompute Sequence record function started"); SPDLOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); - T* op = - new T(this->mPhysicalDevice, this->mDevice, this->mCommandBuffer); + T* op = new T( + this->mPhysicalDevice, this->mDevice, this->mCommandBuffer, tensors); OpBase* baseOp = dynamic_cast(op); std::unique_ptr baseOpPtr{ baseOp }; SPDLOG_DEBUG( "Kompute Sequence running init on OpBase derived class instance"); - baseOpPtr->init(tensors); + baseOpPtr->init(); SPDLOG_DEBUG( "Kompute Sequence running record on OpBase derived class instance"); @@ -350,6 +394,8 @@ class Sequence } // End namespace kp +#define KP_DEFAULT_SESSION "DEFAULT" + namespace kp { /** @@ -370,28 +416,34 @@ class 
Manager ~Manager(); - std::weak_ptr managedSequence(); + std::weak_ptr getOrCreateManagedSequence(std::string sessionName); template - void evalOp(std::vector> tensors) + void evalOp(std::vector> tensors, std::string sessionName = KP_DEFAULT_SESSION) { SPDLOG_DEBUG("Kompute Manager evalOp triggered"); - Sequence sq(this->mPhysicalDevice, - this->mDevice, - this->mComputeQueue, - this->mComputeQueueFamilyIndex); - SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN"); - sq.begin(); - SPDLOG_DEBUG("Kompute Manager evalOp running sequence RECORD"); - sq.record(tensors); - SPDLOG_DEBUG("Kompute Manager evalOp running sequence END"); - sq.end(); - SPDLOG_DEBUG("Kompute Manager evalOp running sequence EVAL"); - sq.eval(); + std::weak_ptr sqWeakPtr = + this->getOrCreateManagedSequence(sessionName); + + if (std::shared_ptr sq = sqWeakPtr.lock()) + { + SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN"); + sq->begin(); + + SPDLOG_DEBUG("Kompute Manager evalOp running sequence RECORD"); + sq->record(tensors); + + SPDLOG_DEBUG("Kompute Manager evalOp running sequence END"); + sq->end(); + + SPDLOG_DEBUG("Kompute Manager evalOp running sequence EVAL"); + sq->eval(); + } SPDLOG_DEBUG("Kompute Manager evalOp running sequence SUCCESS"); } private: + std::shared_ptr mInstance = nullptr; bool mFreeInstance = false; std::shared_ptr mPhysicalDevice = nullptr; @@ -402,7 +454,7 @@ class Manager std::shared_ptr mComputeQueue = nullptr; // Always owned resources - std::vector> mManagedSequences; + std::unordered_map> mManagedSequences; #if DEBUG vk::DebugReportCallbackEXT mDebugReportCallback; @@ -439,15 +491,16 @@ class Algorithm void recordDispatch(uint32_t x = 1, uint32_t y = 1, uint32_t z = 1); private: - // Shared resources + // Never Owned Resources std::shared_ptr mDevice; std::shared_ptr mCommandBuffer; - // Resources owned by default + // Optionally owned resources std::shared_ptr mDescriptorSetLayout; bool mFreeDescriptorSetLayout = false; std::shared_ptr 
mDescriptorPool; bool mFreeDescriptorPool = false; + // TODO: Explore design for multiple descriptor sets std::shared_ptr mDescriptorSet; bool mFreeDescriptorSet = false; @@ -463,6 +516,7 @@ class Algorithm // Create util functions void createShaderModule(const std::vector& shaderFileData); void createPipeline(); + // Parameters void createParameters(std::vector>& tensorParams); void createDescriptorPool(); @@ -486,11 +540,13 @@ class OpMult : public OpBase OpMult(std::shared_ptr physicalDevice, std::shared_ptr device, - std::shared_ptr commandBuffer); + std::shared_ptr commandBuffer, + std::vector>& tensors, + bool freeTensors = false); ~OpMult(); - void init(std::vector> tensors) override; + void init() override; void record() override; @@ -532,8 +588,10 @@ OpMult::OpMult() template OpMult::OpMult(std::shared_ptr physicalDevice, std::shared_ptr device, - std::shared_ptr commandBuffer) - : OpBase(physicalDevice, device, commandBuffer) + std::shared_ptr commandBuffer, + std::vector>& tensors, + bool freeTensors) + : OpBase(physicalDevice, device, commandBuffer, tensors, freeTensors) { SPDLOG_DEBUG("Kompute OpMult constructor with params"); @@ -548,20 +606,20 @@ OpMult::~OpMult() template void -OpMult::init(std::vector> tensors) +OpMult::init() { SPDLOG_DEBUG("Kompute OpMult init called"); - if (tensors.size() < 3) { + if (this->mTensors.size() < 3) { throw std::runtime_error( - "Kompute OpMult called with less than 1 tensor"); + "Kompute OpMult called with less than 3 tensors"); - } else if (tensors.size() > 3) { - spdlog::warn("Kompute OpMult called with more than 3 tensors"); + } else if (this->mTensors.size() > 3) { + spdlog::warn("Kompute OpMult called with more than 3 tensors"); } - this->mTensorLHS = tensors[0]; - this->mTensorRHS = tensors[1]; - this->mTensorOutput = tensors[2]; + this->mTensorLHS = this->mTensors[0]; + this->mTensorRHS = this->mTensors[1]; + this->mTensorOutput = this->mTensors[2]; // The dispatch size is set up based on either explicitly provided template // parameters or by default it 
would take the shape and size of the tensors @@ -635,7 +693,7 @@ OpMult::init(std::vector> tensors) SPDLOG_DEBUG("Kompute OpMult Initialising algorithm component"); - this->mAlgorithm->init(shaderFileData, tensors); + this->mAlgorithm->init(shaderFileData, this->mTensors); } template @@ -709,21 +767,22 @@ class OpCreateTensor : public OpBase OpCreateTensor(std::shared_ptr physicalDevice, std::shared_ptr device, - std::shared_ptr commandBuffer); + std::shared_ptr commandBuffer, + std::vector>& tensors, + bool freeTensors = true); ~OpCreateTensor(); - void init(std::vector> tensors) override; + void init() override; void record() override; void postSubmit() override; private: + // Never owned resources std::shared_ptr mPrimaryTensor; - bool mFreePrimaryTensorResources = false; std::shared_ptr mStagingTensor; - bool mFreeStagingTensorResources = false; }; } // End namespace kp diff --git a/src/Manager.cpp b/src/Manager.cpp index 9d875b2..5dbc205 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -71,16 +71,25 @@ Manager::~Manager() } std::weak_ptr -Manager::managedSequence() +Manager::getOrCreateManagedSequence(std::string sessionName) { SPDLOG_DEBUG("Kompute Manager creating Sequence object"); - std::shared_ptr sq = std::make_shared( - this->mPhysicalDevice, - this->mDevice, - this->mComputeQueue, - this->mComputeQueueFamilyIndex); - this->mManagedSequences.push_back(sq); - return sq; + std::unordered_map>::iterator + found = this->mManagedSequences.find(sessionName); + + if (found == this->mManagedSequences.end()) { + std::shared_ptr sq = + std::make_shared(this->mPhysicalDevice, + this->mDevice, + this->mComputeQueue, + this->mComputeQueueFamilyIndex); + sq->init(); + this->mManagedSequences.insert({sessionName, sq}); + return sq; + } + else { + return found->second; + } } void diff --git a/src/OpCreateTensor.cpp b/src/OpCreateTensor.cpp index da96625..a20a639 100644 --- a/src/OpCreateTensor.cpp +++ b/src/OpCreateTensor.cpp @@ -13,8 +13,10 @@ 
OpCreateTensor::OpCreateTensor() OpCreateTensor::OpCreateTensor( std::shared_ptr physicalDevice, std::shared_ptr device, - std::shared_ptr commandBuffer) - : OpBase(physicalDevice, device, commandBuffer) + std::shared_ptr commandBuffer, + std::vector>& tensors, + bool freeTensors) + : OpBase(physicalDevice, device, commandBuffer, tensors, freeTensors) { SPDLOG_DEBUG("Kompute OpCreateTensor constructor with params"); } @@ -22,47 +24,21 @@ OpCreateTensor::OpCreateTensor( OpCreateTensor::~OpCreateTensor() { SPDLOG_DEBUG("Kompute OpCreateTensor destructor started"); - - if(!this->mDevice) { - spdlog::warn("Kompute OpCreateTensor destructor called with empty device"); - return; - } - - if (!this->mFreePrimaryTensorResources) { - SPDLOG_DEBUG("Kompute OpCreateTensor removing primary tensor"); - if (this->mPrimaryTensor && this->mPrimaryTensor->isInit()) { - this->mPrimaryTensor->freeMemoryDestroyGPUResources(); - } else { - spdlog::error("Kompute OpCreateTensor expected to free primary tensor but has already been freed."); - } - } - - if (!this->mFreeStagingTensorResources) { - SPDLOG_DEBUG("Kompute OpCreateTensor removing primary tensor"); - if (this->mStagingTensor && this->mStagingTensor->isInit()) { - this->mStagingTensor->freeMemoryDestroyGPUResources(); - } else { - spdlog::error("Kompute OpCreateTensor expected to free secondary tensor but has already been freed."); - } - } } void -OpCreateTensor::init(std::vector> tensors) +OpCreateTensor::init() { SPDLOG_DEBUG("Kompute OpCreateTensor init called"); - if (tensors.size() < 1) { + if (this->mTensors.size() < 1) { throw std::runtime_error( "Kompute OpCreateTensor called with less than 1 tensor"); - } else if (tensors.size() > 1) { + } else if (this->mTensors.size() > 1) { spdlog::warn("Kompute OpCreateTensor called with more than 1 tensor"); } - this->mFreePrimaryTensorResources = true; - this->mFreeStagingTensorResources = true; - - this->mPrimaryTensor = tensors[0]; + this->mPrimaryTensor = this->mTensors[0]; if 
(this->mPrimaryTensor->tensorType() == Tensor::TensorTypes::eDevice) { this->mPrimaryTensor->init( @@ -76,6 +52,9 @@ OpCreateTensor::init(std::vector> tensors) this->mStagingTensor->mapDataIntoHostMemory(); + // Adding to the OpBase owned resource so they are freed + this->mTensors.push_back(this->mStagingTensor); + } else { this->mPrimaryTensor->init( this->mPhysicalDevice, this->mDevice, this->mCommandBuffer); diff --git a/src/Sequence.cpp b/src/Sequence.cpp index ab24e19..de1376c 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -19,9 +19,6 @@ Sequence::Sequence(std::shared_ptr physicalDevice, this->mDevice = device; this->mComputeQueue = computeQueue; this->mQueueIndex = queueIndex; - - this->createCommandPool(); - this->createCommandBuffer(); } Sequence::~Sequence() @@ -58,10 +55,17 @@ Sequence::~Sequence() } } +void +Sequence::init() +{ + this->createCommandPool(); + this->createCommandBuffer(); +} + void Sequence::begin() { - if (this->mCommandPool == nullptr) { + if (!this->mCommandPool) { throw std::runtime_error("Kompute Sequence command pool is null"); } @@ -78,7 +82,7 @@ Sequence::begin() void Sequence::end() { - if (this->mCommandPool == nullptr) { + if (!this->mCommandPool) { throw std::runtime_error("Kompute Sequence command pool is null"); } @@ -125,7 +129,7 @@ Sequence::createCommandPool() { SPDLOG_DEBUG("Kompute Sequence creating command pool"); - if (this->mDevice == nullptr) { + if (!this->mDevice) { throw std::runtime_error("Kompute Sequence device is null"); } if (this->mQueueIndex < 0) { @@ -146,10 +150,10 @@ void Sequence::createCommandBuffer() { SPDLOG_DEBUG("Kompute Sequence creating command buffer"); - if (this->mDevice == nullptr) { + if (!this->mDevice) { throw std::runtime_error("Kompute Sequence device is null"); } - if (this->mCommandPool == nullptr) { + if (!this->mCommandPool) { throw std::runtime_error("Kompute Sequence command pool is null"); } diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 843df42..04e8f74 100644 --- 
a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -323,7 +323,9 @@ Tensor::createBuffer() SPDLOG_DEBUG("Kompute Tensor buffer & memory creation successful"); } -void Tensor::freeMemoryDestroyGPUResources() { +void +Tensor::freeMemoryDestroyGPUResources() +{ SPDLOG_DEBUG("Kompute Tensor started freeMemoryDestroyGPUResources"); this->mIsInit = false; @@ -355,7 +357,6 @@ void Tensor::freeMemoryDestroyGPUResources() { } SPDLOG_DEBUG("Kompute Tensor successful freeMemoryDestroyGPUResources"); - } } diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index 4bb7e1d..f4ed71c 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -25,15 +25,16 @@ class Algorithm void recordDispatch(uint32_t x = 1, uint32_t y = 1, uint32_t z = 1); private: - // Shared resources + // Never Owned Resources std::shared_ptr mDevice; std::shared_ptr mCommandBuffer; - // Resources owned by default + // Optionally owned resources std::shared_ptr mDescriptorSetLayout; bool mFreeDescriptorSetLayout = false; std::shared_ptr mDescriptorPool; bool mFreeDescriptorPool = false; + // TODO: Explore design for multiple descriptor sets std::shared_ptr mDescriptorSet; bool mFreeDescriptorSet = false; @@ -49,6 +50,7 @@ class Algorithm // Create util functions void createShaderModule(const std::vector& shaderFileData); void createPipeline(); + // Parameters void createParameters(std::vector>& tensorParams); void createDescriptorPool(); diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index b93da15..7ed0924 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -1,9 +1,13 @@ #pragma once +#include + #include "kompute/Core.hpp" #include "kompute/Sequence.hpp" +#define KP_DEFAULT_SESSION "DEFAULT" + namespace kp { /** @@ -24,28 +28,34 @@ class Manager ~Manager(); - std::weak_ptr managedSequence(); + std::weak_ptr getOrCreateManagedSequence(std::string sessionName); template - void 
evalOp(std::vector> tensors) + void evalOp(std::vector> tensors, std::string sessionName = KP_DEFAULT_SESSION) { SPDLOG_DEBUG("Kompute Manager evalOp triggered"); - Sequence sq(this->mPhysicalDevice, - this->mDevice, - this->mComputeQueue, - this->mComputeQueueFamilyIndex); - SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN"); - sq.begin(); - SPDLOG_DEBUG("Kompute Manager evalOp running sequence RECORD"); - sq.record(tensors); - SPDLOG_DEBUG("Kompute Manager evalOp running sequence END"); - sq.end(); - SPDLOG_DEBUG("Kompute Manager evalOp running sequence EVAL"); - sq.eval(); + std::weak_ptr sqWeakPtr = + this->getOrCreateManagedSequence(sessionName); + + if (std::shared_ptr sq = sqWeakPtr.lock()) + { + SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN"); + sq->begin(); + + SPDLOG_DEBUG("Kompute Manager evalOp running sequence RECORD"); + sq->record(tensors); + + SPDLOG_DEBUG("Kompute Manager evalOp running sequence END"); + sq->end(); + + SPDLOG_DEBUG("Kompute Manager evalOp running sequence EVAL"); + sq->eval(); + } SPDLOG_DEBUG("Kompute Manager evalOp running sequence SUCCESS"); } private: + std::shared_ptr mInstance = nullptr; bool mFreeInstance = false; std::shared_ptr mPhysicalDevice = nullptr; @@ -56,7 +66,7 @@ class Manager std::shared_ptr mComputeQueue = nullptr; // Always owned resources - std::vector> mManagedSequences; + std::unordered_map> mManagedSequences; #if DEBUG vk::DebugReportCallbackEXT mDebugReportCallback; diff --git a/src/include/kompute/OpBase.hpp b/src/include/kompute/OpBase.hpp index 1a503df..1511c82 100644 --- a/src/include/kompute/OpBase.hpp +++ b/src/include/kompute/OpBase.hpp @@ -7,42 +7,81 @@ namespace kp { /** - Base Operation -*/ + * Base Operation which provides the high level interface that Kompute + * operations implement in order to perform a set of actions in the GPU. + * + * Operations can perform actions on tensors, and optionally can also own an + * Algorithm with respective parameters. 
kp::Operations with kp::Algorithms + * would inherit from kp::OpBaseAlgo. + */ class OpBase { - private: public: /** - Constructor - */ + * Base constructor, should not be used unless explicitly intended. + */ OpBase() { SPDLOG_DEBUG("Compute OpBase base constructor"); } + /** + * Default constructor with parameters that provides the bare minimum + * requirements for the operations to be able to create and manage their + * sub-components. + */ OpBase(std::shared_ptr physicalDevice, std::shared_ptr device, - std::shared_ptr commandBuffer) + std::shared_ptr commandBuffer, + std::vector>& tensors, + bool freeTensors) { SPDLOG_DEBUG("Compute OpBase constructor with params"); this->mPhysicalDevice = physicalDevice; this->mDevice = device; this->mCommandBuffer = commandBuffer; + this->mTensors = tensors; + // Honour the caller's ownership choice so the destructor can free mTensors + this->mFreeTensors = freeTensors; } - ~OpBase() { - SPDLOG_DEBUG("Compute OpBase destructor started"); - } - - virtual void init(std::vector> tensors) + /** + * Default destructor for OpBase class. This OpBase destructor class should + * always be called to destroy and free owned resources unless it is + * intended to destroy the resources in the parent class. This can be done + * by passing the mFreeTensors=false. 
+ */ + virtual ~OpBase() { - SPDLOG_DEBUG("Kompute OpBase init called"); + SPDLOG_DEBUG("Kompute OpBase destructor started"); + + if (!this->mDevice) { + spdlog::warn("Kompute OpBase destructor called with empty device"); + return; + } + + if (this->mFreeTensors) { + SPDLOG_DEBUG("Kompute OpBase freeing tensors"); + for (std::shared_ptr tensor : this->mTensors) { + if (tensor && tensor->isInit()) { + tensor->freeMemoryDestroyGPUResources(); + } else { + spdlog::error("Kompute OpBase expected to free " + "tensor but has already been freed."); + } + } + } } - virtual void record() { SPDLOG_DEBUG("Kompute OpBase record called"); } + virtual void init() = 0; - virtual void postSubmit() { SPDLOG_DEBUG("Kompute OpBase init called"); } + virtual void record() = 0; + + virtual void postSubmit() = 0; protected: + // Sometimes owned resources + std::vector> mTensors; + bool mFreeTensors = + false; // TODO: Provide granularity to specify which to free + + // Always external resources std::shared_ptr mPhysicalDevice; std::shared_ptr mDevice; std::shared_ptr mCommandBuffer; diff --git a/src/include/kompute/OpCreateTensor.hpp b/src/include/kompute/OpCreateTensor.hpp index 978e8cc..81171ef 100644 --- a/src/include/kompute/OpCreateTensor.hpp +++ b/src/include/kompute/OpCreateTensor.hpp @@ -15,21 +15,22 @@ class OpCreateTensor : public OpBase OpCreateTensor(std::shared_ptr physicalDevice, std::shared_ptr device, - std::shared_ptr commandBuffer); + std::shared_ptr commandBuffer, + std::vector>& tensors, + bool freeTensors = true); ~OpCreateTensor(); - void init(std::vector> tensors) override; + void init() override; void record() override; void postSubmit() override; private: + // Never owned resources std::shared_ptr mPrimaryTensor; - bool mFreePrimaryTensorResources = false; std::shared_ptr mStagingTensor; - bool mFreeStagingTensorResources = false; }; } // End namespace kp diff --git a/src/include/kompute/OpMult.hpp b/src/include/kompute/OpMult.hpp index a4cc3be..2ceb303 100644 --- 
a/src/include/kompute/OpMult.hpp +++ b/src/include/kompute/OpMult.hpp @@ -27,11 +27,13 @@ class OpMult : public OpBase OpMult(std::shared_ptr physicalDevice, std::shared_ptr device, - std::shared_ptr commandBuffer); + std::shared_ptr commandBuffer, + std::vector>& tensors, + bool freeTensors = false); ~OpMult(); - void init(std::vector> tensors) override; + void init() override; void record() override; @@ -73,8 +75,10 @@ OpMult::OpMult() template OpMult::OpMult(std::shared_ptr physicalDevice, std::shared_ptr device, - std::shared_ptr commandBuffer) - : OpBase(physicalDevice, device, commandBuffer) + std::shared_ptr commandBuffer, + std::vector>& tensors, + bool freeTensors) + : OpBase(physicalDevice, device, commandBuffer, tensors, freeTensors) { SPDLOG_DEBUG("Kompute OpMult constructor with params"); @@ -89,20 +93,20 @@ OpMult::~OpMult() template void -OpMult::init(std::vector> tensors) +OpMult::init() { SPDLOG_DEBUG("Kompute OpMult init called"); - if (tensors.size() < 3) { + if (this->mTensors.size() < 3) { throw std::runtime_error( - "Kompute OpMult called with less than 1 tensor"); + "Kompute OpMult called with less than 3 tensors"); - } else if (tensors.size() > 3) { - spdlog::warn("Kompute OpMult called with more than 3 tensors"); + } else if (this->mTensors.size() > 3) { + spdlog::warn("Kompute OpMult called with more than 3 tensors"); } - this->mTensorLHS = tensors[0]; - this->mTensorRHS = tensors[1]; - this->mTensorOutput = tensors[2]; + this->mTensorLHS = this->mTensors[0]; + this->mTensorRHS = this->mTensors[1]; + this->mTensorOutput = this->mTensors[2]; // The dispatch size is set up based on either explicitly provided template // parameters or by default it would take the shape and size of the tensors @@ -176,7 +180,7 @@ OpMult::init(std::vector> tensors) SPDLOG_DEBUG("Kompute OpMult Initialising algorithm component"); - this->mAlgorithm->init(shaderFileData, tensors); + this->mAlgorithm->init(shaderFileData, this->mTensors); } template diff --git a/src/include/kompute/Sequence.hpp 
b/src/include/kompute/Sequence.hpp index 7890b7b..71fb404 100644 --- a/src/include/kompute/Sequence.hpp +++ b/src/include/kompute/Sequence.hpp @@ -22,6 +22,9 @@ class Sequence uint32_t queueIndex); ~Sequence(); + // Initialiser + void init(); + // Record command functions void begin(); void end(); @@ -37,15 +40,15 @@ class Sequence SPDLOG_DEBUG("Kompute Sequence record function started"); SPDLOG_DEBUG("Kompute Sequence creating OpBase derived class instance"); - T* op = - new T(this->mPhysicalDevice, this->mDevice, this->mCommandBuffer); + T* op = new T( + this->mPhysicalDevice, this->mDevice, this->mCommandBuffer, tensors); OpBase* baseOp = dynamic_cast(op); std::unique_ptr baseOpPtr{ baseOp }; SPDLOG_DEBUG( "Kompute Sequence running init on OpBase derived class instance"); - baseOpPtr->init(tensors); + baseOpPtr->init(); SPDLOG_DEBUG( "Kompute Sequence running record on OpBase derived class instance"); diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp index 602eea5..d6a5659 100644 --- a/src/include/kompute/Tensor.hpp +++ b/src/include/kompute/Tensor.hpp @@ -57,7 +57,6 @@ class Tensor void mapDataFromHostMemory(); void mapDataIntoHostMemory(); - private: std::shared_ptr mPhysicalDevice; std::shared_ptr mDevice; diff --git a/test/TestManager.cpp b/test/TestManager.cpp index 8e0b74d..a2f8f49 100755 --- a/test/TestManager.cpp +++ b/test/TestManager.cpp @@ -74,8 +74,8 @@ TEST_CASE("End to end OpMult Flow should execute correctly from sequence") { kp::Manager mgr; - std::weak_ptr sq_ref = mgr.managedSequence(); - if (std::shared_ptr sq = sq_ref.lock()) { + std::weak_ptr sqWeakPtr = mgr.getOrCreateManagedSequence("newSequence"); + if (std::shared_ptr sq = sqWeakPtr.lock()) { sq->begin(); sq->record({ tensorLHS }); @@ -93,7 +93,7 @@ TEST_CASE("End to end OpMult Flow should execute correctly from sequence") { sq->end(); sq->eval(); } - sq_ref.reset(); + sqWeakPtr.reset(); spdlog::info("OpMult call success"); spdlog::info("Tensor output: {}", 
tensorOutput->data());