From 07bfbe3504d19103e0f418f9b8ea5a9c12c4eae5 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Wed, 2 Sep 2020 21:25:30 +0100
Subject: [PATCH] Updated to remove all todos

---
 src/Algorithm.cpp                             | 15 +--------------
 src/Manager.cpp                               |  2 +-
 src/Sequence.cpp                              |  2 --
 src/Tensor.cpp                                | 19 +++++++------------
 src/include/kompute/Algorithm.hpp             |  1 -
 src/include/kompute/Parameter.hpp             |  2 --
 src/include/kompute/Tensor.hpp                | 13 +++++++++++--
 src/include/kompute/operations/OpAlgoBase.hpp |  2 --
 .../kompute/operations/OpAlgoLhsRhsOut.hpp    |  3 ---
 9 files changed, 20 insertions(+), 39 deletions(-)
diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp
index c4bf8a9..238e790 100644
--- a/src/Algorithm.cpp
+++ b/src/Algorithm.cpp
@@ -91,7 +91,6 @@ Algorithm::init(const std::vector<char>& shaderFileData,
 {
     SPDLOG_DEBUG("Kompute Algorithm init started");
 
-    // TODO: Move to util function
     this->createParameters(tensorParams);
     this->createShaderModule(shaderFileData);
 
@@ -111,7 +110,6 @@ Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
 {
     SPDLOG_DEBUG("Kompute Algorithm createParameters started");
 
-    // TODO: Explore design for having multiple descriptor pool sizes
     std::vector<vk::DescriptorPoolSize> descriptorPoolSizes = {
         vk::DescriptorPoolSize(
           vk::DescriptorType::eStorageBuffer,
@@ -119,7 +117,6 @@ Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
           )
     };
 
-    // TODO: Explore design for having more than 1 set configurable
     vk::DescriptorPoolCreateInfo descriptorPoolInfo(
       vk::DescriptorPoolCreateFlags(),
       1, // Max sets
@@ -132,8 +129,6 @@ Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
       &descriptorPoolInfo, nullptr, this->mDescriptorPool.get());
 
     std::vector<vk::DescriptorSetLayoutBinding> descriptorSetBindings;
-    // TODO: Explore allowing descriptor set bind index to be configurable by
-    // user to specify which tensors woudl go on each binding
     for (size_t i = 0; i < tensorParams.size(); i++) {
         descriptorSetBindings.push_back(
           vk::DescriptorSetLayoutBinding(i, // Binding index
@@ -149,8 +144,6 @@ Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
       descriptorSetBindings.data());
 
     SPDLOG_DEBUG("Kompute Algorithm creating descriptor set layout");
-    // TODO: We createa  signle descriptor set layout which would have to be
-    // extended if multiple set layouts to be supported
     this->mDescriptorSetLayout = std::make_shared<vk::DescriptorSetLayout>();
     this->mDevice->createDescriptorSetLayout(
       &descriptorSetLayoutInfo, nullptr, this->mDescriptorSetLayout.get());
@@ -167,14 +160,13 @@ Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
                                           this->mDescriptorSet.get());
     this->mFreeDescriptorSet = true;
 
-    // TODO: Explore design exposing the destination array element
+    SPDLOG_DEBUG("Kompute Algorithm updating descriptor sets");
     for (size_t i = 0; i < tensorParams.size(); i++) {
         std::vector<vk::WriteDescriptorSet> computeWriteDescriptorSets;
 
         vk::DescriptorBufferInfo descriptorBufferInfo =
           tensorParams[i]->constructDescriptorBufferInfo();
 
-        // TODO: Explore design exposing the destination array element
         computeWriteDescriptorSets.push_back(
           vk::WriteDescriptorSet(*this->mDescriptorSet,
                                  i, // Destination binding
@@ -188,9 +180,6 @@ Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams)
                                             nullptr);
     }
 
-    SPDLOG_DEBUG("Kompute Algorithm updating descriptor sets");
-    // this->mDevice->updateDescriptorSets(computeWriteDescriptorSets, nullptr);
-
     SPDLOG_DEBUG("Kompue Algorithm successfully run init");
 }
 
@@ -220,7 +209,6 @@ Algorithm::createPipeline(std::vector<uint32_t> specializationData)
 {
     SPDLOG_DEBUG("Kompute Algorithm calling create Pipeline");
 
-    // TODO: Explore design for supporting multiple sets
     vk::PipelineLayoutCreateInfo pipelineLayoutInfo(
       vk::PipelineLayoutCreateFlags(),
       1, // Set layout count
@@ -261,7 +249,6 @@ Algorithm::createPipeline(std::vector<uint32_t> specializationData)
                                                vk::Pipeline(),
                                                0);
 
-    // TODO: Confirm what the best structure is with pipeline cache
     vk::PipelineCacheCreateInfo pipelineCacheInfo =
       vk::PipelineCacheCreateInfo();
     this->mPipelineCache = std::make_shared<vk::PipelineCache>();
diff --git a/src/Manager.cpp b/src/Manager.cpp
index 07a8cc1..0f4b2a8 100644
--- a/src/Manager.cpp
+++ b/src/Manager.cpp
@@ -29,7 +29,7 @@ Manager::Manager()
 Manager::Manager(uint32_t physicalDeviceIndex)
 {
     this->mPhysicalDeviceIndex = physicalDeviceIndex;
-    // TODO: Moving this into a separate init
+
     this->createInstance();
     this->createDevice();
 }
diff --git a/src/Sequence.cpp b/src/Sequence.cpp
index 29fc192..a803b5a 100644
--- a/src/Sequence.cpp
+++ b/src/Sequence.cpp
@@ -139,8 +139,6 @@ Sequence::eval()
     this->mDevice->waitForFences(1, &fence, VK_TRUE, UINT64_MAX);
     this->mDevice->destroy(fence);
 
-    // TODO: Explore whether moving postSubmit calls to a separate sequence
-    // function that is explicitly called by the manager
     for (size_t i = 0; i < this->mOperations.size(); i++) {
         this->mOperations[i]->postSubmit();
     }
diff --git a/src/Tensor.cpp b/src/Tensor.cpp
index 2aa50ec..dee9495 100644
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@@ -52,12 +52,18 @@ Tensor::init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
     this->createBuffer();
 }
 
-std::vector<float>
+std::vector<float>&
 Tensor::data()
 {
     return this->mData;
 }
 
+float&
+Tensor::operator[] (int index)
+{
+    return this->mData[index];
+}
+
 uint64_t
 Tensor::memorySize()
 {
@@ -105,14 +111,11 @@ Tensor::recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor,
           "Kompute Tensor attempted to run createBuffer without init");
     }
 
-    // TODO: Allow for dst and src offsets to be configured
-    // TODO: Test and ensure sizes for tensors are compatible
     vk::DeviceSize bufferSize(this->memorySize());
     vk::BufferCopy copyRegion(0, 0, bufferSize);
 
     SPDLOG_DEBUG("Kompute Tensor copying data size {}.", bufferSize);
 
-    // TODO: Ensure command buffer is in same device from buffer
     this->mCommandBuffer->copyBuffer(
       *copyFromTensor->mBuffer, *this->mBuffer, copyRegion);
 
@@ -151,7 +154,6 @@ Tensor::recordBufferMemoryBarrier(vk::AccessFlagBits srcAccessMask,
                                           nullptr);
 }
 
-// TODO: Explore if this function should be here or expose buffer
 vk::DescriptorBufferInfo
 Tensor::constructDescriptorBufferInfo()
 {
@@ -188,8 +190,6 @@ Tensor::mapDataIntoHostMemory()
 
     SPDLOG_DEBUG("Kompute Tensor local mapping tensor data to host buffer");
 
-    // TODO: Verify if there are situations where we want to copy to device
-    // memory
     if (this->mTensorType != TensorTypes::eStaging) {
         spdlog::error(
           "Mapping tensor data manually to DEVICE memory instead of "
@@ -199,14 +199,9 @@ Tensor::mapDataIntoHostMemory()
 
     vk::DeviceSize bufferSize = this->memorySize();
 
-    // TODO: Verify if flushed memory ranges should happend in sequence
     void* mapped = this->mDevice->mapMemory(
       *this->mMemory, 0, bufferSize, vk::MemoryMapFlags());
     memcpy(mapped, this->mData.data(), bufferSize);
-    this->mDevice->unmapMemory(*this->mMemory);
-
-    mapped = this->mDevice->mapMemory(
-      *this->mMemory, 0, bufferSize, vk::MemoryMapFlags());
     vk::MappedMemoryRange mappedRange(*this->mMemory, 0, bufferSize);
     this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
     this->mDevice->unmapMemory(*this->mMemory);
diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp
index c136f10..62a25a8 100644
--- a/src/include/kompute/Algorithm.hpp
+++ b/src/include/kompute/Algorithm.hpp
@@ -66,7 +66,6 @@ class Algorithm
     bool mFreeDescriptorSetLayout = false;
     std::shared_ptr<vk::DescriptorPool> mDescriptorPool;
     bool mFreeDescriptorPool = false;
-    // TODO: Explore design for multiple descriptor sets
     std::shared_ptr<vk::DescriptorSet> mDescriptorSet;
     bool mFreeDescriptorSet = false;
     std::shared_ptr<vk::ShaderModule> mShaderModule;
diff --git a/src/include/kompute/Parameter.hpp b/src/include/kompute/Parameter.hpp
index fc1e970..a37eb31 100644
--- a/src/include/kompute/Parameter.hpp
+++ b/src/include/kompute/Parameter.hpp
@@ -13,8 +13,6 @@ class Algorithm
 
     Algorithm(std::shared_ptr<vk::Device> device);
 
-    // TODO: Add specialisation data
-    // TODO: Explore other ways of passing shader (ie raw bytes)
     void init(std::string shaderFilePath,
               std::vector<std::shared_ptr<Tensor>> tensorParams);
 
diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp
index 629c64a..eea3d6c 100644
--- a/src/include/kompute/Tensor.hpp
+++ b/src/include/kompute/Tensor.hpp
@@ -68,9 +68,18 @@ class Tensor
      * important to ensure that there is no out-of-sync data with the GPU
      * memory.
      *
-     * @return Vector of elements representing the data in the tensor.
+     * @return Reference to vector of elements representing the data in the tensor.
      */
-    std::vector<float> data();
+    std::vector<float>& data();
+    /**
+     * Overrides the subscript operator to expose the underlying data's
+     * subscript operator which in this case would be its underlying
+     * vector's.
+     *
+     * @param i The index where the element will be returned from.
+     * @return Returns the element in the position requested.
+     */
+    float& operator[] (int index);
     /**
      * Returns the size/magnitude of the Tensor, which will be the total number
      * of elements across all dimensions
diff --git a/src/include/kompute/operations/OpAlgoBase.hpp b/src/include/kompute/operations/OpAlgoBase.hpp
index da84539..6c7ab10 100644
--- a/src/include/kompute/operations/OpAlgoBase.hpp
+++ b/src/include/kompute/operations/OpAlgoBase.hpp
@@ -181,8 +181,6 @@ OpAlgoBase<tX, tY, tZ>::OpAlgoBase(std::shared_ptr<vk::PhysicalDevice> physicalD
         this->mY = tY > 0 ? tY : 1;
         this->mZ = tZ > 0 ? tZ : 1;
     } else {
-        // TODO: If tensor empty vector exception would be thrown
-        // TODO: Fully support the full size dispatch using size for the shape
         this->mX = tensors[0]->size();
         this->mY = 1;
         this->mZ = 1;
diff --git a/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp b/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp
index ad89687..ecb7e33 100644
--- a/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp
+++ b/src/include/kompute/operations/OpAlgoLhsRhsOut.hpp
@@ -136,7 +136,6 @@ OpAlgoLhsRhsOut<tX, tY, tZ>::init()
     this->mTensorOutput = this->mTensors[2];
 
 
-    // TODO: Explore adding a validate function
     if (!(this->mTensorLHS->isInit() && this->mTensorRHS->isInit() &&
           this->mTensorOutput->isInit())) {
         throw std::runtime_error(
@@ -146,8 +145,6 @@ OpAlgoLhsRhsOut<tX, tY, tZ>::init()
           " Output: " + std::to_string(this->mTensorOutput->isInit()));
     }
 
-    // TODO: Explore use-cases where tensors shouldn't be the same size, and how
-    // to deal with those situations
     if (!(this->mTensorLHS->size() == this->mTensorRHS->size() &&
           this->mTensorRHS->size() == this->mTensorOutput->size())) {
         throw std::runtime_error(