From d59dc41ffcbac3bd2ba00eebebe1db13ffb15685 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Fri, 21 Aug 2020 19:15:07 +0100
Subject: [PATCH] Added initial implementation for algorithm and opMult

---
 src/Algorithm.cpp      | 178 +++++++++++++++++++++++++++++++++++++++++
 src/Algorithm.hpp      |  32 +++++++-
 src/OpBase.hpp         |   6 ++
 src/OpCreateTensor.cpp |   6 ++
 src/OpCreateTensor.hpp |   2 +
 src/OpMult.cpp         |  29 ++++++-
 src/OpMult.hpp         |  10 ++-
 src/Parameter.hpp      |  52 +++++++++++-
 src/Tensor.cpp         |  62 +++++++++++---
 src/Tensor.hpp         |  10 +++
 src/main.cpp           |  26 ++++--
 11 files changed, 385 insertions(+), 28 deletions(-)
 create mode 100644 src/Algorithm.cpp
diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp
new file mode 100644
index 0000000..ced642d
--- /dev/null
+++ b/src/Algorithm.cpp
@@ -0,0 +1,178 @@
+#include <fstream>
+
+#include "Algorithm.hpp"
+
+namespace kp {
+
+Algorithm::Algorithm()
+{ // Members keep their in-class defaults: null shared pointers and false mFree* flags.
+    SPDLOG_DEBUG("Kompute Algorithm base constructor");
+}
+
+// Stores shared handles to the device and the command buffer used by the other methods.
+Algorithm::Algorithm(std::shared_ptr<vk::Device> device, std::shared_ptr<vk::CommandBuffer> commandBuffer)
+  : mDevice(device)
+  , mCommandBuffer(commandBuffer)
+{
+    SPDLOG_DEBUG("Kompute Algorithm Constructor with device");
+}
+
+Algorithm::~Algorithm()
+{
+    SPDLOG_DEBUG("Kompute Algorithm Destructor started");
+    // NOTE(review): nothing created by this class is destroyed here and the mFree* flags are not read in the code shown - confirm cleanup is still pending.
+    if (!this->mDevice) {
+        spdlog::error(
+          "Kompute Algorithm destructor reached with null Device pointer");
+        return;
+    }
+}
+
+// Builds the descriptors for tensorParams, then the shader module, then the pipeline.
+// TODO: Move to util function
+void
+Algorithm::init(std::string shaderFilePath, std::vector<std::shared_ptr<Tensor>> tensorParams)
+{
+    SPDLOG_DEBUG("Kompute Algorithm init started");
+    spdlog::info("Loading shader with file path {}", shaderFilePath);
+    createParameters(tensorParams);
+    createShaderModule(shaderFilePath);
+    createPipeline();
+}
+
+// Creates the descriptor pool, the descriptor set layout and a single descriptor
+// set that exposes every tensor in tensorParams as a storage buffer; the tensor
+// at position i is written to binding i of that set.
+// Throws std::runtime_error if tensorParams is empty or the set is not allocated.
+void Algorithm::createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams) {
+    if (tensorParams.empty()) {
+        throw std::runtime_error("Kompute Algorithm createParameters called without tensors");
+    }
+    const uint32_t tensorCount = static_cast<uint32_t>(tensorParams.size());
+    // A single pool entry provides the storage buffer descriptors of all tensors
+    vk::DescriptorPoolSize descriptorPoolSize(
+        vk::DescriptorType::eStorageBuffer,
+        tensorCount); // Descriptor count
+
+    // TODO: Explore design for having more than 1 set configurable
+    vk::DescriptorPoolCreateInfo descriptorPoolInfo(
+        vk::DescriptorPoolCreateFlags(),
+        1, // Max sets
+        1, // Pool size count
+        &descriptorPoolSize);
+
+    this->mDescriptorPool = std::make_shared<vk::DescriptorPool>();
+    this->mDevice->createDescriptorPool(&descriptorPoolInfo, nullptr, this->mDescriptorPool.get());
+
+    // TODO: Explore allowing descriptor set bind index
+    std::vector<vk::DescriptorSetLayoutBinding> descriptorSetBindings;
+    descriptorSetBindings.reserve(tensorCount);
+    for (uint32_t i = 0; i < tensorCount; i++) {
+        descriptorSetBindings.push_back(
+            vk::DescriptorSetLayoutBinding(
+                i, // Binding index
+                vk::DescriptorType::eStorageBuffer,
+                1, // Descriptor count
+                vk::ShaderStageFlagBits::eCompute));
+    }
+
+    // This is the component that is fed into the pipeline
+    vk::DescriptorSetLayoutCreateInfo descriptorSetLayoutInfo(
+        vk::DescriptorSetLayoutCreateFlags(),
+        tensorCount,
+        descriptorSetBindings.data());
+
+    // TODO: We create a single descriptor set layout which would have to be extended if multiple set layouts are to be supported
+    this->mDescriptorSetLayout = std::make_shared<vk::DescriptorSetLayout>();
+    this->mDevice->createDescriptorSetLayout(&descriptorSetLayoutInfo, nullptr, this->mDescriptorSetLayout.get());
+
+    vk::DescriptorSetAllocateInfo descriptorSetAllocateInfo(
+        *this->mDescriptorPool,
+        1, // Descriptor set layout count
+        this->mDescriptorSetLayout.get());
+    std::vector<vk::DescriptorSet> descriptorSets =
+        this->mDevice->allocateDescriptorSets(descriptorSetAllocateInfo);
+
+    // One set is requested above regardless of how many tensors are bound
+    if (descriptorSets.size() != 1) {
+        throw std::runtime_error("Number of descriptor sets does not match the single set requested");
+    }
+    this->mDescriptorSets.push_back(std::make_shared<vk::DescriptorSet>(descriptorSets[0]));
+
+    // The writes below keep pointers into this vector, so it is sized up front
+    std::vector<vk::DescriptorBufferInfo> descriptorBufferInfos;
+    descriptorBufferInfos.reserve(tensorCount);
+    std::vector<vk::WriteDescriptorSet> computeWriteDescriptorSets;
+    computeWriteDescriptorSets.reserve(tensorCount);
+    for (uint32_t i = 0; i < tensorCount; i++) {
+        descriptorBufferInfos.push_back(tensorParams[i]->constructDescriptorBufferInfo());
+        computeWriteDescriptorSets.push_back(vk::WriteDescriptorSet(
+            descriptorSets[0], i, 0, 1, // Set, binding, array element, descriptor count
+            vk::DescriptorType::eStorageBuffer, nullptr, &descriptorBufferInfos[i]));
+    }
+    this->mDevice->updateDescriptorSets(computeWriteDescriptorSets, nullptr);
+}
+
+// Reads the SPIR-V binary at shaderFilePath and creates mShaderModule from it.
+// Throws std::runtime_error when the file cannot be opened.
+void Algorithm::createShaderModule(std::string shaderFilePath) {
+    std::ifstream fileStream(shaderFilePath, std::ios::binary | std::ios::in | std::ios::ate); // ate: tellg() yields the size
+    if (!fileStream.is_open()) {
+        throw std::runtime_error("Kompute Algorithm could not open shader file " + shaderFilePath);
+    }
+    const size_t shaderFileSize = static_cast<size_t>(fileStream.tellg());
+    fileStream.seekg(0, std::ios::beg);
+    std::vector<char> shaderFileData(shaderFileSize); // owns the bytes; released on scope exit
+    fileStream.read(shaderFileData.data(), shaderFileSize);
+    vk::ShaderModuleCreateInfo shaderModuleInfo(vk::ShaderModuleCreateFlags(), shaderFileSize, reinterpret_cast<const uint32_t*>(shaderFileData.data()));
+    this->mFreeShaderModule = true;
+    this->mShaderModule = std::make_shared<vk::ShaderModule>(); // non-null storage for the created handle
+    this->mDevice->createShaderModule(&shaderModuleInfo, nullptr, this->mShaderModule.get());
+}
+
+// Creates the pipeline layout, a pipeline cache and the compute pipeline that runs the
+// "main" entry point of mShaderModule; throws std::runtime_error if the pipeline result is not eSuccess.
+void Algorithm::createPipeline() {
+    SPDLOG_DEBUG("Kompute Algorithm calling create Pipeline");
+    vk::PipelineLayoutCreateInfo pipelineLayoutInfo(
+        vk::PipelineLayoutCreateFlags(),
+        1, // Set layout count
+        this->mDescriptorSetLayout.get());
+    this->mPipelineLayout = std::make_shared<vk::PipelineLayout>();
+    this->mDevice->createPipelineLayout(&pipelineLayoutInfo, nullptr, this->mPipelineLayout.get());
+
+    vk::PipelineShaderStageCreateInfo shaderStage(vk::PipelineShaderStageCreateFlags(), vk::ShaderStageFlagBits::eCompute, *this->mShaderModule, "main", nullptr);
+    vk::ComputePipelineCreateInfo pipelineInfo(vk::PipelineCreateFlags(), shaderStage, *this->mPipelineLayout, vk::Pipeline(), 0);
+
+    // TODO: Confirm what the best structure is with pipeline cache
+    // The cache handle has to be created by the device from the (default) create info
+    vk::PipelineCacheCreateInfo pipelineCacheInfo;
+    this->mFreePipelineCache = true;
+    this->mPipelineCache = std::make_shared<vk::PipelineCache>();
+    this->mDevice->createPipelineCache(&pipelineCacheInfo, nullptr, this->mPipelineCache.get());
+
+    vk::ResultValue<vk::Pipeline> pipelineResult = this->mDevice->createComputePipeline(*this->mPipelineCache, pipelineInfo);
+    if (pipelineResult.result != vk::Result::eSuccess) {
+        throw std::runtime_error("Failed to create pipeline result: " + vk::to_string(pipelineResult.result));
+    }
+    this->mFreePipeline = true;
+    this->mPipeline = std::make_shared<vk::Pipeline>(pipelineResult.value);
+}
+
+// Records into mCommandBuffer: bind the compute pipeline, bind all descriptor
+// sets starting at set 0, then dispatch x * y * z workgroups.
+void Algorithm::recordDispatch(uint32_t x, uint32_t y, uint32_t z) {
+    SPDLOG_DEBUG("Kompute Algorithm calling record dispatch");
+
+    this->mCommandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute, *this->mPipeline);
+    // TODO: Simplify interaction given we store array of pointers
+    std::vector<vk::DescriptorSet> descriptorSets; // handles by value: a vector cannot hold references
+    descriptorSets.reserve(this->mDescriptorSets.size());
+    for (const std::shared_ptr<vk::DescriptorSet>& descriptorSet : this->mDescriptorSets) {
+        descriptorSets.push_back(*descriptorSet);
+    }
+    this->mCommandBuffer->bindDescriptorSets(vk::PipelineBindPoint::eCompute, *this->mPipelineLayout, 0, descriptorSets, nullptr);
+    this->mCommandBuffer->dispatch(x, y, z);
+}
+
+}
diff --git a/src/Algorithm.hpp b/src/Algorithm.hpp
index 878d4df..7620ece 100644
--- a/src/Algorithm.hpp
+++ b/src/Algorithm.hpp
@@ -19,13 +19,43 @@ class Algorithm
   public:
     Algorithm();
 
-    Algorithm(std::shared_ptr<vk::Device> device);
+    Algorithm(std::shared_ptr<vk::Device> device, std::shared_ptr<vk::CommandBuffer> commandBuffer);
 
     // TODO: Add specialisation data
+    // TODO: Explore other ways of passing shader (ie raw bytes)
     void init(std::string shaderFilePath,
               std::vector<std::shared_ptr<Tensor>> tensorParams);
 
     ~Algorithm();
+
+    // Record commands
+    void recordDispatch(uint32_t x, uint32_t y, uint32_t z);
+
+private:
+    // Shared resources
+    std::shared_ptr<vk::Device> mDevice;
+    std::shared_ptr<vk::CommandBuffer> mCommandBuffer;
+
+    // Resources owned by default
+    std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
+    bool mFreeDescriptorSetLayout = false;
+    std::shared_ptr<vk::DescriptorPool> mDescriptorPool;
+    bool mFreeDescriptorPool = false;
+    std::vector<std::shared_ptr<vk::DescriptorSet>> mDescriptorSets;
+    bool mFreeDescriptorSet = false;
+    std::shared_ptr<vk::ShaderModule> mShaderModule;
+    bool mFreeShaderModule = false;
+    std::shared_ptr<vk::PipelineLayout> mPipelineLayout;
+    bool mFreePipelineLayout = false;
+    std::shared_ptr<vk::PipelineCache> mPipelineCache;
+    bool mFreePipelineCache = false;
+    std::shared_ptr<vk::Pipeline> mPipeline;
+    bool mFreePipeline = false;
+
+    // Create util functions
+    void createParameters(std::vector<std::shared_ptr<Tensor>>& tensorParams);
+    void createShaderModule(std::string shaderFilePath);
+    void createPipeline();
 };
 
 } // End namespace kp
diff --git a/src/OpBase.hpp b/src/OpBase.hpp
index c2f6493..a4ce407 100644
--- a/src/OpBase.hpp
+++ b/src/OpBase.hpp
@@ -42,6 +42,12 @@ class OpBase
 
     virtual void record() { SPDLOG_DEBUG("Kompute OpBase record called"); }
 
+    // Overridable hook; this base version only logs. Presumably called once the recorded commands were submitted - confirm against the caller.
+    virtual void postSubmit()
+    {
+        SPDLOG_DEBUG("Kompute OpBase postSubmit called");
+    }
+
   protected:
     std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
     std::shared_ptr<vk::Device> mDevice;
diff --git a/src/OpCreateTensor.cpp b/src/OpCreateTensor.cpp
index 8284b3b..cc82b9d 100644
--- a/src/OpCreateTensor.cpp
+++ b/src/OpCreateTensor.cpp
@@ -65,4 +65,10 @@ OpCreateTensor::record()
     }
 }
 
+// Override of OpBase::postSubmit that currently only emits a debug log.
+void OpCreateTensor::postSubmit()
+{
+    SPDLOG_DEBUG("Kompute OpCreateTensor postSubmit called");
+}
+
 }
diff --git a/src/OpCreateTensor.hpp b/src/OpCreateTensor.hpp
index b5fb16d..3ca15e6 100644
--- a/src/OpCreateTensor.hpp
+++ b/src/OpCreateTensor.hpp
@@ -31,6 +31,8 @@ class OpCreateTensor : public OpBase
 
     void record() override;
 
+    void postSubmit() override;
+
   private:
     std::shared_ptr<Tensor> mPrimaryTensor;
     std::shared_ptr<Tensor> mStagingTensor;
diff --git a/src/OpMult.cpp b/src/OpMult.cpp
index 6d4c49a..89217e8 100644
--- a/src/OpMult.cpp
+++ b/src/OpMult.cpp
@@ -11,12 +11,15 @@ OpMult::OpMult()
     SPDLOG_DEBUG("Kompute OpMult constructor base");
 }
 
+// TODO: Remove physicalDevice from main initialiser
 OpMult::OpMult(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                std::shared_ptr<vk::Device> device,
                std::shared_ptr<vk::CommandBuffer> commandBuffer)
   : OpBase(physicalDevice, device, commandBuffer)
 {
     SPDLOG_DEBUG("Kompute OpMult constructor with params");
+    // mAlgorithm is not in the initialiser list, so it is replaced here by one bound to this device and command buffer
+    this->mAlgorithm = Algorithm(device, commandBuffer);
 }
 
 OpMult::~OpMult()
@@ -29,18 +32,40 @@ OpMult::init(std::vector<std::shared_ptr<Tensor>> tensors)
 {
     SPDLOG_DEBUG("Kompute OpMult init called");
 
-    if (tensors.size() < 2) {
+    if (tensors.size() < 3) {
         throw std::runtime_error(
-          "Kompute OpMult called with less than 1 tensor");
+          "Kompute OpMult called with less than 3 tensors");
-    } else if (tensors.size() > 2) {
+    } else if (tensors.size() > 3) {
-        spdlog::warn("Kompute OpMult called with more than 2 tensor");
+        spdlog::warn("Kompute OpMult called with more than 3 tensors");
     }
+    // tensors[0] and tensors[1] are the two operands, tensors[2] receives the output; extra tensors are only warned about
+    this->mTensorLHS = tensors[0];
+    this->mTensorRHS = tensors[1];
+    this->mTensorOutput = tensors[2];
+
+    this->mTensorOutputStaging= std::make_shared<Tensor>(
+      this->mTensorOutput->data(), Tensor::TensorTypes::eStaging);
+    // NOTE(review): the staging tensor is not initialised in this function, while Tensor::recordCopyFrom throws when mIsInit is false - confirm it is initialised before record()
+    this->mAlgorithm.init(
+        "shaders/glsl/computeheadless.comp.spv", tensors);
 }
 
 void
 OpMult::record()
 {
     SPDLOG_DEBUG("Kompute OpMult record called");
+    // Records the algorithm's pipeline/descriptor binds and a dispatch of a single 1x1x1 workgroup grid
+    this->mAlgorithm.recordDispatch(1, 1, 1);
+    // Records the output -> staging copy. NOTE(review): no barrier is recorded here between dispatch and copy - confirm ordering is handled elsewhere
+    this->mTensorOutputStaging->recordCopyFrom(this->mTensorOutput);
+}
+
+void OpMult::postSubmit()
+{
+    SPDLOG_DEBUG("Kompute OpMult postSubmit called");
+    // Read the staging tensor's mapped memory back into its data vector, then hand that data to the output tensor
+    this->mTensorOutputStaging->copyDataFromHostBuffer();
+    this->mTensorOutput->setData(this->mTensorOutputStaging->data());
 }
 
 }
diff --git a/src/OpMult.hpp b/src/OpMult.hpp
index 16a9bae..364029f 100644
--- a/src/OpMult.hpp
+++ b/src/OpMult.hpp
@@ -11,6 +11,7 @@
 #include <spdlog/spdlog.h>
 
 #include "Tensor.hpp"
+#include "Algorithm.hpp"
 
 #include "OpBase.hpp"
 
@@ -31,9 +32,14 @@ class OpMult : public OpBase
 
     void record() override;
 
+    void postSubmit() override;
+
   private:
-    std::shared_ptr<Tensor> mPrimaryTensor;
-    std::shared_ptr<Tensor> mStagingTensor;
+    Algorithm mAlgorithm;
+    std::shared_ptr<Tensor> mTensorLHS;
+    std::shared_ptr<Tensor> mTensorRHS;
+    std::shared_ptr<Tensor> mTensorOutput;
+    std::shared_ptr<Tensor> mTensorOutputStaging;
 };
 
 } // End namespace kp
diff --git a/src/Parameter.hpp b/src/Parameter.hpp
index a01a44a..739c638 100644
--- a/src/Parameter.hpp
+++ b/src/Parameter.hpp
@@ -1,13 +1,57 @@
 #pragma once
 
+#include <vulkan/vulkan.h>
+#include <vulkan/vulkan.hpp>
+
+// SPDLOG_ACTIVE_LEVEL must be defined before spdlog.h import
+#if DEBUG
+#define SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_DEBUG
+#endif
+
+#include <spdlog/spdlog.h>
+
+#include "Tensor.hpp"
+
 namespace kp {
 
-class Parameter
+class Algorithm
 {
-  private:
   public:
-    Parameter();
-    virtual ~Parameter();
+    Algorithm();
+    // NOTE(review): this declares kp::Algorithm a second time, with a different interface from Algorithm.hpp (one-argument constructor, single mDescriptorSet, parameterless createParameters) - confirm Parameter.hpp was meant to change.
+    Algorithm(std::shared_ptr<vk::Device> device);
+
+    // TODO: Add specialisation data
+    // TODO: Explore other ways of passing shader (ie raw bytes)
+    void init(std::string shaderFilePath,
+              std::vector<std::shared_ptr<Tensor>> tensorParams);
+
+    ~Algorithm();
+
+private:
+    // Shared resources
+    std::shared_ptr<vk::Device> mDevice;
+
+    // Resources owned by default
+    std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
+    bool mFreeDescriptorSetLayout = false;
+    std::shared_ptr<vk::DescriptorPool> mDescriptorPool;
+    bool mFreeDescriptorPool = false;
+    std::shared_ptr<vk::DescriptorSet> mDescriptorSet;
+    bool mFreeDescriptorSet = false;
+    std::shared_ptr<vk::ShaderModule> mShaderModule;
+    bool mFreeShaderModule = false;
+    std::shared_ptr<vk::PipelineLayout> mPipelineLayout;
+    bool mFreePipelineLayout = false;
+    std::shared_ptr<vk::PipelineCache> mPipelineCache;
+    bool mFreePipelineCache = false;
+    std::shared_ptr<vk::Pipeline> mPipeline;
+    bool mFreePipeline = false;
+
+    // Create util functions
+    void createParameters();
+    void createShaderModule(std::string shaderFilePath);
+    void createPipeline();
 };
 
 } // End namespace kp
diff --git a/src/Tensor.cpp b/src/Tensor.cpp
index 7efcdd2..d98105d 100644
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@@ -105,12 +105,16 @@ Tensor::isInit()
     return this->mIsInit;
 }
 
+void Tensor::setData(const std::vector<uint32_t>& data) {
+    this->mData = data; // Replaces the host-side copy only; nothing is written to the Vulkan buffer here.
+}
+
 void
 Tensor::recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor)
 {
     SPDLOG_DEBUG("Kompute Tensor recordCopyFrom called");
 
-    if (!this->mIsInit) {
+    if (!this->mIsInit || !copyFromTensor->mIsInit) {
         throw std::runtime_error(
           "Kompute Tensor attempted to run createBuffer without init");
     }
@@ -126,9 +130,53 @@ Tensor::recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor)
     this->mCommandBuffer->copyBuffer(
       *copyFromTensor->mBuffer, *this->mBuffer, copyRegion);
 
+    // TODO: Ensure copied data is consistent with device
     this->mData = copyFromTensor->mData;
 }
 
+// TODO: Explore if this function should be here or expose buffer
+// Describes the whole buffer (offset 0, memorySize() bytes) for a descriptor write; throws std::runtime_error before init.
+vk::DescriptorBufferInfo Tensor::constructDescriptorBufferInfo() {
+    if (!this->mIsInit) {
+        throw std::runtime_error("Kompute Tensor attempted to construct descriptor buffer info without init");
+    }
+    return vk::DescriptorBufferInfo(*this->mBuffer, 0, this->memorySize());
+}
+
+// Copies memorySize() bytes from the tensor's mapped memory into mData; non-staging tensors only trigger a warning.
+void Tensor::copyDataFromHostBuffer() {
+    SPDLOG_DEBUG("Kompute Tensor copying data from host buffer");
+    if (this->mTensorType != TensorTypes::eStaging) {
+        spdlog::warn("Copying tensor data manually from DEVICE buffer instead of using record GPU command");
+    }
+    // Map, invalidate the range so device writes become visible to the host, copy into mData, then unmap
+    vk::DeviceSize bufferSize = this->memorySize();
+    void* mapped = this->mDevice->mapMemory(*this->mMemory, 0, bufferSize, vk::MemoryMapFlags());
+    vk::MappedMemoryRange mappedMemoryRange(*this->mMemory, 0, bufferSize);
+    this->mDevice->invalidateMappedMemoryRanges(mappedMemoryRange);
+    memcpy(this->mData.data(), mapped, bufferSize);
+    this->mDevice->unmapMemory(*this->mMemory);
+}
+
+// Maps the tensor's memory, copies memorySize() bytes of mData into it, flushes the mapped range and unmaps.
+void Tensor::copyDataToHostBuffer() {
+    SPDLOG_DEBUG("Kompute Tensor copying data to buffer");
+    // Only warns for non-staging tensors; the copy below is attempted regardless
+    if (this->mTensorType != TensorTypes::eStaging) {
+        spdlog::warn("Copying tensor data manually to DEVICE buffer instead of using record GPU command");
+    }
+
+    vk::DeviceSize bufferSize = this->memorySize();
+    // NOTE(review): assumes mData holds at least memorySize() bytes - confirm against memorySize()
+    // TODO: Verify if flushed memory ranges should happen in sequence
+    void* mapped = this->mDevice->mapMemory(
+      *this->mMemory, 0, bufferSize, vk::MemoryMapFlags());
+    memcpy(mapped, this->mData.data(), bufferSize);
+    vk::MappedMemoryRange mappedRange(*this->mMemory, 0, bufferSize);
+    this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
+    this->mDevice->unmapMemory(*this->mMemory);
+}
+
 vk::BufferUsageFlags
 Tensor::getBufferUsageFlags()
 {
@@ -249,17 +297,7 @@ Tensor::createBuffer(void* data)
     SPDLOG_DEBUG("Kompute Tensor buffer & memory creation successful");
 
     if (data != nullptr) {
-        SPDLOG_DEBUG("Kompute Tensor mapping data to buffer");
-
-        // TODO: Verify if flushed memory ranges should happend in sequence
-        void* mapped = this->mDevice->mapMemory(
-          *this->mMemory, 0, bufferSize, vk::MemoryMapFlags());
-        memcpy(mapped, data, bufferSize);
-        vk::MappedMemoryRange mappedRange(*this->mMemory, 0, bufferSize);
-        this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
-        this->mDevice->unmapMemory(*this->mMemory);
-
-        SPDLOG_DEBUG("Kompute Tensor successful copy data to tensor");
+        this->copyDataToHostBuffer();
     }
 }
 
diff --git a/src/Tensor.hpp b/src/Tensor.hpp
index 611b3c4..9880a80 100644
--- a/src/Tensor.hpp
+++ b/src/Tensor.hpp
@@ -46,8 +46,18 @@ class Tensor
     TensorTypes tensorType();
     bool isInit();
 
+    // Setters
+    void setData(const std::vector<uint32_t>& data);
+
     // Record functions
     void recordCopyFrom(std::shared_ptr<Tensor> copyFromTensor);
+    // TODO: Add memory buffer barrier capabilities
+    //void recordBufferMemoryBarrier();
+
+    // Util functions
+    vk::DescriptorBufferInfo constructDescriptorBufferInfo();
+    void copyDataFromHostBuffer();
+    void copyDataToHostBuffer();
 
   private:
     std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
diff --git a/src/main.cpp b/src/main.cpp
index 5cfbf11..89616f8 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -22,6 +22,7 @@
 
 #include "Manager.hpp"
 #include "OpCreateTensor.hpp"
+#include "OpMult.hpp"
 #include "Tensor.hpp"
 
 #define BUFFER_ELEMENTS 32
@@ -493,6 +494,7 @@ class VulkanCompute
                   nullptr,
                   bufferMemoryBarrier,
                   nullptr);
+
                 this->mCommandBuffer.bindPipeline(
                   vk::PipelineBindPoint::eCompute, this->mPipeline);
                 this->mCommandBuffer.bindDescriptorSets(
@@ -623,18 +625,28 @@ main()
         kp::Manager mgr;
 
         spdlog::info("Creating first tensor");
-        std::shared_ptr<kp::Tensor> tensorOne{ new kp::Tensor(
+        std::shared_ptr<kp::Tensor> tensorLHS{ new kp::Tensor(
           { 0.0, 1.0, 2.0 }) };
-        mgr.evalOp<kp::OpCreateTensor>({ tensorOne });
+        mgr.evalOp<kp::OpCreateTensor>({ tensorLHS });
 
         spdlog::info("Creating second tensor");
-        std::shared_ptr<kp::Tensor> tensorTwo{ new kp::Tensor(
-          { 0.0, 1.0, 2.0 }) };
-        mgr.evalOp<kp::OpCreateTensor>({ tensorTwo });
+        std::shared_ptr<kp::Tensor> tensorRHS{ new kp::Tensor(
+          { 2.0, 4.0, 6.0 }) };
+        mgr.evalOp<kp::OpCreateTensor>({ tensorRHS });
+
+        // TODO: Add capabilities for just output tensor types
+        spdlog::info("Creating output tensor");
+        std::shared_ptr<kp::Tensor> tensorOutput{ new kp::Tensor(
+            { 0.0, 0.0, 0.0 }) };
+        mgr.evalOp<kp::OpCreateTensor>({ tensorOutput });
 
         spdlog::info("Called manager eval success");
-        spdlog::info("Tensor one: {}", tensorOne->data());
-        spdlog::info("Tensor two: {}", tensorTwo->data());
+        spdlog::info("Tensor one: {}", tensorLHS->data());
+        spdlog::info("Tensor two: {}", tensorRHS->data());
+        spdlog::info("Tensor two: {}", tensorOutput->data());
+
+        spdlog::info("Calling op mult");
+        mgr.evalOp<kp::OpMult>({ tensorLHS, tensorRHS, tensorOutput });
 
         return 0;
     } catch (const std::exception& exc) {