Add Doxygen Documentation for HostTesnor, HostTensorDescriptor, DeviceMem, FillUniformDistribution (#2160)

* added documentation for HostTensorDescriptor * added documentation for DeviceMem and FillUniformDistribution * fixed merging error * fixed host_tensor_descriptor error * clang format
2026-05-02 20:51:23 +00:00 · 2025-05-21 12:34:30 -05:00
parent 990d645578
commit fa39c4e798
4 changed files with 139 additions and 7 deletions
--- a/include/ck_tile/host/device_memory.hpp
+++ b/include/ck_tile/host/device_memory.hpp
@@ -20,10 +20,35 @@ __global__ void set_buffer_value(T* p, T x, uint64_t buffer_element_size)
 }

 /**
- * @brief Container for storing data in GPU device memory
+ * @brief Manages device memory allocation and host-device data transfers
 *
+ * DeviceMem encapsulates GPU memory management operations using HIP runtime API.
+ * It provides functionality for allocating device memory, transferring data between
+ * host and device, and performing basic memory operations.
+ *
+ * Key features:
+ * - Automatic memory allocation and deallocation
+ * - Host-to-device and device-to-host data transfers
+ * - Memory initialization operations
+ * - Integration with HostTensor for simplified data handling
+ *
+ * Usage example:
+ * ```
+ * // Allocate device memory
+ * BHostTensor<float> AHostData({256});
+ * DeviceMem d_mem(BHostData.get_element_space_size_in_bytes());
+ *
+ * // Transfer data to device
+ * HostTensor<float> AHostTensor({256});
+ * d_mem.ToDevice(AHostData.data());
+ *
+ * // Retrieve data from device
+ * HostTensor<float> ResultHostTensor({256});
+ * d_mem.FromDevice(ResultHostTensor.data());
+ * ```
 */
 struct DeviceMem
+
 {
    DeviceMem() : mpDeviceBuf(nullptr), mMemSize(0) {}
    DeviceMem(std::size_t mem_size) : mMemSize(mem_size)
@@ -163,8 +188,8 @@ struct DeviceMem
        }
    }

-    void* mpDeviceBuf;
-    std::size_t mMemSize;
+    void* mpDeviceBuf;    ///< pointer to device buffer
+    std::size_t mMemSize; ///< size of device buffer in bytes
 };

 } // namespace ck_tile
--- a/include/ck_tile/host/fill.hpp
+++ b/include/ck_tile/host/fill.hpp
@@ -17,13 +17,31 @@

 namespace ck_tile {

+/**
+ * @brief Functor for filling a range with randomly generated values from a uniform distribution.
+ *
+ * This struct provides functionality to fill iterators or ranges with random values
+ * generated from a uniform distribution. It supports both single-threaded and
+ * multi-threaded operation.
+ *
+ * @tparam T The target type for the generated values.
+ *
+ * @note The multi-threaded implementation is not guaranteed to provide perfectly
+ * distributed values across threads.
+ *
+ * @example
+ *
+ *     // Direct usage without creating a separate variable:
+ *     ck_tile::FillUniformDistribution<ADataType>{-1.f, 1.f}(a_host_tensor);
+ */
 template <typename T>
 struct FillUniformDistribution
 {
    float a_{-5.f};
    float b_{5.f};
    std::optional<uint32_t> seed_{11939};
-    // ATTENTION: threaded does not guarantee the distribution between thread
+    // ATTENTION: Whether to use multi-threading (note: not guaranteed to be perfectly distributed
+    // across threads).
    bool threaded = false;

    template <typename ForwardIter>
--- a/include/ck_tile/host/host_tensor.hpp
+++ b/include/ck_tile/host/host_tensor.hpp
@@ -85,6 +85,19 @@ CK_TILE_HOST auto construct_f_unpack_args(F, T args)
    return construct_f_unpack_args_impl<F>(args, std::make_index_sequence<N>{});
 }

+/**
+ * @brief Descriptor for tensors in host memory.
+ *
+ * HostTensorDescriptor manages the shape (dimensions) and memory layout (strides)
+ * of a tensor in host memory. It provides functionality to:
+ * - Store tensor dimensions and strides
+ * - Calculate default strides for contiguous memory layout
+ * - Convert multi-dimensional indices to linear memory offsets
+ * - Query tensor metadata (dimensions, element counts, etc.)
+ *
+ * The class supports both automatic stride calculation for contiguous memory layout
+ * and custom strides for more complex memory patterns.
+ */
 struct HostTensorDescriptor
 {
    HostTensorDescriptor() = default;
@@ -138,12 +151,35 @@ struct HostTensorDescriptor
    }

    std::size_t get_num_of_dimension() const { return mLens.size(); }
+    /**
+     * @brief Calculates the total number of elements in the tensor.
+     *
+     * Computes the product of all dimension lengths to determine the
+     * total element count in the tensor.
+     *
+     * @pre The lengths array (mLens) and strides array (mStrides) must have
+     *      the same size.
+     *
+     * @return The total number of elements in the tensor.
+     */
    std::size_t get_element_size() const
    {
        assert(mLens.size() == mStrides.size());
        return std::accumulate(
            mLens.begin(), mLens.end(), std::size_t{1}, std::multiplies<std::size_t>());
    }
+    /**
+     * @brief Calculates the total element space required for the tensor in memory.
+     *
+     * This method computes the minimum size of contiguous memory needed to store
+     * all elements of the tensor, taking into account the tensor's dimensions and
+     * strides. The calculation is based on the formula: 1 + max((length_i - 1) * stride_i)
+     * across all dimensions.
+     *
+     * Dimensions with length 0 are skipped in this calculation.
+     *
+     * @return The size of the tensor's element space (number of elements).
+     */
    std::size_t get_element_space_size() const
    {
        std::size_t space = 1;
@@ -165,6 +201,18 @@ struct HostTensorDescriptor

    const std::vector<std::size_t>& get_strides() const { return mStrides; }

+    /**
+     * @brief Calculates the linear offset from multi-dimensional indices.
+     *
+     * Converts a set of N-dimensional indices into a single linear offset by computing
+     * the inner product of the indices with the tensor's strides.
+     *
+     * @tparam Is Parameter pack of index types (should be convertible to std::size_t)
+     * @param is Variable number of indices, one for each dimension of the tensor
+     * @return std::size_t Linear offset corresponding to the given multi-dimensional indices
+     *
+     * @pre The number of indices must match the number of dimensions in the tensor
+     */
    template <typename... Is>
    std::size_t GetOffsetFromMultiIndex(Is... is) const
    {
@@ -173,6 +221,15 @@ struct HostTensorDescriptor
        return std::inner_product(iss.begin(), iss.end(), mStrides.begin(), std::size_t{0});
    }

+    /**
+     * @brief Calculates the linear memory offset from a multi-dimensional index
+     *
+     * Computes the linear offset by performing an inner product between the provided
+     * multi-dimensional indices and the tensor's strides.
+     *
+     * @param iss Vector containing the multi-dimensional indices
+     * @return The calculated linear offset as a size_t
+     */
    std::size_t GetOffsetFromMultiIndex(std::vector<std::size_t> iss) const
    {
        return std::inner_product(iss.begin(), iss.end(), mStrides.begin(), std::size_t{0});
@@ -194,8 +251,8 @@ struct HostTensorDescriptor
    }

    private:
-    std::vector<std::size_t> mLens;
-    std::vector<std::size_t> mStrides;
+    std::vector<std::size_t> mLens;    ///< Lengths of each dimension
+    std::vector<std::size_t> mStrides; ///< Strides for each dimension
 };

 template <typename New2Old>
@@ -681,6 +738,24 @@ struct HostTensor
    Data mData;
 };

+/**
+ * @brief Creates a host tensor descriptor with specified dimensions and layout
+ *
+ * Constructs a HostTensorDescriptor with appropriate strides based on whether the tensor
+ * layout is row-major or column-major. This is determined via the compile-time template
+ * parameter `is_row_major`.
+ *
+ * @tparam is_row_major Compile-time flag indicating if the layout is row-major (true) or
+ * column-major (false)
+ *
+ * @param row Number of rows in the tensor
+ * @param col Number of columns in the tensor
+ * @param stride Stride between adjacent rows (for row-major) or columns (for column-major)
+ *
+ * @return HostTensorDescriptor with shape {row, col} and strides:
+ *         - For row-major: {stride, 1}
+ *         - For column-major: {1, stride}
+ */
 template <bool is_row_major>
 auto host_tensor_descriptor(std::size_t row,
                            std::size_t col,
@@ -698,6 +773,7 @@ auto host_tensor_descriptor(std::size_t row,
        return HostTensorDescriptor({row, col}, {1_uz, stride});
    }
 }
+
 template <bool is_row_major>
 auto get_default_stride(std::size_t row,
                        std::size_t col,
@@ -718,5 +794,4 @@ auto get_default_stride(std::size_t row,
    else
        return stride;
 }
-
 } // namespace ck_tile