From 9080b984cbb6b8dfd4f907cd6c3b19331ea78660 Mon Sep 17 00:00:00 2001 From: pmaybank <113125070+pmaybank@users.noreply.github.com> Date: Mon, 6 Mar 2023 17:39:16 +0000 Subject: [PATCH] Generate output using Doxygen / Breathe (#598) * Modify Doxygen config to pick up include directories recursively * Add DeviceMem struct to API Reference guide * Add classes that are used in Flash Attention kernel * Add a reference and config for generating bibliography Co-authored-by: Philip Maybank [ROCm/composable_kernel commit: e4bf6d422e3ffc52932f03658f5dbff8362c0946] --- docs/Doxyfile | 8 ++-- docs/source/API_Reference_Guide.rst | 41 ++++++++++++++++--- docs/source/conf.py | 5 ++- docs/source/refs.bib | 7 ++++ .../gpu/block/blockwise_gemm_xdlops.hpp | 15 ++++--- .../gpu/block/blockwise_softmax.hpp | 10 +++++ ...hread_group_tensor_slice_transfer_v4r1.hpp | 13 ++++-- ...ched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp | 4 ++ .../threadwise_tensor_slice_transfer.hpp | 7 +++- .../ck/library/utility/device_memory.hpp | 4 ++ 10 files changed, 94 insertions(+), 20 deletions(-) create mode 100644 docs/source/refs.bib diff --git a/docs/Doxyfile b/docs/Doxyfile index 958b3b6f44..ca354598b2 100644 --- a/docs/Doxyfile +++ b/docs/Doxyfile @@ -775,8 +775,10 @@ WARN_LOGFILE = # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. -INPUT = ../library/include \ - ../library/include/internal +INPUT = ../include/ck/tensor_operation/gpu/grid \ + ../include/ck/tensor_operation/gpu/block \ + ../include/ck/tensor_operation/gpu/thread \ + ../library/include/ck/library/utility # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses @@ -845,7 +847,7 @@ FILE_PATTERNS = *.c \ # be searched for input files as well. # The default value is: NO. 
-RECURSIVE = NO +RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a diff --git a/docs/source/API_Reference_Guide.rst b/docs/source/API_Reference_Guide.rst index 1ad2ecd9a9..3665049dd6 100644 --- a/docs/source/API_Reference_Guide.rst +++ b/docs/source/API_Reference_Guide.rst @@ -1,11 +1,11 @@ -=================== +******************* API Reference Guide -=================== +******************* ------------- +================= Introduction ------------- +================= This document contains details of the APIs for the Composable Kernel (CK) library and introduces some of the key design principles that are used to write new classes that extend CK functionality. @@ -16,8 +16,37 @@ Using CK API This section describes how to use the CK library API. ------------------ +================= CK Datatypes +================= + +----------------- +DeviceMem ----------------- -[TODO] \ No newline at end of file +.. doxygenstruct:: DeviceMem + +--------------------------- +Kernels For Flashattention +--------------------------- + +The Flashattention algorithm is defined in :cite:t:`dao2022flashattention`. This section lists the classes that are +used in the CK GPU implementation of Flashattention. + +**Gridwise classes** + +.. doxygenstruct:: ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle + +**Blockwise classes** + +.. doxygenstruct:: ck::ThreadGroupTensorSliceTransfer_v4r1 + +.. doxygenstruct:: ck::BlockwiseGemmXdlops_v2 + +.. doxygenstruct:: ck::BlockwiseSoftmax + +**Threadwise classes** + +.. doxygenstruct:: ck::ThreadwiseTensorSliceTransfer_StaticToStatic + +.. bibliography:: \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 8968e2fbe6..65ac187034 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -59,10 +59,13 @@ if read_the_docs_build: # Add any Sphinx extension module names here, as strings. 
They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ['sphinx.ext.mathjax', 'breathe'] +extensions = ['sphinx.ext.mathjax', 'breathe', 'sphinxcontrib.bibtex'] + breathe_projects = { "CK": "../docBin/xml" } breathe_default_project = "CK" +bibtex_bibfiles = ['refs.bib'] + # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] diff --git a/docs/source/refs.bib b/docs/source/refs.bib new file mode 100644 index 0000000000..3c6d775a7e --- /dev/null +++ b/docs/source/refs.bib @@ -0,0 +1,7 @@ + +@article{dao2022flashattention, + title={Flashattention: Fast and memory-efficient exact attention with io-awareness}, + author={Dao, Tri and Fu, Daniel Y and Ermon, Stefano and Rudra, Atri and R{\'e}, Christopher}, + journal={arXiv preprint arXiv:2205.14135}, + year={2022} +} diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp index aeef03d51f..5328dfde9b 100644 --- a/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp +++ b/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp @@ -622,11 +622,16 @@ constexpr auto BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_Selector() } }; -// Blockwise gemm supporting -// 1. regular XDL output M2_M3_M4_M2 and transposed XDL output M2_N2_N3_N4 -// 2. decoupled input tile descriptor and mma tile descriptor in order to support both vgpr and LDS -// source buffer -// 3. configurable k index starting position and step size after each FMA/XDL instruction +/** + * @brief Blockwise gemm + * + * Supports + * 1. regular XDL output M2_M3_M4_M2 and transposed XDL output M2_N2_N3_N4 + * 2. decoupled input tile descriptor and mma tile descriptor in order to support both vgpr and LDS + * source buffer + * 3. configurable k index starting position and step size after each FMA/XDL instruction + */ + template