diff --git a/docs/reference/pipelines/index.rst b/docs/reference/pipelines/index.rst index 956b509397..5e0d500a13 100644 --- a/docs/reference/pipelines/index.rst +++ b/docs/reference/pipelines/index.rst @@ -17,9 +17,20 @@ GEMM Pipelines defined in ``include/ck/tensor_operation/gpu/block`` directory. gemm(i) lds_write(i+1) +:doc:`BlockwiseGemmXdlops_pipeline_v2 <xdl-v2>` + +.. code-block:: + + buffer_load(0) + lds_write(0) + buffer_load(1:prefetch) + lds_read(i) + gemm(i) + lds_write(i+1) + buffer_load(i+prefetch) + **TODO** -- BlockwiseGemmXdlops_pipeline_v2 - BlockwiseGemmXdlops_pipeline_v3 CK_TILE GEMM pipelines @@ -41,4 +52,23 @@ Pipelines defined in ``include/ck_tile/ops/gemm/pipeline`` directory. **TODO** - GemmPipelineAgBgCrMem -- GemmPipelineAgBgCrCompV3 \ No newline at end of file +- GemmPipelineAgBgCrCompV3 + +Documentation - Xdl Pipelines +------------------------------- + +.. toctree:: + :maxdepth: 2 + :caption: Xdl Pipelines + + xdl-v1 + xdl-v2 + +Documentation - CK_TILE Pipelines +--------------------------------- + +.. toctree:: + :maxdepth: 2 + :caption: CK_TILE Pipelines + + ck_tile-v1 \ No newline at end of file diff --git a/docs/reference/pipelines/xdl-v1.rst b/docs/reference/pipelines/xdl-v1.rst index b3bdfd96d2..0164b40c50 100644 --- a/docs/reference/pipelines/xdl-v1.rst +++ b/docs/reference/pipelines/xdl-v1.rst @@ -7,29 +7,58 @@ Xdl - v1 - Intrawave ..
code-block:: - static_for<0, KRepeat, 1>{}([&](auto k) { - static_for<0, MRepeat, 1>{}([&](auto m0) { - a_thread_copy_.Run(a_block_desc_m0_m1_m2_k, - make_tuple(m0, I0, I0, Number{}), - a_block_buf, - a_thread_desc_, - make_tuple(m0, I0, k, I0), - a_thread_buf); - static_for<0, NRepeat, 1>{}([&](auto n0) { - b_thread_copy_.Run(b_block_desc_n0_n1_n2_k, - make_tuple(n0, I0, I0, Number{}), - b_block_buf, - b_thread_desc_, - make_tuple(n0, I0, k, I0), - b_thread_buf); + // Global prefetch 1 + a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf); + b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf); + + a_blockwise_copy.MoveSrcSliceWindow(a_grid_desc, a_block_copy_step); + b_blockwise_copy.MoveSrcSliceWindow(b_grid_desc, b_block_copy_step); + + // Local prefill 1 + a_blockwise_copy.RunWrite(a_block_desc, a_block_buf); + b_blockwise_copy.RunWrite(b_block_desc, b_block_buf); + + // Initialize C + c_thread_buf.Clear(); + + // main body + if constexpr(HasMainLoop) + { + index_t i = 0; + do + { + // ------------------------------------------------------------------------------------------- + a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf); + b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf); + + a_blockwise_copy.MoveSrcSliceWindow(a_grid_desc, a_block_copy_step); + b_blockwise_copy.MoveSrcSliceWindow(b_grid_desc, b_block_copy_step); + + block_sync_lds(); + static_for<0, KRepeat, 1>{}([&](auto k) { + static_for<0, MRepeat, 1>{}([&](auto m0) { + a_thread_copy_.Run(a_block_desc_m0_m1_m2_k, + make_tuple(m0, I0, I0, Number{}), + a_block_buf, + a_thread_desc_, + make_tuple(m0, I0, k, I0), + a_thread_buf); + static_for<0, NRepeat, 1>{}([&](auto n0) { + b_thread_copy_.Run(b_block_desc_n0_n1_n2_k, + make_tuple(n0, I0, I0, Number{}), + b_block_buf, + b_thread_desc_, + make_tuple(n0, I0, k, I0), + b_thread_buf); + }); + }); }); - }); - }); -The ``a_thread_copy_.Run`` function is a member of the ``ThreadwiseTensorSliceTransfer_v4`` class. 
The ``ThreadwiseTensorSliceTransfer_v4`` class uses, +.. + The ``a_thread_copy_.Run`` function is a member of the ``ThreadwiseTensorSliceTransfer_v4`` class. The ``ThreadwiseTensorSliceTransfer_v4`` class uses, -- ``make_tensor_coordinate`` from ``tensor_descriptor.hpp`` -- ``make_naive_tensor_descriptor_packed`` from ``tensor_descriptor_helper.hpp`` + - ``make_tensor_coordinate`` from ``tensor_descriptor.hpp`` + - ``make_naive_tensor_descriptor_packed`` from ``tensor_descriptor_helper.hpp`` **full class definition** diff --git a/docs/sphinx/_toc.yml.in b/docs/sphinx/_toc.yml.in index 33ad8d91f8..d390c9d99d 100644 --- a/docs/sphinx/_toc.yml.in +++ b/docs/sphinx/_toc.yml.in @@ -34,6 +34,8 @@ subtrees: title: Composable Kernel vector utilities - file: reference/Composable-Kernel-wrapper.rst title: Composable Kernel wrapper + - file: reference/pipelines/index.rst + title: Pipeline Policies - file: doxygen/html/namespace_c_k.rst title: CK API reference - file: doxygen/html/namespaceck__tile.rst