diff --git a/docs/reference/index.rst b/docs/reference/index.rst deleted file mode 100644 index 0eb47c51d6..0000000000 --- a/docs/reference/index.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. _ck_and_ck_tile_reference: - -CK / CK_TILE Reference -================================ - -Documentation - Pipelines ---------------------------- - -.. toctree:: - :maxdepth: 2 - :caption: CK / CK_TILE - - pipelines/index \ No newline at end of file diff --git a/docs/reference/pipelines/ck_tile-comp_v3.rst b/docs/reference/pipelines/ck_tile-comp_v3.rst new file mode 100644 index 0000000000..5198e89be2 --- /dev/null +++ b/docs/reference/pipelines/ck_tile-comp_v3.rst @@ -0,0 +1,10 @@ +.. _ck_tile-comp_v3: + +ck_tile - AgBgCrCompV3 - [SCHEDULER] +-------------------------------------------- + +**loop: lds_write(i+1)** + +**full class definition** + +.. literalinclude:: ../../../include/ck_tile/ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_comp_v3.hpp \ No newline at end of file diff --git a/docs/reference/pipelines/ck_tile-mem.rst b/docs/reference/pipelines/ck_tile-mem.rst new file mode 100644 index 0000000000..388097fa24 --- /dev/null +++ b/docs/reference/pipelines/ck_tile-mem.rst @@ -0,0 +1,24 @@ +.. _ck_tile-mem: + +ck_tile - AgBgCrMem - [SCHEDULER] +-------------------------------------------- + +**loop: buffer_load(i+prefetch)** + +.. code-block:: + + static_for<1, PrefetchStages, 1>{}([&](auto prefetch_idx) { + a_block_tiles.at(number{}) = + load_tile_with_elementwise(a_copy_dram_window, a_element_func); + + move_tile_window(a_copy_dram_window, a_dram_tile_window_step); + + b_block_tiles.at(number{}) = + load_tile_with_elementwise(b_copy_dram_window, b_element_func); + + move_tile_window(b_copy_dram_window, b_dram_tile_window_step); + }); + +**full class definition** + +.. 
literalinclude:: ../../../include/ck_tile/ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_mem.hpp \ No newline at end of file diff --git a/docs/reference/pipelines/index.rst b/docs/reference/pipelines/index.rst index 5e0d500a13..1ea55b2e59 100644 --- a/docs/reference/pipelines/index.rst +++ b/docs/reference/pipelines/index.rst @@ -29,9 +29,19 @@ GEMM Pipelines defined in ``include/ck/tensor_operation/gpu/block`` directory. lds_write(i+1) buffer_load(i+prefetch) -**TODO** +:ref:`BlockwiseGemmXdlops_pipeline_v3 <xdl-v3>` -- BlockwiseGemmXdlops_pipeline_v3 +.. code-block:: + + buffer_load(0) + lds_write(0) + buffer_load(1) + lds_read(0) + loop: + lds_write(i+1) + buffer_load(i+2) + gemm(i) + lds_read(i+1) CK_TILE GEMM pipelines ------------------------- @@ -49,26 +59,52 @@ Pipelines defined in ``include/ck_tile/ops/gemm/pipeline`` directory. gemm(i) lds_write(i+1) -**TODO** +:ref:`GemmPipelineAgBgCrMem <ck_tile-mem>` + +.. code-block:: + + buffer_load(0) + lds_write(0) + buffer_load(1:prefetch) + loop: + lds_read(i) + gemm + lds_write(i+1) + buffer_load(i+prefetch) + +:ref:`GemmPipelineAgBgCrCompV3 <ck_tile-comp_v3>` + +.. code-block:: + + buffer_load(0) + lds_write(0) + buffer_load(1) + lds_read(0) + loop: + lds_write(i+1) + buffer_load(i+2) + gemm(i) + lds_read(i+1) -- GemmPipelineAgBgCrMem -- GemmPipelineAgBgCrCompV3 Documentation - Xdl Pipelines ------------------------------- .. toctree:: - :maxdepth: 2 - :caption: Xdl Pipelines - - xdl-v1 - xdl-v2 + :maxdepth: 2 + :caption: Xdl Pipelines + + xdl-v1 + xdl-v2 + xdl-v3 Documentation - CK_TILE Pipelines -------------------------------- +----------------------------------- ..
toctree:: - :maxdepth: 2 - :caption: CK Tile Pipelines - - ck_tile-v1 \ No newline at end of file + :maxdepth: 2 + :caption: CK Tile Pipelines + + ck_tile-v1 + ck_tile-mem + ck_tile-comp_v3 \ No newline at end of file diff --git a/docs/reference/pipelines/xdl-v3.rst b/docs/reference/pipelines/xdl-v3.rst new file mode 100644 index 0000000000..420411d1e3 --- /dev/null +++ b/docs/reference/pipelines/xdl-v3.rst @@ -0,0 +1,42 @@ +.. _xdl-v3: + +Xdl - v3 - Intrawave +---------------------- + +**loop: lds_write(i+1)** + +**buffer_load(i+2)** + +.. code-block:: + + a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf); + b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf); + + a_blockwise_copy.MoveSrcSliceWindow(a_grid_desc, a_block_copy_step); + b_blockwise_copy.MoveSrcSliceWindow(b_grid_desc, b_block_copy_step); + + static_for<0, KRepeat, 1>{}([&](auto k0) { + static_for<0, MRepeat, 1>{}([&](auto m0) { + static_for<0, NRepeat, 1>{}([&](auto n0) { + vector_type a_thread_vec; + vector_type b_thread_vec; + + static_for<0, KPack, 1>{}([&](auto ik) { + a_thread_vec.template AsType()(ik) = + a_thread_buf[Number{}]; + b_thread_vec.template AsType()(ik) = + b_thread_buf[Number{}]; + }); + + using mfma_input_type = + typename vector_type::type; + + constexpr index_t c_offset = + c_thread_desc_.CalculateOffset(make_tuple(m0, n0, 0)); + +**full class definition** + +.. 
literalinclude:: ../../../include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v3.hpp \ No newline at end of file diff --git a/docs/sphinx/_toc.yml.in b/docs/sphinx/_toc.yml.in index d390c9d99d..25b700de14 100644 --- a/docs/sphinx/_toc.yml.in +++ b/docs/sphinx/_toc.yml.in @@ -18,6 +18,10 @@ subtrees: title: Composable Kernel structure - file: conceptual/Composable-Kernel-math.rst title: Composable Kernel mathematical basis + - file: conceptual/ck_tile/index.rst + title: CK Tile + - file: conceptual/ck_tile_radeon/index.rst + title: CK Tile - Radeon / Navi - caption: Tutorial entries: @@ -36,12 +40,6 @@ subtrees: title: Composable Kernel wrapper - file: reference/pipelines/index.rst title: Pipeline Policies - - file: doxygen/html/namespace_c_k.rst - title: CK API reference - - file: doxygen/html/namespaceck__tile.rst - title: CK Tile API reference - - file: doxygen/html/annotated.rst - title: Full API class list - file: reference/Composable-Kernel-Glossary.rst title: Glossary