mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-06 15:54:31 +00:00
* CK-Tile GEMM with memory bound pipeline. * Memory bound gemm pipeline. * Fix not closed namespace. * Block gemm mem pipeline draft. * Do not use ck_tile:: within ck_tile namespace. * Refactoring & Move Layout info to pipeline problem. * Get hot loop and TailNum information before launching kernel. * Fixes in pipeline. * Add comment to load_tile_raw and change variable naming style. * Few small changes & formatting. * Do not use macro. * Add gtests. * Use AccDataType for Output of MFMA instruction. * Formatting. * Refactor gemm examples. * Switch over to current block gemm. * Use currently available pipeline policy. * Refactoring and review comments. * Fixes after merge. * Add missing include. * Add load tile overload which accepts output tensor as parameter. * This gives 8% perf boost at the cost of using more registers. * Rename example. * Small changes. * Fix compilation err and lower K. * Support different layouts for A/B * Fix vector size for different layouts. * Rename Alignment into VectorSize * Unblock tests.
68 lines
3.1 KiB
C++
68 lines
3.1 KiB
C++
// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#pragma once
|
|
|
|
#include "ck_tile/core/algorithm/cluster_descriptor.hpp"
|
|
#include "ck_tile/core/algorithm/coordinate_transform.hpp"
|
|
#include "ck_tile/core/algorithm/indexing_adaptor.hpp"
|
|
#include "ck_tile/core/algorithm/space_filling_curve.hpp"
|
|
#include "ck_tile/core/arch/amd_buffer_addressing.hpp"
|
|
#include "ck_tile/core/arch/arch.hpp"
|
|
#include "ck_tile/core/arch/generic_memory_space_atomic.hpp"
|
|
#include "ck_tile/core/arch/utility.hpp"
|
|
#include "ck_tile/core/config.hpp"
|
|
#include "ck_tile/core/container/array.hpp"
|
|
#include "ck_tile/core/container/container_helper.hpp"
|
|
#include "ck_tile/core/container/map.hpp"
|
|
#include "ck_tile/core/container/meta_data_buffer.hpp"
|
|
#include "ck_tile/core/container/multi_index.hpp"
|
|
#include "ck_tile/core/container/sequence.hpp"
|
|
#include "ck_tile/core/container/span.hpp"
|
|
#include "ck_tile/core/container/statically_indexed_array.hpp"
|
|
#include "ck_tile/core/container/thread_buffer.hpp"
|
|
#include "ck_tile/core/container/tuple.hpp"
|
|
#include "ck_tile/core/numeric/bfloat16.hpp"
|
|
#include "ck_tile/core/numeric/float8.hpp"
|
|
#include "ck_tile/core/numeric/half.hpp"
|
|
#include "ck_tile/core/numeric/integer.hpp"
|
|
#include "ck_tile/core/numeric/integral_constant.hpp"
|
|
#include "ck_tile/core/numeric/math.hpp"
|
|
#include "ck_tile/core/numeric/null_type.hpp"
|
|
#include "ck_tile/core/numeric/numeric.hpp"
|
|
#include "ck_tile/core/numeric/type_convert.hpp"
|
|
#include "ck_tile/core/numeric/vector_type.hpp"
|
|
#include "ck_tile/core/tensor/buffer_view.hpp"
|
|
#include "ck_tile/core/tensor/load_tile.hpp"
|
|
#include "ck_tile/core/tensor/null_tensor.hpp"
|
|
#include "ck_tile/core/tensor/null_tile_window.hpp"
|
|
#include "ck_tile/core/tensor/shuffle_tile.hpp"
|
|
#include "ck_tile/core/tensor/slice_tile.hpp"
|
|
#include "ck_tile/core/tensor/static_distributed_tensor.hpp"
|
|
#include "ck_tile/core/tensor/store_tile.hpp"
|
|
#include "ck_tile/core/tensor/sweep_tile.hpp"
|
|
#include "ck_tile/core/tensor/tensor_adaptor.hpp"
|
|
#include "ck_tile/core/tensor/tensor_adaptor_coordinate.hpp"
|
|
#include "ck_tile/core/tensor/tensor_coordinate.hpp"
|
|
#include "ck_tile/core/tensor/tensor_descriptor.hpp"
|
|
#include "ck_tile/core/tensor/tensor_view.hpp"
|
|
#include "ck_tile/core/tensor/tile_distribution.hpp"
|
|
#include "ck_tile/core/tensor/tile_distribution_encoding.hpp"
|
|
#include "ck_tile/core/tensor/tile_elementwise.hpp"
|
|
#include "ck_tile/core/tensor/tile_window.hpp"
|
|
#include "ck_tile/core/tensor/tile_window_linear.hpp"
|
|
#include "ck_tile/core/tensor/update_tile.hpp"
|
|
#include "ck_tile/core/utility/bit_cast.hpp"
|
|
#include "ck_tile/core/utility/functional.hpp"
|
|
#include "ck_tile/core/utility/functional_with_tuple.hpp"
|
|
#include "ck_tile/core/utility/ignore.hpp"
|
|
#include "ck_tile/core/utility/literals.hpp"
|
|
#include "ck_tile/core/utility/magic_div.hpp"
|
|
#include "ck_tile/core/utility/philox_rand.hpp"
|
|
#include "ck_tile/core/utility/random.hpp"
|
|
#include "ck_tile/core/utility/reduce_operator.hpp"
|
|
#include "ck_tile/core/utility/to_sequence.hpp"
|
|
#include "ck_tile/core/utility/transpose_vectors.hpp"
|
|
#include "ck_tile/core/utility/type_traits.hpp"
|
|
#include "ck_tile/core/utility/unary_element_function.hpp"
|