mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-01 20:21:23 +00:00
- Introduced new subdirectory for MFMA 16x16x16x2 implementation. - Added CMake configuration and source files for the new example. - Implemented block GEMM and pipeline strategies to optimize performance. - Included necessary policies and tensor distribution for efficient memory access. - Updated the main GEMM kernel to support the new configuration.
12 lines
338 B
CMake
12 lines
338 B
CMake
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT

# Append this directory to the include search path. The setting is
# directory-scoped, so it is inherited by every subdirectory added below.
# AFTER places it behind any include directories that are already set.
include_directories(AFTER
  "${CMAKE_CURRENT_LIST_DIR}"
)

# Each numbered subdirectory supplies its own CMakeLists.txt; they are
# processed in the order listed.
add_subdirectory(01_naive_gemm)
add_subdirectory(02_padding_k_first)
add_subdirectory(03_mfma_16x16x16)
add_subdirectory(04_mfma_16x16x16x2)
add_subdirectory(05_xor_bank_conflict_free)