feat: add new optimized tutorial kernels

- Add 01_naive_gemm baseline implementation
- Add 02_padding_k_first with PADDING_K_FIRST + MFMA_32x32x16
- Add 03_mfma_16x16x16 with PADDING_K_FIRST + MFMA_16x16x16
- Share common reference_gemm.hpp in parent gemm/ directory
This commit is contained in:
AviralGoelAMD
2026-01-29 12:45:18 +00:00
parent 9b168082b7
commit adb8f67b4f
38 changed files with 2604 additions and 477 deletions

View File

@@ -0,0 +1,10 @@
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT
# Append this directory to the include search path. The explicit AFTER keyword
# selects appending regardless of CMAKE_INCLUDE_DIRECTORIES_BEFORE. The setting
# is directory-scoped, so the subdirectories added below inherit it.
# NOTE(review): presumably this lets each tutorial include headers shared from
# this directory -- confirm against the subdirectory sources.
include_directories(AFTER ${CMAKE_CURRENT_LIST_DIR})

# Tutorial steps, one subdirectory each, added in numeric order.
add_subdirectory(01_naive_gemm)
add_subdirectory(02_padding_k_first)
add_subdirectory(03_mfma_16x16x16)