mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-03-29 19:47:39 +00:00
We only want to import enums and types into the builder reflection code. But, some of the enums are included in much larger files or even big trees of include files. This leads to unintended mixing of code and very confusing interactions and symbol conflicts. We organize the includes and extract two new enum-only headers to help with decoupling in CK. This refactoring is critical if we want to include reflection in a device-operator "describe" method. * Remove a few unnecessary includes from headers in builder/reflect/. * Extract enums scheduler and pipeline to their own headers so they can be used without importing other code. * Order includes alphabetically for better organization. The immediate goal is to unblock reflection integration, and this type of cleanup helps the flexibility and robustness of the CK header library.
84 lines
2.5 KiB
C++
84 lines
2.5 KiB
C++
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
#pragma once
|
|
|
|
#if !defined(__HIPCC_RTC__) || !defined(CK_CODE_GEN_RTC)
|
|
#include <ostream>
|
|
#endif
|
|
|
|
namespace ck {
|
|
|
|
/// @brief Block GEMM pipeline version enumeration
|
|
/// @details Defines different block GEMM pipeline strategies.
|
|
/// This is a lightweight header containing only enum definitions,
|
|
/// extracted from blkgemmpipe_scheduler.hpp to minimize dependencies.
|
|
enum struct BlockGemmPipelineVersion
|
|
{
|
|
// For GEMM
|
|
v1, ///< Naive pipeline
|
|
v2, ///< Memory-optimized pipeline
|
|
v3, ///< Compute-optimized pipeline
|
|
v4, ///< Compute-optimized with double LDS buffer
|
|
v5, ///< Compute-optimized with double global prefetch register buffer
|
|
|
|
// For GEMM with preshuffled weight
|
|
// v1, single lds buffer
|
|
// v2, double lds buffer
|
|
};
|
|
|
|
/// @brief Block GEMM pipeline scheduler enumeration
|
|
/// @details Defines scheduling strategies for block GEMM pipelines.
|
|
enum struct BlockGemmPipelineScheduler
|
|
{
|
|
Intrawave, ///< Schedule within a single wavefront
|
|
Interwave, ///< Schedule across multiple wavefronts
|
|
};
|
|
|
|
/// @brief Loop scheduler enumeration
|
|
/// @details Defines scheduling strategies for computational loops.
|
|
enum struct LoopScheduler
|
|
{
|
|
Default, ///< Default scheduling strategy
|
|
Interwave, ///< Cross-wavefront scheduling
|
|
};
|
|
|
|
/// @brief Tail number enumeration for pipeline buffering
|
|
/// @details Defines the number of tail iterations in pipelined loops.
|
|
enum struct TailNumber
|
|
{
|
|
// Single / Double buffer pipeline
|
|
Odd, ///< Odd number of iterations
|
|
Even, ///< Even number of iterations
|
|
|
|
// Long prefetch pipeline, up to 8
|
|
One, ///< One tail iteration
|
|
Two, ///< Two tail iterations
|
|
Three, ///< Three tail iterations
|
|
Four, ///< Four tail iterations
|
|
Five, ///< Five tail iterations
|
|
Six, ///< Six tail iterations
|
|
Seven, ///< Seven tail iterations
|
|
|
|
// Unroll stages > Prefetch stages, number of loop is multiple of unroll stages
|
|
Empty, ///< No tail iterations
|
|
// Unroll stages <= Prefetch stages, number of loop is multiple of unroll stages add
|
|
// prefetchstages
|
|
Full, ///< Full tail iterations
|
|
};
|
|
|
|
} // namespace ck
|
|
|
|
#if !defined(__HIPCC_RTC__) || !defined(CK_CODE_GEN_RTC)
|
|
inline std::ostream& operator<<(std::ostream& os, const ck::LoopScheduler& s)
|
|
{
|
|
switch(s)
|
|
{
|
|
case ck::LoopScheduler::Default: os << "Default"; break;
|
|
case ck::LoopScheduler::Interwave: os << "Interwave"; break;
|
|
default: os << "";
|
|
}
|
|
return os;
|
|
}
|
|
#endif
|