mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 18:17:44 +00:00
* Multi ABD - initial commit
* Clang-format fix
* block gemm, unify the name of CDataType
* Apply changes to mem-pipeline
* Rollback prefix for DType and Layout
* Gemm Kernel Basic, rename
* WMMA config
* Grouped GEMM
* Clang-format
* Dropout, name
* Review v2
* Move element_wise fns to unary, remove the old ones
* clang-format
* Fix issue review
* WP operator adjust to universal gemm
* v2 prepare
* Remove unused comment
* Remove vectorsize
* Rollback
* Adjust pipeline for abd
* Shuffle argument
* CI-fail fix quant
* Fix ag_br pipeline
* Failing tests
* Typo
* Single argument support
[ROCm/composable_kernel commit: 30ab1d6a71]
39 lines
1.5 KiB
C++
39 lines
1.5 KiB
C++
// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#pragma once
|
|
|
|
template <typename Layout>
|
|
static constexpr inline auto is_row_major(Layout layout_)
|
|
{
|
|
return ck_tile::bool_constant<std::is_same_v<ck_tile::remove_cvref_t<decltype(layout_)>,
|
|
ck_tile::tensor_layout::gemm::RowMajor>>{};
|
|
}
|
|
|
|
// Computes the (rtol, atol) pair used when comparing a split-K GEMM result
// against a reference.
//
// K                     - reduction length of the GEMM.
// kbatch                - number of split-K batches; used as a divisor, so it
//                         must be non-zero.
// max_accumulated_value - largest accumulated magnitude expected in the output.
//
// Two threshold pairs are evaluated and the larger value of each kind is
// returned as `ck_tile::make_tuple(rtol, atol)`:
//   1. thresholds for accumulating ceil(K / kbatch) products per split, and
//   2. thresholds for summing `kbatch` partial results in EDataType.
//
// The data-type aliases A0DataType, B0DataType, D0DataType, EDataType and
// AccDataType are not declared here; they must be visible at the point of
// definition.
auto calculate_rtol_atol(const ck_tile::index_t K,
                         const ck_tile::index_t kbatch,
                         const float max_accumulated_value)
{
    // Among A0/B0/D0 select the type with the smallest sizeof. The comparison
    // is strict, so on a size tie the later type (B0 over A0, D0 over the
    // A0/B0 winner) is chosen.
    using SmallestOfAB =
        std::conditional_t<(sizeof(A0DataType) < sizeof(B0DataType)), A0DataType, B0DataType>;
    using ComputeType =
        std::conditional_t<(sizeof(SmallestOfAB) < sizeof(D0DataType)), SmallestOfAB, D0DataType>;

    // Number of accumulations performed inside a single split.
    const auto k_per_split = ck_tile::integer_divide_ceil(K, kbatch);

    // Thresholds for the accumulation within one split.
    const auto rtol_gemm =
        ck_tile::get_relative_threshold<ComputeType, EDataType, AccDataType>(k_per_split);
    const auto atol_gemm = ck_tile::get_absolute_threshold<ComputeType, EDataType, AccDataType>(
        max_accumulated_value / kbatch, k_per_split);

    // Thresholds for the error introduced by adding the kbatch partial results.
    const auto rtol_split_k =
        ck_tile::get_relative_threshold<EDataType, EDataType, EDataType>(kbatch);
    const auto atol_split_k = ck_tile::get_absolute_threshold<EDataType, EDataType, EDataType>(
        max_accumulated_value, kbatch);

    // Report the looser (larger) bound of each kind.
    return ck_tile::make_tuple(std::max(rtol_gemm, rtol_split_k),
                               std::max(atol_gemm, atol_split_k));
}
|