Files
composable_kernel/include/rapidjson/internal/clzll.h
rahjain-amd 4d041837ad Add json dump support to output details from CK/CKTile Examples. (#2551)
* Adding RapidJson Library

* Adding Json Dumps in all CK_Tile Examples

Not verified yet

* Adding json to cktile Batched Transpose

* adding json dumps to layernorm2d_fwd

* Adding  json dump to flatmm_basic

* Adding RapidJson Library

* Adding Json Dumps in all CK_Tile Examples

Not verified yet

* Adding json to cktile Batched Transpose

* adding json dumps to layernorm2d_fwd

* Adding  json dump to flatmm_basic

* Adding json in 03_gemm

* Add json dump to 16_batched_gemm

* Add json dump to gemm_multi_d_fp16

* Add json dump to grouped_gemm

* fix fmha_bwd/fwd

* Fix clang-format errors

exclude include/rapidjson in jenkins as its a third-party library

* Saparating function and defination.

* Update Documentation of 03_gemm

* Refactoring as per code review

* Disable fp8 instances on unsupported targets (#2592)

* Restrict building of gemm_universal_preshuffle_f8 instances to specific targets in CMakeLists.txt

* Add condition to skip gemm_xdl_universal_preshuffle_f8 instances for unsupported targets in CMakeLists.txt

* Add conditions to skip unsupported targets for gemm_universal_preshuffle_f8 and gemm_xdl_universal_preshuffle_f8 instances in CMakeLists.txt

* Refine conditions to exclude gemm_universal_preshuffle_f8 instances for unsupported targets in CMakeLists.txt

---------

Co-authored-by: AviralGoelAMD <aviralgoel@amd.com>

* fix clang format

* remove duplicate lines of code from library/src/tensor_operation_instance/gpu/CMakeLists.txt

* Fixing Readme and unifying jsondumps

* adding moe_smoothquant

* adding fused_moe

* Fixing Readme for batched_gemm

* Fixing Readme for grouped_gemm

* adding flatmm

* adding gemm_multi_d_fp16

* adding elementwise

* adding File name when json is dumped

* Fixing Reduce after merge

* adding batched_transpose

* Adding Warptile in Gemm

* Fixing Clang Format

---------

Co-authored-by: Aviral Goel <aviral.goel@amd.com>
Co-authored-by: AviralGoelAMD <aviralgoel@amd.com>
Co-authored-by: illsilin_amdeng <Illia.Silin@amd.com>
2025-09-02 23:31:29 -07:00

72 lines
2.0 KiB
C++

// Tencent is pleased to support the open source community by making RapidJSON available.
//
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
//
// Licensed under the MIT License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// http://opensource.org/licenses/MIT
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef RAPIDJSON_CLZLL_H_
#define RAPIDJSON_CLZLL_H_
#include "../rapidjson.h"
#if defined(_MSC_VER) && !defined(UNDER_CE)
#include <intrin.h>
#if defined(_WIN64)
#pragma intrinsic(_BitScanReverse64)
#else
#pragma intrinsic(_BitScanReverse)
#endif
#endif
RAPIDJSON_NAMESPACE_BEGIN
namespace internal {
inline uint32_t clzll(uint64_t x) {
// Passing 0 to __builtin_clzll is UB in GCC and results in an
// infinite loop in the software implementation.
RAPIDJSON_ASSERT(x != 0);
#if defined(_MSC_VER) && !defined(UNDER_CE)
unsigned long r = 0;
#if defined(_WIN64)
_BitScanReverse64(&r, x);
#else
// Scan the high 32 bits.
if (_BitScanReverse(&r, static_cast<uint32_t>(x >> 32)))
return 63 - (r + 32);
// Scan the low 32 bits.
_BitScanReverse(&r, static_cast<uint32_t>(x & 0xFFFFFFFF));
#endif // _WIN64
return 63 - r;
#elif (defined(__GNUC__) && __GNUC__ >= 4) || RAPIDJSON_HAS_BUILTIN(__builtin_clzll)
// __builtin_clzll wrapper
return static_cast<uint32_t>(__builtin_clzll(x));
#else
// naive version
uint32_t r = 0;
while (!(x & (static_cast<uint64_t>(1) << 63))) {
x <<= 1;
++r;
}
return r;
#endif // _MSC_VER
}
#define RAPIDJSON_CLZLL RAPIDJSON_NAMESPACE::internal::clzll
} // namespace internal
RAPIDJSON_NAMESPACE_END
#endif // RAPIDJSON_CLZLL_H_