From 580a54b400e63293d83235f25ffa040255ae6bd9 Mon Sep 17 00:00:00 2001
From: Johannes Graner <johannes.graner@amd.com>
Date: Fri, 17 Oct 2025 00:29:17 +0200
Subject: [PATCH] Update pre-commit to fixed versions, run remod for ck_tile
 (#2895)

* Fix ruff linter errors

* Fix remod dos2unix command

* Clang format

* Ignore utility in remod

* Run remod

* Specify clang-format version in pre-commit

* Specify ruff version

* Include PoolKernelArgs in reference_pool

* Add calculate_total_elements to reference batched contraction

* Fix calculate_total_elements declaration

* Refactor remod pre-commit hook

* Fix Aquant tests

---------

Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>

[ROCm/composable_kernel commit: d40b50b9d5b5b60c56b5e6b3837882442c882074]
---
 .github/scripts/therock_configure_ci.py       |   27 +-
 .pre-commit-config.yaml                       |   36 +-
 .../ck_tile/01_fmha/codegen/cmake_config.py   |    2 +-
 .../ck_tile/01_fmha/codegen/cpp_symbol_map.py |  130 +-
 .../01_fmha/codegen/ops/fmha_batch_prefill.py |  765 ++--
 .../ck_tile/01_fmha/codegen/ops/fmha_bwd.py   | 1117 ++++--
 .../ck_tile/01_fmha/codegen/ops/fmha_fwd.py   | 1735 ++++++--
 .../01_fmha/codegen/ops/fmha_fwd_appendkv.py  |  400 +-
 .../01_fmha/codegen/ops/fmha_fwd_splitkv.py   |  969 +++--
 .../codegen/ops/fmha_pagedkv_prefill.py       |  764 ++--
 example/ck_tile/01_fmha/generate.py           |  109 +-
 example/ck_tile/02_layernorm2d/generate.py    | 1365 ++++++-
 example/ck_tile/10_rmsnorm2d/generate.py      | 2494 ++++++++++--
 example/ck_tile/36_pooling/pool3d.cpp         |    2 +-
 example/ck_tile/remod.py                      |   16 +-
 include/ck_tile/host.hpp                      |    2 +
 .../reference_batched_contraction.hpp         |    6 +
 .../ck_tile/host/reference/reference_pool.hpp |    1 +
 include/ck_tile/ops/batched_contraction.hpp   |    4 +
 include/ck_tile/ops/gemm_quant.hpp            |    2 +-
 .../gemm_aquant_pipeline_ag_bg_cr_mem.hpp     |    1 +
 include/ck_tile/ops/{pool.hpp => pooling.hpp} |    5 +-
 include/ck_tile/remod.py                      |   49 +-
 include/rapidjson/allocators.h                |  503 ++-
 include/rapidjson/cursorstreamwrapper.h       |   38 +-
 include/rapidjson/document.h                  | 2687 ++++++++-----
 include/rapidjson/encodedstream.h             |  294 +-
 include/rapidjson/encodings.h                 |  557 ++-
 include/rapidjson/error/en.h                  |  259 +-
 include/rapidjson/error/error.h               |  174 +-
 include/rapidjson/filereadstream.h            |   88 +-
 include/rapidjson/filewritestream.h           |   87 +-
 include/rapidjson/fwd.h                       |   71 +-
 include/rapidjson/internal/biginteger.h       |  268 +-
 include/rapidjson/internal/clzll.h            |   10 +-
 include/rapidjson/internal/diyfp.h            |  150 +-
 include/rapidjson/internal/dtoa.h             |  250 +-
 include/rapidjson/internal/ieee754.h          |   55 +-
 include/rapidjson/internal/itoa.h             |  162 +-
 include/rapidjson/internal/meta.h             |  249 +-
 include/rapidjson/internal/pow10.h            |   61 +-
 include/rapidjson/internal/regex.h            |  709 ++--
 include/rapidjson/internal/stack.h            |  189 +-
 include/rapidjson/internal/strfunc.h          |   52 +-
 include/rapidjson/internal/strtod.h           |  165 +-
 include/rapidjson/internal/swap.h             |    9 +-
 include/rapidjson/istreamwrapper.h            |   93 +-
 include/rapidjson/memorybuffer.h              |   30 +-
 include/rapidjson/memorystream.h              |   52 +-
 include/rapidjson/msinttypes/inttypes.h       |  410 +-
 include/rapidjson/msinttypes/stdint.h         |  272 +-
 include/rapidjson/ostreamwrapper.h            |   55 +-
 include/rapidjson/pointer.h                   | 1200 ++++--
 include/rapidjson/prettywriter.h              |  237 +-
 include/rapidjson/rapidjson.h                 |  243 +-
 include/rapidjson/reader.h                    | 2011 ++++++----
 include/rapidjson/schema.h                    | 3503 +++++++++++------
 include/rapidjson/stream.h                    |  103 +-
 include/rapidjson/stringbuffer.h              |   50 +-
 include/rapidjson/uri.h                       |  442 ++-
 include/rapidjson/writer.h                    |  697 ++--
 python/ck4inductor/__init__.py                |    4 +-
 script/dependency-parser/main.py              |   53 +-
 .../src/enhanced_ninja_parser.py              |  230 +-
 .../src/selective_test_filter.py              |   21 +-
 script/ninja_json_converter.py                |  437 +-
 script/process_perf_data.py                   |  595 +--
 script/remod_for_ck_tile.sh                   |   18 +-
 .../run_ck_profiler_gemm_with_csv_shapes.py   |   12 +-
 test/ck_tile/layernorm2d/generate.py          | 1365 ++++++-
 test/ck_tile/pooling/test_pooling.cpp         |    2 +-
 test/ck_tile/rmsnorm2d/generate.py            | 1344 ++++++-
 test_data/generate_model_configs.py           |  212 +-
 test_data/miopen_to_csv.py                    |  530 ++-
 test_data/run_model_with_miopen.py            |  183 +-
 tile_engine/ops/gemm/codegen_utils.py         |    4 +-
 tile_engine/ops/gemm/validation_utils.py      |   33 +-
 77 files changed, 21671 insertions(+), 9858 deletions(-)
 rename include/ck_tile/ops/{pool.hpp => pooling.hpp} (58%)

diff --git a/.github/scripts/therock_configure_ci.py b/.github/scripts/therock_configure_ci.py
index cc66fdbfe8..860b6bf875 100644
--- a/.github/scripts/therock_configure_ci.py
+++ b/.github/scripts/therock_configure_ci.py
@@ -6,6 +6,7 @@ import subprocess
 import sys
 from typing import Iterable, Optional, Mapping
 
+
 def gha_set_output(vars: Mapping[str, str | Path]):
     """Sets values in a step's output parameters.
 
@@ -25,6 +26,7 @@ def gha_set_output(vars: Mapping[str, str | Path]):
     with open(step_output_file, "a") as f:
         f.writelines(f"{k}={str(v)}" + "\n" for k, v in vars.items())
 
+
 def get_modified_paths(base_ref: str) -> Optional[Iterable[str]]:
     """Returns the paths of modified files relative to the base reference."""
     try:
@@ -42,11 +44,13 @@ def get_modified_paths(base_ref: str) -> Optional[Iterable[str]]:
             file=sys.stderr,
         )
         return None
-    
+
+
 GITHUB_WORKFLOWS_CI_PATTERNS = [
     "therock*",
 ]
 
+
 def is_path_workflow_file_related_to_ci(path: str) -> bool:
     return any(
         fnmatch.fnmatch(path, ".github/workflows/" + pattern)
@@ -56,11 +60,13 @@ def is_path_workflow_file_related_to_ci(path: str) -> bool:
         for pattern in GITHUB_WORKFLOWS_CI_PATTERNS
     )
 
+
 def check_for_workflow_file_related_to_ci(paths: Optional[Iterable[str]]) -> bool:
     if paths is None:
         return False
     return any(is_path_workflow_file_related_to_ci(p) for p in paths)
 
+
 # Paths matching any of these patterns are considered to have no influence over
 # build or test workflows so any related jobs can be skipped if all paths
 # modified by a commit/PR match a pattern in this list.
@@ -70,23 +76,26 @@ SKIPPABLE_PATH_PATTERNS = [
     "*.md",
     "*.pre-commit-config.*",
     "*LICENSE",
-    'Jenkinsfile',
-    '.github/ISSUE_TEMPLATE/*',
-    '.github/CODEOWNERS',
-    '.github/*.md',
-    '.github/dependabot.yml',
+    "Jenkinsfile",
+    ".github/ISSUE_TEMPLATE/*",
+    ".github/CODEOWNERS",
+    ".github/*.md",
+    ".github/dependabot.yml",
 ]
 
+
 def is_path_skippable(path: str) -> bool:
     """Determines if a given relative path to a file matches any skippable patterns."""
     return any(fnmatch.fnmatch(path, pattern) for pattern in SKIPPABLE_PATH_PATTERNS)
 
+
 def check_for_non_skippable_path(paths: Optional[Iterable[str]]) -> bool:
     """Returns true if at least one path is not in the skippable set."""
     if paths is None:
         return False
     return any(not is_path_skippable(p) for p in paths)
 
+
 def should_ci_run_given_modified_paths(paths: Optional[Iterable[str]]) -> bool:
     """Returns true if CI workflows should run given a list of modified paths."""
 
@@ -118,16 +127,16 @@ def should_ci_run_given_modified_paths(paths: Optional[Iterable[str]]) -> bool:
         )
         return False
 
+
 def main(args):
     base_ref = args.get("base_ref")
     modified_paths = get_modified_paths(base_ref)
     print("modified_paths (max 200):", modified_paths[:200])
     enable_jobs = should_ci_run_given_modified_paths(modified_paths)
-    output = {
-        'enable_therock_ci': json.dumps(enable_jobs)
-    }
+    output = {"enable_therock_ci": json.dumps(enable_jobs)}
     gha_set_output(output)
 
+
 if __name__ == "__main__":
     args = {}
     args["base_ref"] = os.environ.get("BASE_REF", "HEAD^1")
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2d936d3a48..03d33757b0 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,11 +1,25 @@
 repos:
--   repo: local
+-   repo: https://github.com/pre-commit/mirrors-clang-format
+    rev: v18.1.3
     hooks:
     -   id: clang-format
-        name: clang-format
-        entry: clang-format-18 -i --style=file
-        language: system
         types_or: [c++, inc]
+-   repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.14.0
+    hooks:
+    -   id: ruff-check
+        args: [ --fix ]
+        exclude: |
+            (?x)^(
+                docs/conf.py
+            )$
+    -   id: ruff-format
+        exclude: |
+            (?x)^(
+                docs/conf.py
+            )$
+-   repo: local
+    hooks:
     # -   id: copyright-year-checker
     #     name: copyright-year-checker
     #     entry: script/check_copyright_year.sh
@@ -18,21 +32,9 @@ repos:
         language: script
         types_or: [c++, text]
         verbose: true
-    -   id: ruff-check
-        name: Ruff Linter
-        entry: ruff check --fix
-        language: python
-        types: [python]
-        additional_dependencies: [ruff]
-    -   id: ruff-format
-        name: Ruff Formatter
-        entry: ruff format
-        language: python
-        types: [python]
-        additional_dependencies: [ruff]
     -   id: run-remod-if-ck-tile-changed
         name: Run remod.py if ck_tile files changed
         entry: script/remod_for_ck_tile.sh
         language: script
-        always_run: true
+        files: '^(include|example)/ck_tile/.*$'
         pass_filenames: false
diff --git a/example/ck_tile/01_fmha/codegen/cmake_config.py b/example/ck_tile/01_fmha/codegen/cmake_config.py
index 03ebfd6702..483934b03b 100644
--- a/example/ck_tile/01_fmha/codegen/cmake_config.py
+++ b/example/ck_tile/01_fmha/codegen/cmake_config.py
@@ -2,4 +2,4 @@
 # Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
 # generate kernel instances to speed up compilation
 
-GEN_DIR = ""    # in Cmake, have to generate files in same folder
\ No newline at end of file
+GEN_DIR = ""  # in Cmake, have to generate files in same folder
diff --git a/example/ck_tile/01_fmha/codegen/cpp_symbol_map.py b/example/ck_tile/01_fmha/codegen/cpp_symbol_map.py
index 81d34484a5..4098eb67c2 100644
--- a/example/ck_tile/01_fmha/codegen/cpp_symbol_map.py
+++ b/example/ck_tile/01_fmha/codegen/cpp_symbol_map.py
@@ -3,38 +3,35 @@
 # generate kernel instances to speed up compilation
 
 FWD_DTYPE_MAP = {
-    "fp32"   : "FmhaFwdFp32",
-    "fp16"   : "FmhaFwdFp16",
-    "bf16"   : "FmhaFwdBf16",
-    "fp8"    : "FmhaFwdFp8",
+    "fp32": "FmhaFwdFp32",
+    "fp16": "FmhaFwdFp16",
+    "bf16": "FmhaFwdBf16",
+    "fp8": "FmhaFwdFp8",
     "fp8fp16": "FmhaFwdFp8Fp16",
     "fp8bf16": "FmhaFwdFp8Bf16",
-    "fp8fp32": "FmhaFwdFp8Fp32"
+    "fp8fp32": "FmhaFwdFp8Fp32",
 }
 
-BWD_DTYPE_MAP = {
-    "fp32": "FmhaBwdFp32",
-    "fp16": "FmhaBwdFp16",
-    "bf16": "FmhaBwdBf16"
-}
+BWD_DTYPE_MAP = {"fp32": "FmhaBwdFp32", "fp16": "FmhaBwdFp16", "bf16": "FmhaBwdBf16"}
 
 MASK_IMPL = {
-    "generic" : "ck_tile::GenericAttentionMask",
-    "simplified"  : "ck_tile::SimplifiedGenericAttentionMask"
+    "generic": "ck_tile::GenericAttentionMask",
+    "simplified": "ck_tile::SimplifiedGenericAttentionMask",
 }
 
 _MASK_SIMPLIFIED_MAP = {
-    "s_no" : "ck_tile::SimplifiedGenericAttentionMask<false>",
-    "s_mask" : "ck_tile::SimplifiedGenericAttentionMask<true>",
+    "s_no": "ck_tile::SimplifiedGenericAttentionMask<false>",
+    "s_mask": "ck_tile::SimplifiedGenericAttentionMask<true>",
 }
 
 _MASK_MAP = {
-    "no" : "FmhaMasks::NoMask",
-    "causal" : "FmhaMasks::CausalMask",
-    "generic" : "FmhaMasks::GenericMask"
+    "no": "FmhaMasks::NoMask",
+    "causal": "FmhaMasks::CausalMask",
+    "generic": "FmhaMasks::GenericMask",
 }
 
-def get_mask_map(mask : str):
+
+def get_mask_map(mask: str):
     if mask == "generic":
         return _MASK_MAP
     elif mask == "simplified":
@@ -43,18 +40,20 @@ def get_mask_map(mask : str):
         assert False
         return None
 
+
 _MASK_CHECK_MAP = {
-    "no" : "t.mask_type == mask_enum::no_mask",
-    "causal" : "t.mask_type == mask_enum::mask_top_left || t.mask_type == mask_enum::mask_bottom_right",
-    "generic" : "t.mask_type == mask_enum::window_generic",
+    "no": "t.mask_type == mask_enum::no_mask",
+    "causal": "t.mask_type == mask_enum::mask_top_left || t.mask_type == mask_enum::mask_bottom_right",
+    "generic": "t.mask_type == mask_enum::window_generic",
 }
 
 _MASK_SIMPLIFIED_CHECK_MAP = {
-    "s_no" : "t.mask_type == mask_enum::no_mask",
-    "s_mask" : "t.mask_type != mask_enum::no_mask",
+    "s_no": "t.mask_type == mask_enum::no_mask",
+    "s_mask": "t.mask_type != mask_enum::no_mask",
 }
 
-def get_mask_check_map(mask : str):
+
+def get_mask_check_map(mask: str):
     if mask == "generic":
         return _MASK_CHECK_MAP
     elif mask == "simplified":
@@ -63,76 +62,71 @@ def get_mask_check_map(mask : str):
         assert False
         return None
 
+
 BIAS_MAP = {
-    "no" : "ck_tile::BlockAttentionBiasEnum::NO_BIAS",
-    "bias"  : "ck_tile::BlockAttentionBiasEnum::ELEMENTWISE_BIAS",
-    "alibi" : "ck_tile::BlockAttentionBiasEnum::ALIBI"
+    "no": "ck_tile::BlockAttentionBiasEnum::NO_BIAS",
+    "bias": "ck_tile::BlockAttentionBiasEnum::ELEMENTWISE_BIAS",
+    "alibi": "ck_tile::BlockAttentionBiasEnum::ALIBI",
 }
 
 # TODO: this is ugly
 BIAS_CHECK_MAP = {
-    "no" : "bias_enum::no_bias",
-    "bias"  : "bias_enum::elementwise_bias",
-    "alibi" : "bias_enum::alibi"
+    "no": "bias_enum::no_bias",
+    "bias": "bias_enum::elementwise_bias",
+    "alibi": "bias_enum::alibi",
 }
 
 DROPOUT_MAP = {
-    "no"                        : "ck_tile::BlockDropoutBwd<false, true,  false>",
-    "dropout_wg32"              : "ck_tile::BlockDropoutBwd<true,  true,  false>",
-    "dropout_wg32_storerandval" : "ck_tile::BlockDropoutBwd<true,  true,  true >",
-    "dropout_wg16"              : "ck_tile::BlockDropoutBwd<true,  false, false>",
-    "dropout_wg16_storerandval" : "ck_tile::BlockDropoutBwd<true,  false, true >"
+    "no": "ck_tile::BlockDropoutBwd<false, true,  false>",
+    "dropout_wg32": "ck_tile::BlockDropoutBwd<true,  true,  false>",
+    "dropout_wg32_storerandval": "ck_tile::BlockDropoutBwd<true,  true,  true >",
+    "dropout_wg16": "ck_tile::BlockDropoutBwd<true,  false, false>",
+    "dropout_wg16_storerandval": "ck_tile::BlockDropoutBwd<true,  false, true >",
 }
 
 DROPOUT_CHECK_MAP = {
-    "no"                        : "t.has_dropout == false",
-    "dropout_wg32"              : "t.has_dropout == true && t.is_store_randval == false",
-    "dropout_wg32_storerandval" : "t.has_dropout == true && t.is_store_randval == true",
-    "dropout_wg16"              : "t.has_dropout == true && t.is_store_randval == false",
-    "dropout_wg16_storerandval" : "t.has_dropout == true && t.is_store_randval == true",
+    "no": "t.has_dropout == false",
+    "dropout_wg32": "t.has_dropout == true && t.is_store_randval == false",
+    "dropout_wg32_storerandval": "t.has_dropout == true && t.is_store_randval == true",
+    "dropout_wg16": "t.has_dropout == true && t.is_store_randval == false",
+    "dropout_wg16_storerandval": "t.has_dropout == true && t.is_store_randval == true",
 }
 
 ROPE_MAP = {
-    "no" : "ck_tile::RotaryEmbeddingEnum::NONE",
-    "inter"  : "ck_tile::RotaryEmbeddingEnum::INTERLEAVED",
-    "half" : "ck_tile::RotaryEmbeddingEnum::HALF_ROTATED"
+    "no": "ck_tile::RotaryEmbeddingEnum::NONE",
+    "inter": "ck_tile::RotaryEmbeddingEnum::INTERLEAVED",
+    "half": "ck_tile::RotaryEmbeddingEnum::HALF_ROTATED",
 }
 
 ROPE_CHECK_MAP = {
-    "no"    : "rope_enum::none",
-    "inter" : "rope_enum::interleaved",
-    "half"  : "rope_enum::half_rotated"
+    "no": "rope_enum::none",
+    "inter": "rope_enum::interleaved",
+    "half": "rope_enum::half_rotated",
 }
 
-MODE_MAP = {
-    "batch" : "false",
-    "group" : "true"
-}
+MODE_MAP = {"batch": "false", "group": "true"}
 
-LAYOUT_MAP = {
-    "row" : "true",
-    "col" : "false"
-}
+LAYOUT_MAP = {"row": "true", "col": "false"}
 
 PIPELINE_MAP = {
-    "qr" : "ck_tile::BlockFmhaPipelineQRKSVS",
-    "qr_async" : "ck_tile::BlockFmhaPipelineQRKSVSAsync",
-    "qs" : "ck_tile::BlockFmhaPipelineQSKSVS",
-    "qr_async_trload" : "ck_tile::BlockFmhaPipelineQRKSVSAsyncTrload",
+    "qr": "ck_tile::BlockFmhaPipelineQRKSVS",
+    "qr_async": "ck_tile::BlockFmhaPipelineQRKSVSAsync",
+    "qs": "ck_tile::BlockFmhaPipelineQSKSVS",
+    "qr_async_trload": "ck_tile::BlockFmhaPipelineQRKSVSAsyncTrload",
 }
 
 PIPELINE_ENUM_MAP = {
-    "qr" : "ck_tile::BlockFmhaPipelineEnum::QRKSVS",
-    "qr_async" : "ck_tile::BlockFmhaPipelineEnum::QRKSVS_ASYNC",
-    "qr_nwarp_sshuffle" : "ck_tile::BlockFmhaPipelineEnum::QRKSVS",
-    "qs" : "ck_tile::BlockFmhaPipelineEnum::QSKSVS",
-    "qr_pagedkv" : "ck_tile::BlockFmhaPipelineEnum::QRKSVS",
-    "qr_async_trload" : "ck_tile::BlockFmhaPipelineEnum::QRKSVS_ASYNC_TRLOAD",
+    "qr": "ck_tile::BlockFmhaPipelineEnum::QRKSVS",
+    "qr_async": "ck_tile::BlockFmhaPipelineEnum::QRKSVS_ASYNC",
+    "qr_nwarp_sshuffle": "ck_tile::BlockFmhaPipelineEnum::QRKSVS",
+    "qs": "ck_tile::BlockFmhaPipelineEnum::QSKSVS",
+    "qr_pagedkv": "ck_tile::BlockFmhaPipelineEnum::QRKSVS",
+    "qr_async_trload": "ck_tile::BlockFmhaPipelineEnum::QRKSVS_ASYNC_TRLOAD",
 }
 
 BOOL_MAP = {
-    "t" : "true",
-    "f" : "false",
-    True : "true",
-    False : "false",
+    "t": "true",
+    "f": "false",
+    True: "true",
+    False: "false",
 }
diff --git a/example/ck_tile/01_fmha/codegen/ops/fmha_batch_prefill.py b/example/ck_tile/01_fmha/codegen/ops/fmha_batch_prefill.py
index e2f69fa49a..3b26e3ab5f 100644
--- a/example/ck_tile/01_fmha/codegen/ops/fmha_batch_prefill.py
+++ b/example/ck_tile/01_fmha/codegen/ops/fmha_batch_prefill.py
@@ -9,28 +9,26 @@ import itertools
 from pathlib import Path
 from typing import List, Optional, Tuple
 
-from codegen.cmake_config import *
-from codegen.cpp_symbol_map import *
+from codegen.cmake_config import GEN_DIR
+from codegen.cpp_symbol_map import (
+    MODE_MAP,
+    LAYOUT_MAP,
+    BIAS_CHECK_MAP,
+    get_mask_check_map,
+    get_mask_map,
+    BIAS_MAP,
+    FWD_DTYPE_MAP,
+    BOOL_MAP,
+    PIPELINE_ENUM_MAP,
+)
 
 
-DTYPE_BITS = {
-    "fp32": 32,
-    "fp16": 16,
-    "bf16": 16,
-    "fp8" : 8,
-    "bf8" : 8
-}
+DTYPE_BITS = {"fp32": 32, "fp16": 16, "bf16": 16, "fp8": 8, "bf8": 8}
 
-K0_MAX_SUBMAX_MAP = {
-    32 : 32,
-    64 : 64,
-    96 : 128,
-    128: 128,
-    256: 256
-}
+K0_MAX_SUBMAX_MAP = {32: 32, 64: 64, 96: 128, 128: 128, 256: 256}
 
 FMHA_BATCH_PREFILL_PIPELINE_MAP = {
-    "qr_async" : "ck_tile::BlockFmhaBatchPrefillPipelineQRKSVSAsync",
+    "qr_async": "ck_tile::BlockFmhaBatchPrefillPipelineQRKSVSAsync",
 }
 
 FMHA_FWD_KERNEL_HEADER = """// SPDX-License-Identifier: MIT
@@ -40,7 +38,7 @@ FMHA_FWD_KERNEL_HEADER = """// SPDX-License-Identifier: MIT
 #include "fmha_fwd.hpp"
 """
 
-FMHA_FWD_KERNEL_BODY="""
+FMHA_FWD_KERNEL_BODY = """
 using fmha_dtype_{F_idx} = {F_dtype};
 
 using fmha_block_tile_{F_idx} = ck_tile::sequence<{F_bm0}, {F_bn0}, {F_bk0}, {F_bn1}, {F_bk1}, {F_bk0max}>;
@@ -116,8 +114,8 @@ float fmha_batch_prefill_<trait_{F_idx}>(const ck_tile::stream_config& s, fmha_b
 }}
 """
 
-FMHA_FWD_API_FILENAME="fmha_batch_prefill_api.cpp"
-FMHA_FWD_API="""
+FMHA_FWD_API_FILENAME = "fmha_batch_prefill_api.cpp"
+FMHA_FWD_API = """
 #include <cstdio>
 
 namespace {{
@@ -167,173 +165,223 @@ float fmha_batch_prefill(fmha_batch_prefill_traits t, fmha_batch_prefill_args a,
 }}
 """
 
-FMHA_FWD_API_PER_DTYPE="""    {F_if}(t.data_type.compare(\"{F_dtype}\") == 0){{
+FMHA_FWD_API_PER_DTYPE = """    {F_if}(t.data_type.compare(\"{F_dtype}\") == 0){{
 {F_hdim_case}
     }}
 """
-FMHA_FWD_API_PER_HDIM_CASE="""        {F_if} (t.hdim_q <= {F_hdim} && t.hdim_v <= {F_hdim_v}) {{
+FMHA_FWD_API_PER_HDIM_CASE = """        {F_if} (t.hdim_q <= {F_hdim} && t.hdim_v <= {F_hdim_v}) {{
 {F_inner_dispatch}
         }}
 """
 
-FMHA_FWD_API_INNER_DISPATCH="""            {F_if}((t.is_group_mode == {F_mode}) && (t.is_v_rowmajor == {F_vlayout}) && (t.has_logits_soft_cap == {F_logits}) && ({F_mask_check}) && (t.bias_type == {F_bias_check}) && (t.has_lse == {F_lse})  && (t.has_dropout == {F_dropout}) && (t.do_fp8_static_quant == {F_squant}) &&
+FMHA_FWD_API_INNER_DISPATCH = """            {F_if}((t.is_group_mode == {F_mode}) && (t.is_v_rowmajor == {F_vlayout}) && (t.has_logits_soft_cap == {F_logits}) && ({F_mask_check}) && (t.bias_type == {F_bias_check}) && (t.has_lse == {F_lse})  && (t.has_dropout == {F_dropout}) && (t.do_fp8_static_quant == {F_squant}) &&
                         ({F_scheck}) && ({F_skcheck}) && ({F_dcheck}) && ({F_dvcheck}) && ({F_constraint})) {{
                 using trait_ = fmha_fwd_traits_<{F_hdim}, {F_dtype}, {F_mode}, {F_bm0}, {F_bn0}, {F_bk0}, {F_bn1}, {F_bk1}, {F_bk0max}, {F_vlayout}, {F_pipeline_enum}, {F_logits}, {F_mask}, {F_bias}, {F_lse}, {F_dropout}, {F_squant}, {F_spad}, {F_skpad}, {F_dpad}, {F_dvpad}, false>;
                 return fmha_batch_prefill_<trait_>(s, a);
             }}
 """
 
+
 @dataclass
 class CppConstraint:
     bool_expr: str = None
 
     def __str__(self):
         if self.bool_expr is None:
-            return 'true'
+            return "true"
         else:
-            return f'{self.bool_expr}'
+            return f"{self.bool_expr}"
 
     def __and__(self, other):
-        return CppConstraint(f'({str(self)}) && ({str(other)})')
+        return CppConstraint(f"({str(self)}) && ({str(other)})")
+
 
 @dataclass
 class FmhaFwdApiTrait:
-    pipeline_tag : str
+    pipeline_tag: str
     # sync with fmha_fwd_traits<>, to generate fallback calls
-    hdim       : str
-    dtype      : str  # data type
-    mode       : str  # value from MODE_MAP
-    bm0        : int  # tile size along q seqlen (block size)
-    bn0        : int  # tile size along qk seqlen
-    bk0        : int  # tile size along qk gemm unroll
-    bn1        : int  # tile size along v head_dim
-    bk1        : int  # tile size along kv gemm unroll
-    bk0max     : int
-    vlayout    : str
-    logits     : str
-    mask       : str
-    bias       : str  #
-    lse        : str  #
-    dropout    : str
-    squant     : str  #
-    spad       : str
-    skpad      : str
-    dpad       : str
-    dvpad      : str
-    constraint : CppConstraint
+    hdim: str
+    dtype: str  # data type
+    mode: str  # value from MODE_MAP
+    bm0: int  # tile size along q seqlen (block size)
+    bn0: int  # tile size along qk seqlen
+    bk0: int  # tile size along qk gemm unroll
+    bn1: int  # tile size along v head_dim
+    bk1: int  # tile size along kv gemm unroll
+    bk0max: int
+    vlayout: str
+    logits: str
+    mask: str
+    bias: str  #
+    lse: str  #
+    dropout: str
+    squant: str  #
+    spad: str
+    skpad: str
+    dpad: str
+    dvpad: str
+    constraint: CppConstraint
 
     @property
     def name(self) -> str:
-        return f'{self.hdim}-{self.dtype}-{self.mode}-{self.bm0}-{self.bn0}-{self.bk0}-{self.bn0}-{self.bk1}-{self.bk0max}-'+\
-                    f'{self.vlayout}-{self.logits}-{self.mask}-{self.bias}-{self.lse}-{self.dropout}-{self.squant}-{self.spad}-{self.skpad}-{self.dpad}-{self.dvpad}'
+        return (
+            f"{self.hdim}-{self.dtype}-{self.mode}-{self.bm0}-{self.bn0}-{self.bk0}-{self.bn0}-{self.bk1}-{self.bk0max}-"
+            + f"{self.vlayout}-{self.logits}-{self.mask}-{self.bias}-{self.lse}-{self.dropout}-{self.squant}-{self.spad}-{self.skpad}-{self.dpad}-{self.dvpad}"
+        )
 
     @property
     def scheck(self) -> str:
-        if self.mode == 'group': return 'true/*group mode spad always true*/'                  # group mode only generate spad/skpad == true
-        if self.pipeline_tag == 'qr_async':
-            if self.spad == 't' : return 'true' # always support
-            else :                return 'true'
-        elif self.pipeline_tag in ['qr']:
-            if self.spad == 't' : return f'true /*a.seqlen_q % {self.bm0} != 0*/'  # TODO: order of get_pipelines() matters! (ugly)
-            else :                return f'a.seqlen_q % {self.bm0} == 0'
-        else: assert False
+        if self.mode == "group":
+            return "true/*group mode spad always true*/"  # group mode only generate spad/skpad == true
+        if self.pipeline_tag == "qr_async":
+            if self.spad == "t":
+                return "true"  # always support
+            else:
+                return "true"
+        elif self.pipeline_tag in ["qr"]:
+            if self.spad == "t":
+                return f"true /*a.seqlen_q % {self.bm0} != 0*/"  # TODO: order of get_pipelines() matters! (ugly)
+            else:
+                return f"a.seqlen_q % {self.bm0} == 0"
+        else:
+            assert False
 
     @property
     def skcheck(self) -> str:
-        if self.mode == 'group': return 'true/*group mode skpad always true*/'                  # group mode only generate spad/skpad == true
-        if self.pipeline_tag == 'qr_async':
-            if self.skpad == 't' : return f'a.seqlen_k == 0 || a.seqlen_k % {self.bn0} != 0'
-            else :                 return f'a.seqlen_k != 0 && a.seqlen_k % {self.bn0} == 0'
-        elif self.pipeline_tag in ['qr', 'qr_fp8']:
-            if self.skpad == 't' : return f'true /*a.seqlen_k % {self.bn0} != 0*/' # TODO: order of get_pipelines() matters! (ugly)
-            else :                return f'a.seqlen_k % {self.bn0} == 0'
-        else: assert False
+        if self.mode == "group":
+            return "true/*group mode skpad always true*/"  # group mode only generate spad/skpad == true
+        if self.pipeline_tag == "qr_async":
+            if self.skpad == "t":
+                return f"a.seqlen_k == 0 || a.seqlen_k % {self.bn0} != 0"
+            else:
+                return f"a.seqlen_k != 0 && a.seqlen_k % {self.bn0} == 0"
+        elif self.pipeline_tag in ["qr", "qr_fp8"]:
+            if self.skpad == "t":
+                return f"true /*a.seqlen_k % {self.bn0} != 0*/"  # TODO: order of get_pipelines() matters! (ugly)
+            else:
+                return f"a.seqlen_k % {self.bn0} == 0"
+        else:
+            assert False
 
     @property
     def dcheck(self) -> str:
-        if self.pipeline_tag == 'qr_async':
+        if self.pipeline_tag == "qr_async":
             vec = int((32 * 4) / DTYPE_BITS[self.dtype])
-            if self.dpad == 't': return f'a.hdim_q % {vec} == 0'
-            else :               assert False
-        elif self.pipeline_tag in ['qr']:
+            if self.dpad == "t":
+                return f"a.hdim_q % {vec} == 0"
+            else:
+                assert False
+        elif self.pipeline_tag in ["qr"]:
             bk0submax = K0_MAX_SUBMAX_MAP[self.bk0max]
-            if self.dpad == 't': return f'true /*a.hdim_q % {bk0submax} != 0*/' # TODO: order of get_pipelines() matters! (ugly)
-            else :               return f'a.hdim_q % {bk0submax} == 0'
-        else:   assert False
+            if self.dpad == "t":
+                return f"true /*a.hdim_q % {bk0submax} != 0*/"  # TODO: order of get_pipelines() matters! (ugly)
+            else:
+                return f"a.hdim_q % {bk0submax} == 0"
+        else:
+            assert False
 
     @property
     def dvcheck(self) -> str:
-        if self.pipeline_tag == 'qr_async':
+        if self.pipeline_tag == "qr_async":
             vec = int((32 * 4) / DTYPE_BITS[self.dtype])
-            if self.dvpad == 't': return f'a.hdim_v % {vec} == 0'
-            else :                assert False
-        elif self.pipeline_tag in ['qr']:
+            if self.dvpad == "t":
+                return f"a.hdim_v % {vec} == 0"
+            else:
+                assert False
+        elif self.pipeline_tag in ["qr"]:
             bk0submax = K0_MAX_SUBMAX_MAP[self.bk0max]
-            if self.dvpad == 't': return f'true /*a.hdim_v % {bk0submax} != 0*/' # TODO: order of get_pipelines() matters! (ugly)
-            else :                return f'a.hdim_v % {bk0submax} == 0'
-        else:   assert False
+            if self.dvpad == "t":
+                return f"true /*a.hdim_v % {bk0submax} != 0*/"  # TODO: order of get_pipelines() matters! (ugly)
+            else:
+                return f"a.hdim_v % {bk0submax} == 0"
+        else:
+            assert False
+
 
 @dataclass
 class FmhaFwdPipeline:
-    tag : str
+    tag: str
 
-    F_vlayout    : str  # row/col
-    F_spad       : str  # true/false
-    F_skpad      : str  #
-    F_dpad       : str  #
-    F_dvpad      : str  #
-    F_logits     : str  # t/f
-    F_bias       : str  # true/false
-    F_lse        : str  #
-    F_dropout    : str  #
-    F_squant     : str  #
-    F_mask       : str  # value from MASK_MAP
-    F_constraint : CppConstraint = field(default_factory=lambda: CppConstraint())
+    F_vlayout: str  # row/col
+    F_spad: str  # true/false
+    F_skpad: str  #
+    F_dpad: str  #
+    F_dvpad: str  #
+    F_logits: str  # t/f
+    F_bias: str  # true/false
+    F_lse: str  #
+    F_dropout: str  #
+    F_squant: str  #
+    F_mask: str  # value from MASK_MAP
+    F_constraint: CppConstraint = field(default_factory=lambda: CppConstraint())
 
     @property
     def name(self) -> str:
         def pad_name() -> str:
-            n = ''
-            if self.F_spad == 't': n += 's'
-            if self.F_skpad == 't' : n += 'sk'
-            if self.F_dpad == 't' : n += 'd'
-            if self.F_dvpad == 't' : n += 'dv'
-            if n != '' : n = 'p' + n
+            n = ""
+            if self.F_spad == "t":
+                n += "s"
+            if self.F_skpad == "t":
+                n += "sk"
+            if self.F_dpad == "t":
+                n += "d"
+            if self.F_dvpad == "t":
+                n += "dv"
+            if n != "":
+                n = "p" + n
             return n
+
         pn = pad_name()
-        n = f'{self.tag}_v{self.F_vlayout[0]}'
-        if pn != '' : n += f'_{pn}'
-        else: n += '_npad'
-
-        if self.F_logits == 't' : n += '_logits'
-        else: n += '_nlogits'
-
-        if self.F_bias != 'no' : n += f'_{self.F_bias}'
-        else: n += '_nbias'
-
-        if self.F_mask[0:2] == 's_':
-            if self.F_mask == 's_mask': n += f'_mask'
-            else: n += '_nmask'
+        n = f"{self.tag}_v{self.F_vlayout[0]}"
+        if pn != "":
+            n += f"_{pn}"
         else:
-            if self.F_mask != 'no' : n += f'_m{self.F_mask[0]}'
-            else: n += '_nmask'
+            n += "_npad"
 
-        if self.F_lse == 't' : n += '_lse'
-        else: n += '_nlse'
+        if self.F_logits == "t":
+            n += "_logits"
+        else:
+            n += "_nlogits"
 
-        if self.F_dropout == 't' : n += '_dropout'
-        else: n += '_ndropout'
+        if self.F_bias != "no":
+            n += f"_{self.F_bias}"
+        else:
+            n += "_nbias"
 
-        if self.F_squant == 't' : n += '_squant'
-        else: n += '_nsquant'
+        if self.F_mask[0:2] == "s_":
+            if self.F_mask == "s_mask":
+                n += "_mask"
+            else:
+                n += "_nmask"
+        else:
+            if self.F_mask != "no":
+                n += f"_m{self.F_mask[0]}"
+            else:
+                n += "_nmask"
+
+        if self.F_lse == "t":
+            n += "_lse"
+        else:
+            n += "_nlse"
+
+        if self.F_dropout == "t":
+            n += "_dropout"
+        else:
+            n += "_ndropout"
+
+        if self.F_squant == "t":
+            n += "_squant"
+        else:
+            n += "_nsquant"
         return n
 
+
 class FmhaFwdApiPool:
     def __init__(self, mask_impl):
         self.pool = dict()
         self.mask_impl = mask_impl
 
-    def register_traits(self, trait : FmhaFwdApiTrait) -> None:
+    def register_traits(self, trait: FmhaFwdApiTrait) -> None:
         # TODO: do we need to check duplication?
         if trait.dtype not in self.pool.keys():
             self.pool[trait.dtype] = dict()
@@ -344,118 +392,152 @@ class FmhaFwdApiPool:
 
     @property
     def api(self) -> str:
-        per_dtypes=str()
+        per_dtypes = str()
         for i, dtype in enumerate(self.pool.keys()):
-            per_hdim_case=str()
+            per_hdim_case = str()
             for j, hdim in enumerate(self.pool[dtype].keys()):
-                traits=self.pool[dtype][hdim]
-                inners=str()
+                traits = self.pool[dtype][hdim]
+                inners = str()
                 for k, trait in enumerate(traits):
-                    if_k = 'if' if k == 0 else 'else if'
-                    inners = inners + FMHA_FWD_API_INNER_DISPATCH.format(F_if=if_k, F_mode=MODE_MAP[trait.mode], F_vlayout=LAYOUT_MAP[trait.vlayout],
-                                   F_pipeline_enum=PIPELINE_ENUM_MAP[trait.pipeline_tag], F_logits=BOOL_MAP[trait.logits], F_mask=get_mask_map(self.mask_impl)[trait.mask],
-                                   F_mask_check=get_mask_check_map(self.mask_impl)[trait.mask], F_bias_check=BIAS_CHECK_MAP[trait.bias], F_bias=BIAS_MAP[trait.bias],
-                                   F_lse=BOOL_MAP[trait.lse], F_dropout=BOOL_MAP[trait.dropout], F_squant=BOOL_MAP[trait.squant],
-                                   F_scheck=trait.scheck, F_skcheck=trait.skcheck, F_dcheck=trait.dcheck, F_dvcheck=trait.dvcheck, F_constraint=trait.constraint,
-                                   F_spad=BOOL_MAP[trait.spad], F_skpad=BOOL_MAP[trait.skpad], F_dpad=BOOL_MAP[trait.dpad], F_dvpad=BOOL_MAP[trait.dvpad],
-                                   F_bm0=trait.bm0, F_bn0=trait.bn0, F_bk0=trait.bk0, F_bn1=trait.bn1, F_bk1=trait.bk1, F_bk0max=trait.bk0max,
-                                   F_hdim=hdim, F_dtype=FWD_DTYPE_MAP[dtype])
-                if_j = 'if' if j == 0 else 'else if'
-                per_hdim_case = per_hdim_case + FMHA_FWD_API_PER_HDIM_CASE.format(F_if=if_j, F_hdim=hdim, F_hdim_v=trait.bn1, F_inner_dispatch=inners)
-            if_i = 'if' if i == 0 else 'else if'
-            per_dtypes = per_dtypes + FMHA_FWD_API_PER_DTYPE.format(F_if=if_i, F_dtype=dtype, F_hdim_case=per_hdim_case)
+                    if_k = "if" if k == 0 else "else if"
+                    inners = inners + FMHA_FWD_API_INNER_DISPATCH.format(
+                        F_if=if_k,
+                        F_mode=MODE_MAP[trait.mode],
+                        F_vlayout=LAYOUT_MAP[trait.vlayout],
+                        F_pipeline_enum=PIPELINE_ENUM_MAP[trait.pipeline_tag],
+                        F_logits=BOOL_MAP[trait.logits],
+                        F_mask=get_mask_map(self.mask_impl)[trait.mask],
+                        F_mask_check=get_mask_check_map(self.mask_impl)[trait.mask],
+                        F_bias_check=BIAS_CHECK_MAP[trait.bias],
+                        F_bias=BIAS_MAP[trait.bias],
+                        F_lse=BOOL_MAP[trait.lse],
+                        F_dropout=BOOL_MAP[trait.dropout],
+                        F_squant=BOOL_MAP[trait.squant],
+                        F_scheck=trait.scheck,
+                        F_skcheck=trait.skcheck,
+                        F_dcheck=trait.dcheck,
+                        F_dvcheck=trait.dvcheck,
+                        F_constraint=trait.constraint,
+                        F_spad=BOOL_MAP[trait.spad],
+                        F_skpad=BOOL_MAP[trait.skpad],
+                        F_dpad=BOOL_MAP[trait.dpad],
+                        F_dvpad=BOOL_MAP[trait.dvpad],
+                        F_bm0=trait.bm0,
+                        F_bn0=trait.bn0,
+                        F_bk0=trait.bk0,
+                        F_bn1=trait.bn1,
+                        F_bk1=trait.bk1,
+                        F_bk0max=trait.bk0max,
+                        F_hdim=hdim,
+                        F_dtype=FWD_DTYPE_MAP[dtype],
+                    )
+                if_j = "if" if j == 0 else "else if"
+                per_hdim_case = per_hdim_case + FMHA_FWD_API_PER_HDIM_CASE.format(
+                    F_if=if_j, F_hdim=hdim, F_hdim_v=trait.bn1, F_inner_dispatch=inners
+                )
+            if_i = "if" if i == 0 else "else if"
+            per_dtypes = per_dtypes + FMHA_FWD_API_PER_DTYPE.format(
+                F_if=if_i, F_dtype=dtype, F_hdim_case=per_hdim_case
+            )
         if not per_dtypes:
             # empty string we add some ignore to suppress warning in api
-            per_dtypes += '    (void)t ; (void)s ; (void)a;'
-        return FMHA_FWD_KERNEL_HEADER + FMHA_FWD_API.format(F_dispatch = per_dtypes)
+            per_dtypes += "    (void)t ; (void)s ; (void)a;"
+        return FMHA_FWD_KERNEL_HEADER + FMHA_FWD_API.format(F_dispatch=per_dtypes)
+
 
 @dataclass
 class FmhaFwdTileSize:
-    F_bm0        : int  # tile size along q seqlen (block size)
-    F_bn0        : int  # tile size along k seqlen
-    F_bk0        : int  # tile size along qk gemm unroll
-    F_bn1        : int  # tile size along v head_dim
-    F_bk1        : int  # tile size along kv gemm unroll
-    F_bk0max     : int  # total length of K0, used for pipeline that need load Q at once (or repeately load Q as a whole tile)
-    F_rm0        : int  # number of warps for gemm0 along q seqlen
-    F_rn0        : int  # number of warps for gemm0 along k seqlen
-    F_rk0        : int  # number of warps for gemm0 along head dim q (not used)
-    F_rm1        : int  # number of warps for gemm1 along q seqlen
-    F_rn1        : int  # number of warps for gemm1 along head dim v
-    F_rk1        : int  # number of warps for gemm1 along k seqlen (not used)
-    F_wm0        : int  # gemm0 warp size along m
-    F_wn0        : int  # gemm0 warp size along n
-    F_wk0        : int  # gemm0 warp size along k
-    F_wm1        : int  # gemm1 warp size along m
-    F_wn1        : int  # gemm1 warp size along n
-    F_wk1        : int  # gemm1 warp size along k
-    F_occupancy  : int  # occupancy, -1 will let pipeline decide the occupancy, other value will overwrite occupancy
-    F_constraint : CppConstraint = field(default_factory=lambda: CppConstraint())
+    F_bm0: int  # tile size along q seqlen (block size)
+    F_bn0: int  # tile size along k seqlen
+    F_bk0: int  # tile size along qk gemm unroll
+    F_bn1: int  # tile size along v head_dim
+    F_bk1: int  # tile size along kv gemm unroll
+    F_bk0max: int  # total length of K0, used for pipeline that need load Q at once (or repeately load Q as a whole tile)
+    F_rm0: int  # number of warps for gemm0 along q seqlen
+    F_rn0: int  # number of warps for gemm0 along k seqlen
+    F_rk0: int  # number of warps for gemm0 along head dim q (not used)
+    F_rm1: int  # number of warps for gemm1 along q seqlen
+    F_rn1: int  # number of warps for gemm1 along head dim v
+    F_rk1: int  # number of warps for gemm1 along k seqlen (not used)
+    F_wm0: int  # gemm0 warp size along m
+    F_wn0: int  # gemm0 warp size along n
+    F_wk0: int  # gemm0 warp size along k
+    F_wm1: int  # gemm1 warp size along m
+    F_wn1: int  # gemm1 warp size along n
+    F_wk1: int  # gemm1 warp size along k
+    F_occupancy: int  # occupancy, -1 will let pipeline decide the occupancy, other value will overwrite occupancy
+    F_constraint: CppConstraint = field(default_factory=lambda: CppConstraint())
 
     @property
     def name(self) -> str:
-        return f"b{self.F_bm0}x{self.F_bn0}x{self.F_bk0}x{self.F_bn1}x{self.F_bk1}x{self.F_bk0max}" +\
-        f"_r{self.F_rm0}x{self.F_rn0}x{self.F_rk0}_r{self.F_rm1}x{self.F_rn1}x{self.F_rk1}" +\
-        f"_w{self.F_wm0}x{self.F_wn0}x{self.F_wk0}_w{self.F_wm1}x{self.F_wn1}x{self.F_wk1}" +\
-        ("" if self.F_occupancy == -1 else f"_o{self.F_occupancy}")
+        return (
+            f"b{self.F_bm0}x{self.F_bn0}x{self.F_bk0}x{self.F_bn1}x{self.F_bk1}x{self.F_bk0max}"
+            + f"_r{self.F_rm0}x{self.F_rn0}x{self.F_rk0}_r{self.F_rm1}x{self.F_rn1}x{self.F_rk1}"
+            + f"_w{self.F_wm0}x{self.F_wn0}x{self.F_wk0}_w{self.F_wm1}x{self.F_wn1}x{self.F_wk1}"
+            + ("" if self.F_occupancy == -1 else f"_o{self.F_occupancy}")
+        )
+
 
 @dataclass
 class FmhaFwdKernel:
-    F_idx           : int  # this is not a tunable, but a counter to differentiate symbol
-    F_hdim          : int  # hdim
-    F_dtype         : str  # data type
-    F_mode          : str  # value from MODE_MAP
-    F_tile          : FmhaFwdTileSize
-    F_pipeline      : FmhaFwdPipeline
-    mask_impl       : str
+    F_idx: int  # this is not a tunable, but a counter to differentiate symbol
+    F_hdim: int  # hdim
+    F_dtype: str  # data type
+    F_mode: str  # value from MODE_MAP
+    F_tile: FmhaFwdTileSize
+    F_pipeline: FmhaFwdPipeline
+    mask_impl: str
 
     @property
     def template(self) -> str:
-        kernel_body = str()
-        return FMHA_FWD_KERNEL_HEADER + \
-            FMHA_FWD_KERNEL_BODY.format(
-                F_idx           = self.F_idx,
-                F_hdim          = self.F_hdim,
-                F_dtype         = FWD_DTYPE_MAP[self.F_dtype],
-                F_bm0           = self.F_tile.F_bm0,
-                F_bn0           = self.F_tile.F_bn0,
-                F_bk0           = self.F_tile.F_bk0,
-                F_bn1           = self.F_tile.F_bn1,
-                F_bk1           = self.F_tile.F_bk1,
-                F_bk0max        = self.F_tile.F_bk0max,
-                F_rm0           = self.F_tile.F_rm0,
-                F_rn0           = self.F_tile.F_rn0,
-                F_rk0           = self.F_tile.F_rk0,
-                F_rm1           = self.F_tile.F_rm1,
-                F_rn1           = self.F_tile.F_rn1,
-                F_rk1           = self.F_tile.F_rk1,
-                F_wm0           = self.F_tile.F_wm0,
-                F_wn0           = self.F_tile.F_wn0,
-                F_wk0           = self.F_tile.F_wk0,
-                F_wm1           = self.F_tile.F_wm1,
-                F_wn1           = self.F_tile.F_wn1,
-                F_wk1           = self.F_tile.F_wk1,
-                F_vlayout       = LAYOUT_MAP[self.F_pipeline.F_vlayout],
-                F_spad          = BOOL_MAP[self.F_pipeline.F_spad],
-                F_skpad         = BOOL_MAP[self.F_pipeline.F_skpad],
-                F_dpad          = BOOL_MAP[self.F_pipeline.F_dpad],
-                F_dvpad         = BOOL_MAP[self.F_pipeline.F_dvpad],
-                F_logits        = BOOL_MAP[self.F_pipeline.F_logits],
-                F_bias          = BIAS_MAP[self.F_pipeline.F_bias],
-                F_lse           = BOOL_MAP[self.F_pipeline.F_lse],
-                F_dropout       = BOOL_MAP[self.F_pipeline.F_dropout],
-                F_squant        = BOOL_MAP[self.F_pipeline.F_squant],
-                F_occupancy     = self.F_tile.F_occupancy,
-                F_pipeline_enum = PIPELINE_ENUM_MAP[self.F_pipeline.tag],
-                F_mask          = get_mask_map(self.mask_impl)[self.F_pipeline.F_mask],
-                F_mode          = MODE_MAP[self.F_mode],
-                F_pipeline      = FMHA_BATCH_PREFILL_PIPELINE_MAP[self.F_pipeline.tag])
+        return FMHA_FWD_KERNEL_HEADER + FMHA_FWD_KERNEL_BODY.format(
+            F_idx=self.F_idx,
+            F_hdim=self.F_hdim,
+            F_dtype=FWD_DTYPE_MAP[self.F_dtype],
+            F_bm0=self.F_tile.F_bm0,
+            F_bn0=self.F_tile.F_bn0,
+            F_bk0=self.F_tile.F_bk0,
+            F_bn1=self.F_tile.F_bn1,
+            F_bk1=self.F_tile.F_bk1,
+            F_bk0max=self.F_tile.F_bk0max,
+            F_rm0=self.F_tile.F_rm0,
+            F_rn0=self.F_tile.F_rn0,
+            F_rk0=self.F_tile.F_rk0,
+            F_rm1=self.F_tile.F_rm1,
+            F_rn1=self.F_tile.F_rn1,
+            F_rk1=self.F_tile.F_rk1,
+            F_wm0=self.F_tile.F_wm0,
+            F_wn0=self.F_tile.F_wn0,
+            F_wk0=self.F_tile.F_wk0,
+            F_wm1=self.F_tile.F_wm1,
+            F_wn1=self.F_tile.F_wn1,
+            F_wk1=self.F_tile.F_wk1,
+            F_vlayout=LAYOUT_MAP[self.F_pipeline.F_vlayout],
+            F_spad=BOOL_MAP[self.F_pipeline.F_spad],
+            F_skpad=BOOL_MAP[self.F_pipeline.F_skpad],
+            F_dpad=BOOL_MAP[self.F_pipeline.F_dpad],
+            F_dvpad=BOOL_MAP[self.F_pipeline.F_dvpad],
+            F_logits=BOOL_MAP[self.F_pipeline.F_logits],
+            F_bias=BIAS_MAP[self.F_pipeline.F_bias],
+            F_lse=BOOL_MAP[self.F_pipeline.F_lse],
+            F_dropout=BOOL_MAP[self.F_pipeline.F_dropout],
+            F_squant=BOOL_MAP[self.F_pipeline.F_squant],
+            F_occupancy=self.F_tile.F_occupancy,
+            F_pipeline_enum=PIPELINE_ENUM_MAP[self.F_pipeline.tag],
+            F_mask=get_mask_map(self.mask_impl)[self.F_pipeline.F_mask],
+            F_mode=MODE_MAP[self.F_mode],
+            F_pipeline=FMHA_BATCH_PREFILL_PIPELINE_MAP[self.F_pipeline.tag],
+        )
 
     @property
     def name(self) -> str:
         # TODO: we don't encode idx here
-        return f"fmha_batch_prefill_d{self.F_hdim}_{self.F_dtype}_{self.F_mode}_" + \
-                self.F_tile.name + '_' + self.F_pipeline.name
+        return (
+            f"fmha_batch_prefill_d{self.F_hdim}_{self.F_dtype}_{self.F_mode}_"
+            + self.F_tile.name
+            + "_"
+            + self.F_pipeline.name
+        )
 
     @property
     def filename(self) -> str:
@@ -463,35 +545,59 @@ class FmhaFwdKernel:
 
     def api_trait(self) -> FmhaFwdApiTrait:
         return FmhaFwdApiTrait(
-                pipeline_tag=self.F_pipeline.tag,
-                hdim=str(self.F_hdim),
-                dtype=self.F_dtype,
-                mode=self.F_mode,
-                bm0=self.F_tile.F_bm0,
-                bn0=self.F_tile.F_bn0,
-                bk0=self.F_tile.F_bk0,
-                bn1=self.F_tile.F_bn1,
-                bk1=self.F_tile.F_bk1,
-                bk0max=self.F_tile.F_bk0max,
-                vlayout=self.F_pipeline.F_vlayout,
-                mask=self.F_pipeline.F_mask,
-                logits=self.F_pipeline.F_logits,
-                bias=self.F_pipeline.F_bias,
-                lse=self.F_pipeline.F_lse,
-                dropout=self.F_pipeline.F_dropout,
-                squant=self.F_pipeline.F_squant,
-                spad=self.F_pipeline.F_spad,
-                skpad=self.F_pipeline.F_skpad,
-                dpad=self.F_pipeline.F_dpad,
-                dvpad=self.F_pipeline.F_dvpad,
-                constraint=self.F_tile.F_constraint & self.F_pipeline.F_constraint)
+            pipeline_tag=self.F_pipeline.tag,
+            hdim=str(self.F_hdim),
+            dtype=self.F_dtype,
+            mode=self.F_mode,
+            bm0=self.F_tile.F_bm0,
+            bn0=self.F_tile.F_bn0,
+            bk0=self.F_tile.F_bk0,
+            bn1=self.F_tile.F_bn1,
+            bk1=self.F_tile.F_bk1,
+            bk0max=self.F_tile.F_bk0max,
+            vlayout=self.F_pipeline.F_vlayout,
+            mask=self.F_pipeline.F_mask,
+            logits=self.F_pipeline.F_logits,
+            bias=self.F_pipeline.F_bias,
+            lse=self.F_pipeline.F_lse,
+            dropout=self.F_pipeline.F_dropout,
+            squant=self.F_pipeline.F_squant,
+            spad=self.F_pipeline.F_spad,
+            skpad=self.F_pipeline.F_skpad,
+            dpad=self.F_pipeline.F_dpad,
+            dvpad=self.F_pipeline.F_dvpad,
+            constraint=self.F_tile.F_constraint & self.F_pipeline.F_constraint,
+        )
+
 
 class KernelComponentFactory:
     @staticmethod
-    def get_hdim_tile_size_dict(dtype : str) -> Optional[dict]:
-        if dtype == 'fp16' or dtype == 'bf16':
+    def get_hdim_tile_size_dict(dtype: str) -> Optional[dict]:
+        if dtype == "fp16" or dtype == "bf16":
             return {
-                128 : [FmhaFwdTileSize(128, 128, 32, 128, 32,  128,  4, 1, 1,  4, 1, 1,  32, 32, 16,  32, 32, 16,  -1)],
+                128: [
+                    FmhaFwdTileSize(
+                        128,
+                        128,
+                        32,
+                        128,
+                        32,
+                        128,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        32,
+                        32,
+                        16,
+                        32,
+                        32,
+                        16,
+                        -1,
+                    )
+                ],
             }
         else:
             return None
@@ -502,28 +608,94 @@ class KernelComponentFactory:
         # TODO: the order of List matters! the later in this list will be also be checked later
         # TODO: currently for qr pipeline, let 't' padding to appear later!!
         # TODO: how to design this more generic?
-        squant = 't' if dtype == 'fp8' else 'f'
+        squant = "t" if dtype == "fp8" else "f"
         pipelines = []
-        if dtype in ['fp16', 'bf16']:
-            for logits, mask, bias, lse, dropout in itertools.product(["t", "f"], get_mask_map(mask_impl).keys(), BIAS_MAP.keys(), ["t", "f"], ["t", "f"]):
-                    pipelines.append(FmhaFwdPipeline('qr_async', 'row', 't', 'f', 't', 't', logits, bias, lse, dropout, squant, mask))
-                    pipelines.append(FmhaFwdPipeline('qr_async', 'row', 't', 't', 't', 't', logits, bias, lse, dropout, squant, mask))
-                    # pipelines.append(FmhaFwdPipeline('qr_async', 'col', 't', 'f', 't', 't', logits, bias, lse, dropout, squant, mask))
-                    # pipelines.append(FmhaFwdPipeline('qr_async', 'col', 't', 't', 't', 't', logits, bias, lse, dropout, squant, mask))
+        if dtype in ["fp16", "bf16"]:
+            for logits, mask, bias, lse, dropout in itertools.product(
+                ["t", "f"],
+                get_mask_map(mask_impl).keys(),
+                BIAS_MAP.keys(),
+                ["t", "f"],
+                ["t", "f"],
+            ):
+                pipelines.append(
+                    FmhaFwdPipeline(
+                        "qr_async",
+                        "row",
+                        "t",
+                        "f",
+                        "t",
+                        "t",
+                        logits,
+                        bias,
+                        lse,
+                        dropout,
+                        squant,
+                        mask,
+                    )
+                )
+                pipelines.append(
+                    FmhaFwdPipeline(
+                        "qr_async",
+                        "row",
+                        "t",
+                        "t",
+                        "t",
+                        "t",
+                        logits,
+                        bias,
+                        lse,
+                        dropout,
+                        squant,
+                        mask,
+                    )
+                )
+                # pipelines.append(FmhaFwdPipeline('qr_async', 'col', 't', 'f', 't', 't', logits, bias, lse, dropout, squant, mask))
+                # pipelines.append(FmhaFwdPipeline('qr_async', 'col', 't', 't', 't', 't', logits, bias, lse, dropout, squant, mask))
         else:
             assert False
         return pipelines
 
+
 class CustomFactory(KernelComponentFactory):
     @staticmethod
-    def get_hdim_tile_size_dict(dtype : str) -> Optional[dict]:
+    def get_hdim_tile_size_dict(dtype: str) -> Optional[dict]:
         result = KernelComponentFactory.get_hdim_tile_size_dict(dtype)
-        if dtype == 'fp16' or dtype == 'bf16':
+        if dtype == "fp16" or dtype == "bf16":
             if 128 in result.keys():
-                result[128].insert(0, FmhaFwdTileSize( 64, 128, 64, 128, 64,  128,  4, 1, 1,  4, 1, 1,  16, 16, 16,  16, 16, 16,  -1, CppConstraint('get_num_blocks(128) < num_cus * min_cu_util_rate')))
+                result[128].insert(
+                    0,
+                    FmhaFwdTileSize(
+                        64,
+                        128,
+                        64,
+                        128,
+                        64,
+                        128,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        -1,
+                        CppConstraint(
+                            "get_num_blocks(128) < num_cus * min_cu_util_rate"
+                        ),
+                    ),
+                )
         return result
 
-def get_fwd_blobs(kernel_filter : Optional[str], receipt, optdim_list, mask_impl) -> Tuple[FmhaFwdApiPool, List[FmhaFwdKernel]]:
+
+def get_fwd_blobs(
+    kernel_filter: Optional[str], receipt, optdim_list, mask_impl
+) -> Tuple[FmhaFwdApiPool, List[FmhaFwdKernel]]:
     # TODO: we don't support tuning yet, so pick up one value for vlayout/pipeline/pad
     #       support this in future
 
@@ -532,30 +704,41 @@ def get_fwd_blobs(kernel_filter : Optional[str], receipt, optdim_list, mask_impl
 
     for dtype in FWD_DTYPE_MAP.keys():
         d = CustomFactory.get_hdim_tile_size_dict(dtype)
-        if d == None:
+        if d is None:
             continue
-        #for hdim_str, mode, mask, bias, lse in itertools.product(d.keys(), MODE_MAP.keys(), MASK_MAP.keys(), ["t", "f"], ["t", "f"]):
+        # for hdim_str, mode, mask, bias, lse in itertools.product(d.keys(), MODE_MAP.keys(), MASK_MAP.keys(), ["t", "f"], ["t", "f"]):
         for (hdim, tiles), mode in itertools.product(d.items(), MODE_MAP.keys()):
-            for tile, pipeline in itertools.product(tiles, CustomFactory.get_pipelines(dtype, hdim, receipt, mask_impl)):
+            for tile, pipeline in itertools.product(
+                tiles, CustomFactory.get_pipelines(dtype, hdim, receipt, mask_impl)
+            ):
                 if mode == "group":
-                    if pipeline.F_spad != 't' or pipeline.F_skpad != 't':
+                    if pipeline.F_spad != "t" or pipeline.F_skpad != "t":
                         # in group mode, spad/skpad must be true, since we can't predict if seqlen of current batch need pad or not
                         continue
                 if hdim == 192 and tile.F_bn1 == 128:
                     # NOTE: this is used to speedup deepseek prefill case, we don't gen training
-                    if pipeline.F_bias != 'no' or pipeline.F_lse == 't' or pipeline.F_dropout == 't':
+                    if (
+                        pipeline.F_bias != "no"
+                        or pipeline.F_lse == "t"
+                        or pipeline.F_dropout == "t"
+                    ):
                         continue
                 # logits_soft_cap is only allowed if no bias
-                if not ((pipeline.F_logits == 't' and pipeline.F_bias == 'no') or pipeline.F_logits == 'f'):
+                if not (
+                    (pipeline.F_logits == "t" and pipeline.F_bias == "no")
+                    or pipeline.F_logits == "f"
+                ):
                     continue
-                k = FmhaFwdKernel(F_idx=0,
-                                  F_hdim=hdim,
-                                  F_dtype=dtype,
-                                  F_mode=mode,
-                                  F_tile=tile,
-                                  F_pipeline=pipeline,
-                                  mask_impl=mask_impl)
-                if kernel_filter != '':
+                k = FmhaFwdKernel(
+                    F_idx=0,
+                    F_hdim=hdim,
+                    F_dtype=dtype,
+                    F_mode=mode,
+                    F_tile=tile,
+                    F_pipeline=pipeline,
+                    mask_impl=mask_impl,
+                )
+                if kernel_filter != "":
                     if not fnmatch.fnmatch(k.name, kernel_filter):
                         continue
                 if optdim_list != [-1]:
@@ -563,48 +746,48 @@ def get_fwd_blobs(kernel_filter : Optional[str], receipt, optdim_list, mask_impl
                         continue
                 # 2 - Flash attention integration
                 if receipt in (2, 3):
-                    cond = dtype in ['fp16', 'bf16']
-                    cond &= pipeline.F_vlayout == 'row'
-                    cond &= pipeline.F_bias in ['no', 'alibi']
-                    cond &= pipeline.F_squant == 'f'
+                    cond = dtype in ["fp16", "bf16"]
+                    cond &= pipeline.F_vlayout == "row"
+                    cond &= pipeline.F_bias in ["no", "alibi"]
+                    cond &= pipeline.F_squant == "f"
                     if not cond:
                         continue
                 # PyTorch integration
                 elif receipt == 4:
-                    cond = dtype in ['fp16', 'bf16']
-                    cond &= pipeline.F_vlayout == 'row'
-                    cond &= pipeline.F_bias in ['no', 'bias']
-                    cond &= pipeline.F_squant == 'f'
+                    cond = dtype in ["fp16", "bf16"]
+                    cond &= pipeline.F_vlayout == "row"
+                    cond &= pipeline.F_bias in ["no", "bias"]
+                    cond &= pipeline.F_squant == "f"
                     if not cond:
                         continue
                 # Aiter(mha_fwd) integration
                 elif receipt == 100:
-                    cond = dtype in ['fp16', 'bf16']
-                    cond &= mode == 'batch'
-                    cond &= pipeline.F_vlayout == 'row'
-                    cond &= pipeline.F_squant == 'f'
+                    cond = dtype in ["fp16", "bf16"]
+                    cond &= mode == "batch"
+                    cond &= pipeline.F_vlayout == "row"
+                    cond &= pipeline.F_squant == "f"
                     if not cond:
                         continue
                 # Aiter(mha_batch_prefill) integration
                 elif receipt == 200:
-                    cond = dtype in ['fp16', 'bf16']
-                    cond &= mode == 'group'
-                    cond &= pipeline.F_vlayout == 'row'
-                    cond &= pipeline.F_squant == 'f'
+                    cond = dtype in ["fp16", "bf16"]
+                    cond &= mode == "group"
+                    cond &= pipeline.F_vlayout == "row"
+                    cond &= pipeline.F_squant == "f"
                     if not cond:
                         continue
                 # aiter::mha_batch_prefill C++ api integration
                 elif receipt == 600:
-                    cond = dtype in ['fp16', 'bf16']
-                    cond &= mode == 'group'
-                    cond &= pipeline.F_vlayout == 'row'
-                    cond &= pipeline.F_squant == 'f'
+                    cond = dtype in ["fp16", "bf16"]
+                    cond &= mode == "group"
+                    cond &= pipeline.F_vlayout == "row"
+                    cond &= pipeline.F_squant == "f"
                     if not cond:
                         continue
 
                 # fp32 only
                 if receipt == 800 or receipt == 801:
-                    cond = dtype == 'fp32'
+                    cond = dtype == "fp32"
                     if not cond:
                         continue
 
@@ -613,20 +796,28 @@ def get_fwd_blobs(kernel_filter : Optional[str], receipt, optdim_list, mask_impl
 
     return (api_pool, gen)
 
+
 def write_single_fwd_kernel(kernel: FmhaFwdKernel, autogen_dir: Path) -> None:
     (autogen_dir / kernel.filename).write_text(kernel.template)
 
-def write_fwd_api(api_pool : FmhaFwdApiPool, autogen_dir: Path) -> None:
+
+def write_fwd_api(api_pool: FmhaFwdApiPool, autogen_dir: Path) -> None:
     (autogen_dir / FMHA_FWD_API_FILENAME).write_text(api_pool.api)
 
-def write_blobs(output_dir : Path, kernel_filter : str, receipt, optdim_list, mask_impl) -> None:
+
+def write_blobs(
+    output_dir: Path, kernel_filter: str, receipt, optdim_list, mask_impl
+) -> None:
     api_pool, kernels = get_fwd_blobs(kernel_filter, receipt, optdim_list, mask_impl)
     for kernel in kernels:
         write_single_fwd_kernel(kernel, output_dir)
     write_fwd_api(api_pool, output_dir)
 
-def list_blobs(file_path : Path, kernel_filter : str, receipt, optdim_list, mask_impl) -> None:
-    with file_path.open('a') as f:
+
+def list_blobs(
+    file_path: Path, kernel_filter: str, receipt, optdim_list, mask_impl
+) -> None:
+    with file_path.open("a") as f:
         _, kernels = get_fwd_blobs(kernel_filter, receipt, optdim_list, mask_impl)
         for kernel in kernels:
             f.write(str(file_path.parent / GEN_DIR / kernel.filename) + "\n")
diff --git a/example/ck_tile/01_fmha/codegen/ops/fmha_bwd.py b/example/ck_tile/01_fmha/codegen/ops/fmha_bwd.py
index 059be0e490..19f5bb2288 100644
--- a/example/ck_tile/01_fmha/codegen/ops/fmha_bwd.py
+++ b/example/ck_tile/01_fmha/codegen/ops/fmha_bwd.py
@@ -10,8 +10,18 @@ from pathlib import Path
 from typing import List, Tuple, Dict, Literal, Any
 from collections import defaultdict
 
-from codegen.cmake_config import *
-from codegen.cpp_symbol_map import *
+from codegen.cmake_config import GEN_DIR
+from codegen.cpp_symbol_map import (
+    get_mask_check_map,
+    BIAS_CHECK_MAP,
+    DROPOUT_CHECK_MAP,
+    MODE_MAP,
+    get_mask_map,
+    BIAS_MAP,
+    DROPOUT_MAP,
+    BWD_DTYPE_MAP,
+    BOOL_MAP,
+)
 from codegen.utils import update_file
 
 
@@ -21,7 +31,7 @@ FMHA_BWD_KERNEL_HEADER = """// SPDX-License-Identifier: MIT
 #include "fmha_bwd.hpp"
 """
 
-FMHA_BWD_DQ_DK_DV_KERNEL_BODY="""
+FMHA_BWD_DQ_DK_DV_KERNEL_BODY = """
 using fmha_dtype_{F_idx} = {F_dtype};
 
 using fmha_block_tile_{F_idx} = ck_tile::
@@ -164,8 +174,8 @@ std::string fmha_bwd_dq_dk_dv_get_name_<dq_dk_dv_trait_{F_idx}>()
 }}
 """
 
-FMHA_BWD_API_FILENAME="fmha_bwd_api.cpp"
-FMHA_BWD_API="""
+FMHA_BWD_API_FILENAME = "fmha_bwd_api.cpp"
+FMHA_BWD_API = """
 #include <iostream>
 
 template <typename dot_do_o_trait_, typename dq_dk_dv_trait_, typename convert_dq_trait_>
@@ -201,17 +211,18 @@ float fmha_bwd<2>(fmha_bwd_traits t, fmha_bwd_args a, const ck_tile::stream_conf
 }}
 """
 
-def FMHA_BWD_API_COND_STATEMENT(F_cond: str, F_body: str, *, indent=0, if_ = 0) -> str:
+
+def FMHA_BWD_API_COND_STATEMENT(F_cond: str, F_body: str, *, indent=0, if_=0) -> str:
     lines = [
         f"{'if' if if_ == 0 else 'else if'}({F_cond})",
         "{",
-        *['    ' + line for line in F_body.split('\n') if line.strip() != ''],
+        *["    " + line for line in F_body.split("\n") if line.strip() != ""],
         "}",
     ]
-    return '\n'.join(' ' * indent + line for line in lines) + '\n'
+    return "\n".join(" " * indent + line for line in lines) + "\n"
 
 
-FMHA_BWD_API_INNER_DISPATCH="""
+FMHA_BWD_API_INNER_DISPATCH = """
 {F_if}((t.is_group_mode == {F_mode}) && ({F_mask_check}) && (t.bias_type == {F_bias_check}) && (t.has_dbias == {F_dbias}) && ({F_dropout_check}) &&
         ({F_scheck}) && ({F_dcheck}) && ({F_dvcheck}) && (t.is_deterministic == {F_deterministic}){F_cond_extra}) {{
     using dot_do_o_trait_ = fmha_bwd_dot_do_o_traits_<{F_hdim}, {F_dtype}, {F_mode}, {F_spad1d}, ({F_dvpad} > 0)>;
@@ -225,6 +236,7 @@ FMHA_BWD_API_INNER_DISPATCH="""
 # M0 size for 1d kernels (dot/convert)
 M0_1D = 64
 
+
 # GEMM0: Q@K=S^T
 # GEMM1: P^T@dO^T=dV(This was chosen as G1 to match fwd, but N1 must be equal to headdim_v)
 # GEMM2: dO@V=dP^T(This was chosen as G2 because of the calculation order)
@@ -233,174 +245,537 @@ M0_1D = 64
 # Is it necessary to distinguish between K0~K4?
 @dataclass(frozen=True)
 class FmhaBwdDQDKDVTileSize:
-    F_bm0       : int  # tile size along q seqlen (block size)
-    F_bn0       : int  # tile size along k seqlen
-    F_bk0       : int  # tile size along gemm0 unroll(F_bhdq)
-    F_bk1       : int  # tile size along gemm1 unroll(F_bm0)
-    F_bk2       : int  # tile size along gemm2 unroll(F_bhdv)
-    F_bk3       : int  # tile size along gemm3 unroll(F_bm0)
-    F_bk4       : int  # tile size along gemm4 unroll(F_bn0)
-    F_bhdq      : int  # q head_dim
-    F_bhdv      : int  # v head_dim
-    F_rm0       : int  # number of warps along q seqlen (block warps) in gemm0/gemm2
-    F_rn0       : int  # number of warps along k seqlen (block warps) in gemm0/gemm2
-    F_rk0       : int  # number of warps along headdim_qk/v (not used) in gemm0/gemm2
-    F_rm1       : int  # number of warps along k seqlen (block warps) in gemm1/gemm3
-    F_rn1       : int  # number of warps along headdim_qk/v (block warps) in gemm1/gemm3
-    F_rk1       : int  # number of warps along q seqlen (not used) in gemm1/gemm3
-    F_rm2       : int  # number of warps along q seqlen (block warps) in gemm4
-    F_rn2       : int  # number of warps along headdim_qk (block warps) in gemm4
-    F_rk2       : int  # number of warps along k seqlen (not used) in gemm4
-    F_wm0       : int  # warp size along m in gemm0/gemm2/gemm4
-    F_wn0       : int  # warp size along n in gemm0/gemm2/gemm4
-    F_wk0       : int  # warp size along k in gemm0/gemm2/gemm4
-    F_wm1       : int  # warp size along m in gemm1/gemm3
-    F_wn1       : int  # warp size along n in gemm1/gemm3
-    F_wk1       : int  # warp size along k in gemm1/gemm3
-    F_occupancy : int  # occupancy
-    max_seq_q   : int = 0
+    F_bm0: int  # tile size along q seqlen (block size)
+    F_bn0: int  # tile size along k seqlen
+    F_bk0: int  # tile size along gemm0 unroll(F_bhdq)
+    F_bk1: int  # tile size along gemm1 unroll(F_bm0)
+    F_bk2: int  # tile size along gemm2 unroll(F_bhdv)
+    F_bk3: int  # tile size along gemm3 unroll(F_bm0)
+    F_bk4: int  # tile size along gemm4 unroll(F_bn0)
+    F_bhdq: int  # q head_dim
+    F_bhdv: int  # v head_dim
+    F_rm0: int  # number of warps along q seqlen (block warps) in gemm0/gemm2
+    F_rn0: int  # number of warps along k seqlen (block warps) in gemm0/gemm2
+    F_rk0: int  # number of warps along headdim_qk/v (not used) in gemm0/gemm2
+    F_rm1: int  # number of warps along k seqlen (block warps) in gemm1/gemm3
+    F_rn1: int  # number of warps along headdim_qk/v (block warps) in gemm1/gemm3
+    F_rk1: int  # number of warps along q seqlen (not used) in gemm1/gemm3
+    F_rm2: int  # number of warps along q seqlen (block warps) in gemm4
+    F_rn2: int  # number of warps along headdim_qk (block warps) in gemm4
+    F_rk2: int  # number of warps along k seqlen (not used) in gemm4
+    F_wm0: int  # warp size along m in gemm0/gemm2/gemm4
+    F_wn0: int  # warp size along n in gemm0/gemm2/gemm4
+    F_wk0: int  # warp size along k in gemm0/gemm2/gemm4
+    F_wm1: int  # warp size along m in gemm1/gemm3
+    F_wn1: int  # warp size along n in gemm1/gemm3
+    F_wk1: int  # warp size along k in gemm1/gemm3
+    F_occupancy: int  # occupancy
+    max_seq_q: int = 0
 
     @property
     def name(self) -> str:
-        return f"b{self.F_bm0}x{self.F_bn0}x{self.F_bk0}x{self.F_bk1}x{self.F_bk2}x{self.F_bk3}x{self.F_bk4}x{self.F_bhdq}x{self.F_bhdv}" +\
-        f"_r{self.F_rm0}x{self.F_rn0}x{self.F_rk0}_r{self.F_rm1}x{self.F_rn1}x{self.F_rk1}_r{self.F_rm2}x{self.F_rn2}x{self.F_rk2}" +\
-        f"_w{self.F_wm0}x{self.F_wn0}x{self.F_wk0}_w{self.F_wm1}x{self.F_wn1}x{self.F_wk1}_o{self.F_occupancy}_maxq{self.max_seq_q}"
+        return (
+            f"b{self.F_bm0}x{self.F_bn0}x{self.F_bk0}x{self.F_bk1}x{self.F_bk2}x{self.F_bk3}x{self.F_bk4}x{self.F_bhdq}x{self.F_bhdv}"
+            + f"_r{self.F_rm0}x{self.F_rn0}x{self.F_rk0}_r{self.F_rm1}x{self.F_rn1}x{self.F_rk1}_r{self.F_rm2}x{self.F_rn2}x{self.F_rk2}"
+            + f"_w{self.F_wm0}x{self.F_wn0}x{self.F_wk0}_w{self.F_wm1}x{self.F_wn1}x{self.F_wk1}_o{self.F_occupancy}_maxq{self.max_seq_q}"
+        )
+
 
 @dataclass(frozen=True)
 class FmhaBwdDQDKDVKernel:
-    F_idx           : int  # this is not a tunable, but a counter to differentiate symbol
-    F_hdim          : int  # hdim
-    F_dtype         : str  # data type
-    F_tile          : FmhaBwdDQDKDVTileSize
-    F_dpad          : Literal[0, 8 ,1]
-    F_dvpad         : Literal[0, 8 ,1]
-    F_bias          : str  #
-    F_dbias         : str  #
-    F_dropout       : str  #
-    F_mask          : str  # value from MASK_MAP
-    F_mode          : str  # value from MODE_MAP
-    F_deterministic : str  #
-    mask_impl       : str  #
-    F_trload       : str  #
+    F_idx: int  # this is not a tunable, but a counter to differentiate symbol
+    F_hdim: int  # hdim
+    F_dtype: str  # data type
+    F_tile: FmhaBwdDQDKDVTileSize
+    F_dpad: Literal[0, 8, 1]
+    F_dvpad: Literal[0, 8, 1]
+    F_bias: str  #
+    F_dbias: str  #
+    F_dropout: str  #
+    F_mask: str  # value from MASK_MAP
+    F_mode: str  # value from MODE_MAP
+    F_deterministic: str  #
+    mask_impl: str  #
+    F_trload: str  #
 
     @property
     def template(self) -> str:
-        return FMHA_BWD_KERNEL_HEADER + \
-            FMHA_BWD_DQ_DK_DV_KERNEL_BODY.format(
-                F_idx           = self.F_idx,
-                F_hdim          = self.F_hdim,
-                F_dtype         = BWD_DTYPE_MAP[self.F_dtype],
-                F_bm0           = self.F_tile.F_bm0,
-                F_bn0           = self.F_tile.F_bn0,
-                F_bk0           = self.F_tile.F_bk0,
-                F_bk1           = self.F_tile.F_bk1,
-                F_bk2           = self.F_tile.F_bk2,
-                F_bk3           = self.F_tile.F_bk3,
-                F_bk4           = self.F_tile.F_bk4,
-                F_bhdq          = self.F_tile.F_bhdq,
-                F_bhdv          = self.F_tile.F_bhdv,
-                F_rm0           = self.F_tile.F_rm0,
-                F_rn0           = self.F_tile.F_rn0,
-                F_rk0           = self.F_tile.F_rk0,
-                F_rm1           = self.F_tile.F_rm1,
-                F_rn1           = self.F_tile.F_rn1,
-                F_rk1           = self.F_tile.F_rk1,
-                F_rm2           = self.F_tile.F_rm2,
-                F_rn2           = self.F_tile.F_rn2,
-                F_rk2           = self.F_tile.F_rk2,
-                F_wm0           = self.F_tile.F_wm0,
-                F_wn0           = self.F_tile.F_wn0,
-                F_wk0           = self.F_tile.F_wk0,
-                F_wm1           = self.F_tile.F_wm1,
-                F_wn1           = self.F_tile.F_wn1,
-                F_wk1           = self.F_tile.F_wk1,
-                F_dpad          = self.F_dpad,
-                F_dvpad         = self.F_dvpad,
-                F_bias          = BIAS_MAP[self.F_bias],
-                F_dbias         = BOOL_MAP[self.F_dbias],
-                F_dropout       = DROPOUT_MAP[self.F_dropout],
-                F_occupancy     = self.F_tile.F_occupancy,
-                F_mask          = get_mask_map(self.mask_impl)[self.F_mask],
-                F_mode          = MODE_MAP[self.F_mode],
-                F_deterministic = BOOL_MAP[self.F_deterministic],
-                F_trload        = BOOL_MAP[self.F_trload],
-                F_maxq          = self.F_tile.max_seq_q
-            )
+        return FMHA_BWD_KERNEL_HEADER + FMHA_BWD_DQ_DK_DV_KERNEL_BODY.format(
+            F_idx=self.F_idx,
+            F_hdim=self.F_hdim,
+            F_dtype=BWD_DTYPE_MAP[self.F_dtype],
+            F_bm0=self.F_tile.F_bm0,
+            F_bn0=self.F_tile.F_bn0,
+            F_bk0=self.F_tile.F_bk0,
+            F_bk1=self.F_tile.F_bk1,
+            F_bk2=self.F_tile.F_bk2,
+            F_bk3=self.F_tile.F_bk3,
+            F_bk4=self.F_tile.F_bk4,
+            F_bhdq=self.F_tile.F_bhdq,
+            F_bhdv=self.F_tile.F_bhdv,
+            F_rm0=self.F_tile.F_rm0,
+            F_rn0=self.F_tile.F_rn0,
+            F_rk0=self.F_tile.F_rk0,
+            F_rm1=self.F_tile.F_rm1,
+            F_rn1=self.F_tile.F_rn1,
+            F_rk1=self.F_tile.F_rk1,
+            F_rm2=self.F_tile.F_rm2,
+            F_rn2=self.F_tile.F_rn2,
+            F_rk2=self.F_tile.F_rk2,
+            F_wm0=self.F_tile.F_wm0,
+            F_wn0=self.F_tile.F_wn0,
+            F_wk0=self.F_tile.F_wk0,
+            F_wm1=self.F_tile.F_wm1,
+            F_wn1=self.F_tile.F_wn1,
+            F_wk1=self.F_tile.F_wk1,
+            F_dpad=self.F_dpad,
+            F_dvpad=self.F_dvpad,
+            F_bias=BIAS_MAP[self.F_bias],
+            F_dbias=BOOL_MAP[self.F_dbias],
+            F_dropout=DROPOUT_MAP[self.F_dropout],
+            F_occupancy=self.F_tile.F_occupancy,
+            F_mask=get_mask_map(self.mask_impl)[self.F_mask],
+            F_mode=MODE_MAP[self.F_mode],
+            F_deterministic=BOOL_MAP[self.F_deterministic],
+            F_trload=BOOL_MAP[self.F_trload],
+            F_maxq=self.F_tile.max_seq_q,
+        )
 
     @property
     def name(self) -> str:
         def pad_name() -> str:
-            n = ''
-            if self.F_dpad : n += f'd{self.F_dpad}'
-            if self.F_dvpad : n += f'dv{self.F_dvpad}'
-            if n != '' : n = 'p' + n
+            n = ""
+            if self.F_dpad:
+                n += f"d{self.F_dpad}"
+            if self.F_dvpad:
+                n += f"dv{self.F_dvpad}"
+            if n != "":
+                n = "p" + n
             return n
+
         pn = pad_name()
         n = f"fmha_bwd_d{self.F_hdim}_{self.F_dtype}_{self.F_mode}_" + self.F_tile.name
-        if pn != '' : n += f'_{pn}'
-        else: n += '_npad'
-
-        if self.F_bias != 'no' : n += f'_{self.F_bias}'
-        else: n += '_nbias'
-
-        if self.F_dbias == 't' : n += '_dbias'
-        else: n += '_ndbias'
-
-        if self.F_mask[0:2] == 's_':
-            if self.F_mask == 's_mask': n += f'_mask'
-            else: n += '_nmask'
+        if pn != "":
+            n += f"_{pn}"
         else:
-            if self.F_mask != 'no' : n += f'_m{self.F_mask[0]}'
-            else: n += '_nmask'
+            n += "_npad"
 
-        if self.F_dropout != 'no' : n += f'_{self.F_dropout}'
-        else: n += '_ndropout'
+        if self.F_bias != "no":
+            n += f"_{self.F_bias}"
+        else:
+            n += "_nbias"
 
-        if self.F_deterministic == 't' : n += '_deterministic'
-        else: n += '_ndeterministic'
+        if self.F_dbias == "t":
+            n += "_dbias"
+        else:
+            n += "_ndbias"
 
-        if self.F_trload == 't' : n += '_trload'
-        else: n += '_ntrload'
+        if self.F_mask[0:2] == "s_":
+            if self.F_mask == "s_mask":
+                n += "_mask"
+            else:
+                n += "_nmask"
+        else:
+            if self.F_mask != "no":
+                n += f"_m{self.F_mask[0]}"
+            else:
+                n += "_nmask"
+
+        if self.F_dropout != "no":
+            n += f"_{self.F_dropout}"
+        else:
+            n += "_ndropout"
+
+        if self.F_deterministic == "t":
+            n += "_deterministic"
+        else:
+            n += "_ndeterministic"
+
+        if self.F_trload == "t":
+            n += "_trload"
+        else:
+            n += "_ntrload"
         return n
 
     @property
     def filename(self) -> str:
         return self.name + ".cpp"
 
+
 # TODO: design a more practical way to do it
 # this is current supported tile size.
-def get_dq_dk_dv_tiles(dtype : str, tr_load: str) -> List[FmhaBwdDQDKDVTileSize]:
-    if dtype == 'fp32' and tr_load == 'f':
+def get_dq_dk_dv_tiles(dtype: str, tr_load: str) -> List[FmhaBwdDQDKDVTileSize]:
+    if dtype == "fp32" and tr_load == "f":
         return [
             #                     bm0, bn0, bk0, bk1, bk2, bk3, bk4, bhdq, bhdv,
-            FmhaBwdDQDKDVTileSize( 32, 128,  32,  32,  32,  32,  64,   32,   32, 1, 4, 1, 4, 1, 1, 2, 2, 1, 16, 16, 16, 16, 16, 16, 1),
-            FmhaBwdDQDKDVTileSize( 16,  64,  64,  16,  64,  16,  16,   64,   64, 1, 4, 1, 4, 1, 1, 1, 4, 1, 16, 16, 16, 16, 16, 16, 1),
-            FmhaBwdDQDKDVTileSize( 16,  64, 128,  16, 128,  16,  16,  128,  128, 1, 4, 1, 4, 1, 1, 1, 4, 1, 16, 16, 16, 16, 16, 16, 1),
+            FmhaBwdDQDKDVTileSize(
+                32,
+                128,
+                32,
+                32,
+                32,
+                32,
+                64,
+                32,
+                32,
+                1,
+                4,
+                1,
+                4,
+                1,
+                1,
+                2,
+                2,
+                1,
+                16,
+                16,
+                16,
+                16,
+                16,
+                16,
+                1,
+            ),
+            FmhaBwdDQDKDVTileSize(
+                16,
+                64,
+                64,
+                16,
+                64,
+                16,
+                16,
+                64,
+                64,
+                1,
+                4,
+                1,
+                4,
+                1,
+                1,
+                1,
+                4,
+                1,
+                16,
+                16,
+                16,
+                16,
+                16,
+                16,
+                1,
+            ),
+            FmhaBwdDQDKDVTileSize(
+                16,
+                64,
+                128,
+                16,
+                128,
+                16,
+                16,
+                128,
+                128,
+                1,
+                4,
+                1,
+                4,
+                1,
+                1,
+                1,
+                4,
+                1,
+                16,
+                16,
+                16,
+                16,
+                16,
+                16,
+                1,
+            ),
         ]
-    elif (dtype == 'fp16' or dtype == 'bf16') and tr_load == 'f':
+    elif (dtype == "fp16" or dtype == "bf16") and tr_load == "f":
         return [
-            FmhaBwdDQDKDVTileSize( 32, 128,  32, 32,  32, 32, 64,  32,  32, 1, 4, 1, 4, 1, 1, 2, 2, 1, 16, 16, 32, 16, 16, 16, 1),
-            FmhaBwdDQDKDVTileSize( 32, 128,  64, 32,  64, 32, 32,  64,  64, 1, 4, 1, 4, 1, 1, 1, 4, 1, 16, 16, 32, 16, 16, 16, 1),
-            FmhaBwdDQDKDVTileSize( 32, 128,  96, 32,  96, 32, 32,  96,  96, 1, 4, 1, 4, 1, 1, 2, 2, 1, 16, 16, 32, 16, 16, 16, 1),
-            FmhaBwdDQDKDVTileSize( 16, 128, 128, 16, 128, 16, 32, 128, 128, 1, 4, 1, 4, 1, 1, 1, 4, 1, 16, 16, 32, 16, 16, 16, 1),
+            FmhaBwdDQDKDVTileSize(
+                32,
+                128,
+                32,
+                32,
+                32,
+                32,
+                64,
+                32,
+                32,
+                1,
+                4,
+                1,
+                4,
+                1,
+                1,
+                2,
+                2,
+                1,
+                16,
+                16,
+                32,
+                16,
+                16,
+                16,
+                1,
+            ),
+            FmhaBwdDQDKDVTileSize(
+                32,
+                128,
+                64,
+                32,
+                64,
+                32,
+                32,
+                64,
+                64,
+                1,
+                4,
+                1,
+                4,
+                1,
+                1,
+                1,
+                4,
+                1,
+                16,
+                16,
+                32,
+                16,
+                16,
+                16,
+                1,
+            ),
+            FmhaBwdDQDKDVTileSize(
+                32,
+                128,
+                96,
+                32,
+                96,
+                32,
+                32,
+                96,
+                96,
+                1,
+                4,
+                1,
+                4,
+                1,
+                1,
+                2,
+                2,
+                1,
+                16,
+                16,
+                32,
+                16,
+                16,
+                16,
+                1,
+            ),
+            FmhaBwdDQDKDVTileSize(
+                16,
+                128,
+                128,
+                16,
+                128,
+                16,
+                32,
+                128,
+                128,
+                1,
+                4,
+                1,
+                4,
+                1,
+                1,
+                1,
+                4,
+                1,
+                16,
+                16,
+                32,
+                16,
+                16,
+                16,
+                1,
+            ),
             # FmhaBwdDQDKDVTileSize( 32, 64, 160, 32, 160, 32, 32, 160, 160, 1, 4, 1, 4, 1, 1, 2, 2, 1, 16, 16, 32, 16, 16, 16, 1),
-            FmhaBwdDQDKDVTileSize( 16,  64, 256, 16, 256, 16, 32, 256, 256, 1, 4, 1, 4, 1, 1, 1, 4, 1, 16, 16, 32, 16, 16, 16, 1),
+            FmhaBwdDQDKDVTileSize(
+                16,
+                64,
+                256,
+                16,
+                256,
+                16,
+                32,
+                256,
+                256,
+                1,
+                4,
+                1,
+                4,
+                1,
+                1,
+                1,
+                4,
+                1,
+                16,
+                16,
+                32,
+                16,
+                16,
+                16,
+                1,
+            ),
         ]
-    elif (dtype == 'fp16' or dtype == 'bf16') and tr_load == 't':
+    elif (dtype == "fp16" or dtype == "bf16") and tr_load == "t":
         return [
-                FmhaBwdDQDKDVTileSize( 32, 128,  64, 32,  64, 32, 32,  64,  64, 1, 4, 1, 4, 1, 1, 1, 4, 1, 16, 16, 32, 16, 16, 32, 1),
-                FmhaBwdDQDKDVTileSize( 32, 128, 128, 32, 128, 32, 32, 128, 128, 1, 4, 1, 4, 1, 1, 1, 4, 1, 16, 16, 32, 16, 16, 32, 1),
-                FmhaBwdDQDKDVTileSize( 16, 192, 128, 16, 128, 16, 32, 128, 128, 1, 4, 1, 4, 1, 1, 1, 4, 1, 16, 16, 32, 16, 16, 16, 1),
-
-                # FmhaBwdDQDKDVTileSize( 32,  32,  64, 32,  64, 32, 32,  64,  64, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 32, 16, 16, 32, 1, 32),
-                FmhaBwdDQDKDVTileSize( 32,  16,  64, 32,  64, 32, 16,  64,  64, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 32, 16, 16, 16, 2, 32),
-                # FmhaBwdDQDKDVTileSize( 16, 32, 128, 16, 128, 16, 32, 128, 128, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 32, 16, 16, 16, 1, 16),
-                FmhaBwdDQDKDVTileSize( 16,  16, 128, 16, 128, 16, 16, 128, 128, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 32, 16, 16, 16, 2, 16),
+            FmhaBwdDQDKDVTileSize(
+                32,
+                128,
+                64,
+                32,
+                64,
+                32,
+                32,
+                64,
+                64,
+                1,
+                4,
+                1,
+                4,
+                1,
+                1,
+                1,
+                4,
+                1,
+                16,
+                16,
+                32,
+                16,
+                16,
+                32,
+                1,
+            ),
+            FmhaBwdDQDKDVTileSize(
+                32,
+                128,
+                128,
+                32,
+                128,
+                32,
+                32,
+                128,
+                128,
+                1,
+                4,
+                1,
+                4,
+                1,
+                1,
+                1,
+                4,
+                1,
+                16,
+                16,
+                32,
+                16,
+                16,
+                32,
+                1,
+            ),
+            FmhaBwdDQDKDVTileSize(
+                16,
+                192,
+                128,
+                16,
+                128,
+                16,
+                32,
+                128,
+                128,
+                1,
+                4,
+                1,
+                4,
+                1,
+                1,
+                1,
+                4,
+                1,
+                16,
+                16,
+                32,
+                16,
+                16,
+                16,
+                1,
+            ),
+            # FmhaBwdDQDKDVTileSize( 32,  32,  64, 32,  64, 32, 32,  64,  64, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 32, 16, 16, 32, 1, 32),
+            FmhaBwdDQDKDVTileSize(
+                32,
+                16,
+                64,
+                32,
+                64,
+                32,
+                16,
+                64,
+                64,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                16,
+                16,
+                32,
+                16,
+                16,
+                16,
+                2,
+                32,
+            ),
+            # FmhaBwdDQDKDVTileSize( 16, 32, 128, 16, 128, 16, 32, 128, 128, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 32, 16, 16, 16, 1, 16),
+            FmhaBwdDQDKDVTileSize(
+                16,
+                16,
+                128,
+                16,
+                128,
+                16,
+                16,
+                128,
+                128,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                16,
+                16,
+                32,
+                16,
+                16,
+                16,
+                2,
+                16,
+            ),
         ]
     else:
         return []
 
-FMHA_BWD_DOT_DO_O_KERNEL_BODY="""
+
+FMHA_BWD_DOT_DO_O_KERNEL_BODY = """
 using fmha_dtype_{F_idx} = {F_dtype};
 
 using fmha_bwd_dot_do_o_trait_{F_idx} =
@@ -458,47 +833,55 @@ std::string fmha_bwd_dot_do_o_get_name_<dot_do_o_trait_{F_idx}>()
 }}
 """
 
+
 @dataclass(frozen=True)
 class FmhaBwdOGradDotOKernel:
-    F_idx       : int  # this is not a tunable, but a counter to differentiate symbol
-    F_hdim      : int  # hdim
-    F_dtype     : str  # data type
-    F_spad      : str  # true/false
-    F_dvpad     : str  #
-    F_mode      : str  # value from MODE_MAP
-    F_occupancy : int
+    F_idx: int  # this is not a tunable, but a counter to differentiate symbol
+    F_hdim: int  # hdim
+    F_dtype: str  # data type
+    F_spad: str  # true/false
+    F_dvpad: str  #
+    F_mode: str  # value from MODE_MAP
+    F_occupancy: int
 
     @property
     def template(self) -> str:
-        return FMHA_BWD_KERNEL_HEADER + \
-            FMHA_BWD_DOT_DO_O_KERNEL_BODY.format(
-                F_idx       = self.F_idx,
-                F_hdim      = self.F_hdim,
-                F_dtype     = BWD_DTYPE_MAP[self.F_dtype],
-                F_spad      = BOOL_MAP[self.F_spad],
-                F_dvpad     = BOOL_MAP[self.F_dvpad],
-                F_mode      = MODE_MAP[self.F_mode],
-                F_occupancy = self.F_occupancy)
+        return FMHA_BWD_KERNEL_HEADER + FMHA_BWD_DOT_DO_O_KERNEL_BODY.format(
+            F_idx=self.F_idx,
+            F_hdim=self.F_hdim,
+            F_dtype=BWD_DTYPE_MAP[self.F_dtype],
+            F_spad=BOOL_MAP[self.F_spad],
+            F_dvpad=BOOL_MAP[self.F_dvpad],
+            F_mode=MODE_MAP[self.F_mode],
+            F_occupancy=self.F_occupancy,
+        )
 
     @property
     def name(self) -> str:
         def pad_name() -> str:
-            n = ''
-            if self.F_spad == 't': n += 's'
-            if self.F_dvpad == 't' : n += 'dv'
-            if n != '' : n = 'p' + n
+            n = ""
+            if self.F_spad == "t":
+                n += "s"
+            if self.F_dvpad == "t":
+                n += "dv"
+            if n != "":
+                n = "p" + n
             return n
+
         pn = pad_name()
         n = f"fmha_bwd_dot_do_o_d{self.F_hdim}_{self.F_dtype}_{self.F_mode}_o{self.F_occupancy}"
-        if pn != '' : n += f'_{pn}'
-        else: n += '_npad'
+        if pn != "":
+            n += f"_{pn}"
+        else:
+            n += "_npad"
         return n
 
     @property
     def filename(self) -> str:
         return self.name + ".cpp"
 
-FMHA_BWD_CONVERT_DQ_KERNEL_BODY="""
+
+FMHA_BWD_CONVERT_DQ_KERNEL_BODY = """
 using fmha_dtype_{F_idx} = {F_dtype};
 
 using fmha_bwd_convert_dq_trait_{F_idx} =
@@ -565,116 +948,133 @@ std::string fmha_bwd_convert_dq_get_name_<convert_dq_trait_{F_idx}>()
 }}
 """
 
+
 @dataclass(frozen=True)
 class FmhaBwdConvertQGradKernel:
-    F_idx           : int  # this is not a tunable, but a counter to differentiate symbol
-    F_hdim          : int  # hdim
-    F_dtype         : str  # data type
-    F_bm0           : int  # tile size along q seqlen (block size)
-    F_bn0           : int  # tile size along k seqlen
-    F_spad          : str  # true/false
-    F_dpad          : str  #
-    F_mode          : str  # value from MODE_MAP
-    F_occupancy     : int  #
-    F_deterministic : str  #
-    disabled        : bool # sometimes this kernel is not used
+    F_idx: int  # this is not a tunable, but a counter to differentiate symbol
+    F_hdim: int  # hdim
+    F_dtype: str  # data type
+    F_bm0: int  # tile size along q seqlen (block size)
+    F_bn0: int  # tile size along k seqlen
+    F_spad: str  # true/false
+    F_dpad: str  #
+    F_mode: str  # value from MODE_MAP
+    F_occupancy: int  #
+    F_deterministic: str  #
+    disabled: bool  # sometimes this kernel is not used
 
     @property
     def template(self) -> str:
-        return FMHA_BWD_KERNEL_HEADER + \
-            FMHA_BWD_CONVERT_DQ_KERNEL_BODY.format(
-                F_idx           = self.F_idx,
-                F_hdim          = self.F_hdim,
-                F_dtype         = BWD_DTYPE_MAP[self.F_dtype],
-                F_bm0           = self.F_bm0,
-                F_bn0           = self.F_bn0,
-                F_spad          = BOOL_MAP[self.F_spad],
-                F_dpad          = BOOL_MAP[self.F_dpad],
-                F_mode          = MODE_MAP[self.F_mode],
-                F_occupancy     = self.F_occupancy,
-                F_deterministic = BOOL_MAP[self.F_deterministic])
+        return FMHA_BWD_KERNEL_HEADER + FMHA_BWD_CONVERT_DQ_KERNEL_BODY.format(
+            F_idx=self.F_idx,
+            F_hdim=self.F_hdim,
+            F_dtype=BWD_DTYPE_MAP[self.F_dtype],
+            F_bm0=self.F_bm0,
+            F_bn0=self.F_bn0,
+            F_spad=BOOL_MAP[self.F_spad],
+            F_dpad=BOOL_MAP[self.F_dpad],
+            F_mode=MODE_MAP[self.F_mode],
+            F_occupancy=self.F_occupancy,
+            F_deterministic=BOOL_MAP[self.F_deterministic],
+        )
 
     @property
     def name(self) -> str:
         def pad_name() -> str:
-            n = ''
-            if self.F_spad == 't': n += 's'
-            if self.F_dpad == 't' : n += 'd'
-            if n != '' : n = 'p' + n
+            n = ""
+            if self.F_spad == "t":
+                n += "s"
+            if self.F_dpad == "t":
+                n += "d"
+            if n != "":
+                n = "p" + n
             return n
+
         pn = pad_name()
         n = f"fmha_bwd_convert_dq_d{self.F_hdim}_{self.F_dtype}_b{self.F_bm0}x{self.F_bn0}_{self.F_mode}_o{self.F_occupancy}"
-        if pn != '' : n += f'_{pn}'
-        else: n += '_npad'
-        if self.F_deterministic == 't' : n += '_deterministic'
-        else: n += '_ndeterministic'
+        if pn != "":
+            n += f"_{pn}"
+        else:
+            n += "_npad"
+        if self.F_deterministic == "t":
+            n += "_deterministic"
+        else:
+            n += "_ndeterministic"
         return n
 
     @property
     def filename(self) -> str:
         return self.name + ".cpp"
 
+
 @dataclass(frozen=True)
 class FmhaBwdApiTrait:
-    idx           : int  # this is not a tunable, but a counter to differentiate symbol
+    idx: int  # this is not a tunable, but a counter to differentiate symbol
     # sync with fmha_bwd_traits<>, to generate fallback calls
-    hdim          : int
-    dtype         : str  # data type
-    mode          : str  # value from MODE_MAP
-    tile          : FmhaBwdDQDKDVTileSize
-    mask          : str
-    bias          : str
-    dbias         : str
-    dropout       : str
-    spad1d        : str # spad for 1d kernels (dot/convert)
-    dpad          : Literal[0, 1, 8]
-    dvpad         : Literal[0, 1, 8]
-    deterministic : str
-    mask_impl     : str
-    tr_load       : str
+    hdim: int
+    dtype: str  # data type
+    mode: str  # value from MODE_MAP
+    tile: FmhaBwdDQDKDVTileSize
+    mask: str
+    bias: str
+    dbias: str
+    dropout: str
+    spad1d: str  # spad for 1d kernels (dot/convert)
+    dpad: Literal[0, 1, 8]
+    dvpad: Literal[0, 1, 8]
+    deterministic: str
+    mask_impl: str
+    tr_load: str
 
     @property
     def bm0(self) -> int:
         return self.tile.F_bm0
+
     @property
     def bn0(self) -> int:
         return self.tile.F_bn0
+
     @property
     def bhdq(self) -> int:
         return self.tile.F_bhdq
+
     @property
     def bhdv(self) -> int:
         return self.tile.F_bhdv
 
     @property
     def scheck(self) -> str:
-        if self.mode == 'group':
-            return 'true' # always support
-        elif self.spad1d == 't':
-            return f'a.seqlen_q % {M0_1D} != 0'
-        else: # self.spad1d == 'f'
-            return f'a.seqlen_q % {M0_1D} == 0'
+        if self.mode == "group":
+            return "true"  # always support
+        elif self.spad1d == "t":
+            return f"a.seqlen_q % {M0_1D} != 0"
+        else:  # self.spad1d == 'f'
+            return f"a.seqlen_q % {M0_1D} == 0"
 
     @property
     def dcheck(self) -> str:
-        if self.dpad == 0: return f'a.hdim_q % {self.bhdq} == 0'
-        else:              return f'a.hdim_q % {self.dpad} == 0'
+        if self.dpad == 0:
+            return f"a.hdim_q % {self.bhdq} == 0"
+        else:
+            return f"a.hdim_q % {self.dpad} == 0"
 
     @property
     def dvcheck(self) -> str:
-        if self.dvpad == 0: return f'a.hdim_v % {self.bhdv} == 0'
-        else:               return f'a.hdim_v % {self.dvpad} == 0'
+        if self.dvpad == 0:
+            return f"a.hdim_v % {self.bhdv} == 0"
+        else:
+            return f"a.hdim_v % {self.dvpad} == 0"
 
     @property
     def extra_cond(self) -> str:
-        if self.tr_load == 't' and self.tile.max_seq_q == 0 and self.tile.F_bn0 == 128:
+        if self.tr_load == "t" and self.tile.max_seq_q == 0 and self.tile.F_bn0 == 128:
             return "&& (a.seqlen_k <= 256)"
         else:
             return ""
-    
+
     @property
     def convert_dq_bn0(self) -> int:
-        return self.tile.F_bn0 if self.deterministic == 't' else 0
+        return self.tile.F_bn0 if self.deterministic == "t" else 0
 
     @property
     def dot_do_o_kernel(self) -> FmhaBwdOGradDotOKernel:
@@ -683,15 +1083,35 @@ class FmhaBwdApiTrait:
         def get_occupancy(dtype, hdim):
             return 2
 
-        F_dvpad = 't' if self.dvpad else 'f'
-        return FmhaBwdOGradDotOKernel(F_idx=self.idx, F_hdim=self.hdim, F_dtype=self.dtype, F_spad=self.spad1d,
-            F_dvpad=F_dvpad, F_mode=self.mode, F_occupancy=get_occupancy(self.dtype, self.hdim))
+        F_dvpad = "t" if self.dvpad else "f"
+        return FmhaBwdOGradDotOKernel(
+            F_idx=self.idx,
+            F_hdim=self.hdim,
+            F_dtype=self.dtype,
+            F_spad=self.spad1d,
+            F_dvpad=F_dvpad,
+            F_mode=self.mode,
+            F_occupancy=get_occupancy(self.dtype, self.hdim),
+        )
 
     @property
     def dq_dk_dv_kernel(self) -> FmhaBwdDQDKDVKernel:
-        return FmhaBwdDQDKDVKernel(F_idx=self.idx, F_hdim=self.hdim, F_dtype=self.dtype, F_tile=self.tile,
-            F_dpad=self.dpad, F_dvpad=self.dvpad, F_bias=self.bias, F_dbias=self.dbias, F_dropout=self.dropout,
-            F_mask=self.mask, F_mode=self.mode, F_deterministic=self.deterministic, mask_impl=self.mask_impl, F_trload=self.tr_load)
+        return FmhaBwdDQDKDVKernel(
+            F_idx=self.idx,
+            F_hdim=self.hdim,
+            F_dtype=self.dtype,
+            F_tile=self.tile,
+            F_dpad=self.dpad,
+            F_dvpad=self.dvpad,
+            F_bias=self.bias,
+            F_dbias=self.dbias,
+            F_dropout=self.dropout,
+            F_mask=self.mask,
+            F_mode=self.mode,
+            F_deterministic=self.deterministic,
+            mask_impl=self.mask_impl,
+            F_trload=self.tr_load,
+        )
 
     @property
     def convert_dq_kernel(self) -> FmhaBwdConvertQGradKernel:
@@ -700,44 +1120,76 @@ class FmhaBwdApiTrait:
         def get_occupancy(dtype, hdim):
             return 2
 
-        F_dpad = 't' if self.dpad else 'f'
-        return FmhaBwdConvertQGradKernel(F_idx=self.idx, F_hdim=self.hdim, F_dtype=self.dtype,
-            F_bm0=M0_1D, F_bn0=self.convert_dq_bn0, F_spad=self.spad1d, F_dpad=F_dpad,
-            F_mode=self.mode, F_occupancy=get_occupancy(self.dtype, self.hdim),
-            F_deterministic=self.deterministic, disabled=self.tile.max_seq_q != 0)
+        F_dpad = "t" if self.dpad else "f"
+        return FmhaBwdConvertQGradKernel(
+            F_idx=self.idx,
+            F_hdim=self.hdim,
+            F_dtype=self.dtype,
+            F_bm0=M0_1D,
+            F_bn0=self.convert_dq_bn0,
+            F_spad=self.spad1d,
+            F_dpad=F_dpad,
+            F_mode=self.mode,
+            F_occupancy=get_occupancy(self.dtype, self.hdim),
+            F_deterministic=self.deterministic,
+            disabled=self.tile.max_seq_q != 0,
+        )
+
 
 class FmhaBwdApiPool:
     def __init__(self, mask_impl):
-        self.dq_dk_dv_pool = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(list))))
-        
+        self.dq_dk_dv_pool = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
+        )
+
         self.mask_impl = mask_impl
 
-    def register_dq_dk_dv_traits(self, trait : FmhaBwdApiTrait) -> None:
+    def register_dq_dk_dv_traits(self, trait: FmhaBwdApiTrait) -> None:
         # TODO: do we need to check duplication?
-        self.dq_dk_dv_pool[trait.tr_load][trait.tile.max_seq_q][trait.dtype][trait.hdim].append(copy.copy(trait))
+        self.dq_dk_dv_pool[trait.tr_load][trait.tile.max_seq_q][trait.dtype][
+            trait.hdim
+        ].append(copy.copy(trait))
 
     @staticmethod
     def if_(i: int) -> str:
-        return 'if' if i == 0 else 'else if'
+        return "if" if i == 0 else "else if"
 
     def _api_innders(self, traits: List[FmhaBwdApiTrait]) -> str:
         inners = ""
-        i = 0 
+        i = 0
         for trait in traits:
-            inners += FMHA_BWD_API_INNER_DISPATCH.format(F_if=self.if_(i), F_mode=MODE_MAP[trait.mode],
-                F_mask_check=get_mask_check_map(self.mask_impl)[trait.mask], F_mask=get_mask_map(self.mask_impl)[trait.mask], F_bias_check=BIAS_CHECK_MAP[trait.bias],
-                F_bias=BIAS_MAP[trait.bias], F_dbias=BOOL_MAP[trait.dbias], F_dropout_check=DROPOUT_CHECK_MAP[trait.dropout], F_dropout=DROPOUT_MAP[trait.dropout],
-                F_scheck=trait.scheck, F_dcheck=trait.dcheck, F_dvcheck=trait.dvcheck, F_hdim=trait.hdim, F_dtype=BWD_DTYPE_MAP[trait.dtype],
-                F_spad1d=BOOL_MAP[trait.spad1d], F_dpad=trait.dpad, F_dvpad=trait.dvpad,
-                F_deterministic=BOOL_MAP[trait.deterministic], F_trload=BOOL_MAP[trait.tr_load], F_maxq=trait.tile.max_seq_q,
-                F_convert_dq_enabled=BOOL_MAP[not trait.convert_dq_kernel.disabled], F_bn0=trait.tile.F_bn0, F_cond_extra=trait.extra_cond,
-                F_convert_dq_bn0=trait.convert_dq_bn0)
+            inners += FMHA_BWD_API_INNER_DISPATCH.format(
+                F_if=self.if_(i),
+                F_mode=MODE_MAP[trait.mode],
+                F_mask_check=get_mask_check_map(self.mask_impl)[trait.mask],
+                F_mask=get_mask_map(self.mask_impl)[trait.mask],
+                F_bias_check=BIAS_CHECK_MAP[trait.bias],
+                F_bias=BIAS_MAP[trait.bias],
+                F_dbias=BOOL_MAP[trait.dbias],
+                F_dropout_check=DROPOUT_CHECK_MAP[trait.dropout],
+                F_dropout=DROPOUT_MAP[trait.dropout],
+                F_scheck=trait.scheck,
+                F_dcheck=trait.dcheck,
+                F_dvcheck=trait.dvcheck,
+                F_hdim=trait.hdim,
+                F_dtype=BWD_DTYPE_MAP[trait.dtype],
+                F_spad1d=BOOL_MAP[trait.spad1d],
+                F_dpad=trait.dpad,
+                F_dvpad=trait.dvpad,
+                F_deterministic=BOOL_MAP[trait.deterministic],
+                F_trload=BOOL_MAP[trait.tr_load],
+                F_maxq=trait.tile.max_seq_q,
+                F_convert_dq_enabled=BOOL_MAP[not trait.convert_dq_kernel.disabled],
+                F_bn0=trait.tile.F_bn0,
+                F_cond_extra=trait.extra_cond,
+                F_convert_dq_bn0=trait.convert_dq_bn0,
+            )
             i += 1
         return inners
 
     @staticmethod
     def trload_sort_key(tf):
-        return 0 if tf == 't' else 1  # sort 't' before 'f'
+        return 0 if tf == "t" else 1  # sort 't' before 'f'
 
     @staticmethod
     def max_seq_q_sort_key(max_seq_q):
@@ -746,9 +1198,9 @@ class FmhaBwdApiPool:
     @staticmethod
     def max_seq_q_cond(max_seq_q: int) -> str:
         if max_seq_q == 0:
-            return 'true /* no seqlen_q limit */'
+            return "true /* no seqlen_q limit */"
         else:
-            return f'a.seqlen_q <= {max_seq_q}'
+            return f"a.seqlen_q <= {max_seq_q}"
 
     @staticmethod
     def dtype_cond(dtype: str) -> str:
@@ -756,39 +1208,56 @@ class FmhaBwdApiPool:
 
     @staticmethod
     def hdim_cond(hdim: int) -> str:
-        return f't.hdim_q <= {hdim} && t.hdim_v <= {hdim}'
+        return f"t.hdim_q <= {hdim} && t.hdim_v <= {hdim}"
 
     @property
     def api(self) -> str:
-        tr_load_cond_map = {
-            "t": "has_load_tr",
-            "f": "true /* no trload requirement */"
-        }
-        per_tr_load = ''
+        tr_load_cond_map = {"t": "has_load_tr", "f": "true /* no trload requirement */"}
+        per_tr_load = ""
         for tr_load in sorted(self.dq_dk_dv_pool.keys(), key=self.trload_sort_key):
-            per_max_seq_q = ''
-            for max_seq_q in sorted(self.dq_dk_dv_pool[tr_load].keys(), key=self.max_seq_q_sort_key):
-                per_dtypes = ''
+            per_max_seq_q = ""
+            for max_seq_q in sorted(
+                self.dq_dk_dv_pool[tr_load].keys(), key=self.max_seq_q_sort_key
+            ):
+                per_dtypes = ""
                 for j, dtype in enumerate(self.dq_dk_dv_pool[tr_load][max_seq_q]):
-                    per_hdim_case = ''
-                    for k, hdim in enumerate(self.dq_dk_dv_pool[tr_load][max_seq_q][dtype]):
+                    per_hdim_case = ""
+                    for k, hdim in enumerate(
+                        self.dq_dk_dv_pool[tr_load][max_seq_q][dtype]
+                    ):
                         traits = self.dq_dk_dv_pool[tr_load][max_seq_q][dtype][hdim]
                         inners = self._api_innders(traits)
-                        per_hdim_case += FMHA_BWD_API_COND_STATEMENT(if_=k, F_cond=self.hdim_cond(hdim), F_body=inners)
-                    per_dtypes += FMHA_BWD_API_COND_STATEMENT(if_=j, F_cond=self.dtype_cond(dtype), F_body=per_hdim_case)
-                per_max_seq_q += FMHA_BWD_API_COND_STATEMENT(F_cond=self.max_seq_q_cond(max_seq_q), F_body=per_dtypes)
-            per_tr_load += FMHA_BWD_API_COND_STATEMENT(F_cond=tr_load_cond_map[tr_load], F_body=per_max_seq_q, indent=4)
+                        per_hdim_case += FMHA_BWD_API_COND_STATEMENT(
+                            if_=k, F_cond=self.hdim_cond(hdim), F_body=inners
+                        )
+                    per_dtypes += FMHA_BWD_API_COND_STATEMENT(
+                        if_=j, F_cond=self.dtype_cond(dtype), F_body=per_hdim_case
+                    )
+                per_max_seq_q += FMHA_BWD_API_COND_STATEMENT(
+                    F_cond=self.max_seq_q_cond(max_seq_q), F_body=per_dtypes
+                )
+            per_tr_load += FMHA_BWD_API_COND_STATEMENT(
+                F_cond=tr_load_cond_map[tr_load], F_body=per_max_seq_q, indent=4
+            )
         if not per_tr_load:
             # empty string we add some ignore to suppress warning in api
-            per_tr_load += '    (void)t ; (void)s ; (void)a; (void)has_load_tr;'
-        result = FMHA_BWD_KERNEL_HEADER + FMHA_BWD_API.format(F_dispatch = per_tr_load)
-        return result.replace('\n\n', '\n')
+            per_tr_load += "    (void)t ; (void)s ; (void)a; (void)has_load_tr;"
+        result = FMHA_BWD_KERNEL_HEADER + FMHA_BWD_API.format(F_dispatch=per_tr_load)
+        return result.replace("\n\n", "\n")
 
-def get_bwd_blobs(filter_list: str, receipt, mask_impl, optdim_list) -> Tuple[FmhaBwdApiPool, List[FmhaBwdOGradDotOKernel], List[FmhaBwdDQDKDVKernel], List[FmhaBwdConvertQGradKernel]]:
-    if filter_list == '':
-        filter_list = '*@*@*'
-    filters = filter_list.split('@')
-    filters.extend(['*'] * (3 - len(filters)))
+
+def get_bwd_blobs(
+    filter_list: str, receipt, mask_impl, optdim_list
+) -> Tuple[
+    FmhaBwdApiPool,
+    List[FmhaBwdOGradDotOKernel],
+    List[FmhaBwdDQDKDVKernel],
+    List[FmhaBwdConvertQGradKernel],
+]:
+    if filter_list == "":
+        filter_list = "*@*@*"
+    filters = filter_list.split("@")
+    filters.extend(["*"] * (3 - len(filters)))
     filter_dot_do_o = filters[0]
     filter_convert_dq = filters[1]
     filter_dq_dk_dv = filters[2]
@@ -803,30 +1272,60 @@ def get_bwd_blobs(filter_list: str, receipt, mask_impl, optdim_list) -> Tuple[Fm
         tiles: Any = get_dq_dk_dv_tiles(dtype, tr_load)
         dpad_options = itertools.product(*([[0, 8, 1]] * 2))
         tf = ["t", "f"]
-        for tile, mode, mask, bias, dbias, dropout, spad1d, (dpad, dvpad), deterministic in itertools.product(
-                tiles, MODE_MAP.keys(), get_mask_map(mask_impl).keys(), BIAS_MAP.keys(), tf, DROPOUT_MAP.keys(), tf, dpad_options, tf):
-            assert isinstance(tile, FmhaBwdDQDKDVTileSize), "tile must be FmhaBwdDQDKDVTileSize"
+        for tile, mode, mask, bias, dbias, dropout, spad1d, (
+            dpad,
+            dvpad,
+        ), deterministic in itertools.product(
+            tiles,
+            MODE_MAP.keys(),
+            get_mask_map(mask_impl).keys(),
+            BIAS_MAP.keys(),
+            tf,
+            DROPOUT_MAP.keys(),
+            tf,
+            dpad_options,
+            tf,
+        ):
+            assert isinstance(tile, FmhaBwdDQDKDVTileSize), (
+                "tile must be FmhaBwdDQDKDVTileSize"
+            )
             hdim = tile.F_bhdq
             if (mode == "group") and (spad1d == "f"):
                 continue
-            if (mode == "group" or ('no' not in mask)) and tile.max_seq_q != 0:
+            if (mode == "group" or ("no" not in mask)) and tile.max_seq_q != 0:
                 continue
-            if ((bias == "no" or bias == "alibi") and dbias == "t"):
+            if (bias == "no" or bias == "alibi") and dbias == "t":
                 continue
-            if ("wg32" in dropout):
+            if "wg32" in dropout:
                 continue
             if tr_load == "t":
                 # tr_load can only work with 8 pad
                 if dpad != dvpad or dpad == 1:
                     continue
-            else: # tr_load == "f"
+            else:  # tr_load == "f"
                 # do not generate instance with only 1 of dpad/dvpad being 8
                 if dpad != dvpad and dpad == 8:
                     continue
             if optdim_list != [-1]:
                 if hdim not in optdim_list:
                     continue
-            t = FmhaBwdApiTrait(idx=0, hdim=hdim, dtype=dtype, mode=mode,tile=tile,mask=mask, bias=bias, dbias=dbias, dropout=dropout, spad1d=spad1d, dpad=dpad, dvpad=dvpad, deterministic=deterministic, mask_impl=mask_impl, tr_load=tr_load)
+            t = FmhaBwdApiTrait(
+                idx=0,
+                hdim=hdim,
+                dtype=dtype,
+                mode=mode,
+                tile=tile,
+                mask=mask,
+                bias=bias,
+                dbias=dbias,
+                dropout=dropout,
+                spad1d=spad1d,
+                dpad=dpad,
+                dvpad=dvpad,
+                deterministic=deterministic,
+                mask_impl=mask_impl,
+                tr_load=tr_load,
+            )
 
             if not fnmatch.fnmatch(t.dot_do_o_kernel.name, filter_dot_do_o):
                 continue
@@ -837,69 +1336,69 @@ def get_bwd_blobs(filter_list: str, receipt, mask_impl, optdim_list) -> Tuple[Fm
 
             # Flash attention integration
             if receipt == 2:
-                cond = dtype in ['fp16', 'bf16']
-                cond &= bias in ['no', 'alibi']
-                cond &= dropout in ['no', 'dropout_wg32',  'dropout_wg16']
+                cond = dtype in ["fp16", "bf16"]
+                cond &= bias in ["no", "alibi"]
+                cond &= dropout in ["no", "dropout_wg32", "dropout_wg16"]
                 cond &= dpad == dvpad
                 if not cond:
                     continue
             elif receipt == 3:
-                cond = dtype in ['fp16', 'bf16']
-                cond &= bias in ['no', 'alibi']
+                cond = dtype in ["fp16", "bf16"]
+                cond &= bias in ["no", "alibi"]
                 cond &= dpad == dvpad
                 cond &= deterministic == "f"
                 if not cond:
                     continue
             # PyTorch integration
             elif receipt == 4:
-                cond = dtype in ['fp16', 'bf16']
-                cond &= bias in ['no', 'bias']
-                cond &= dropout in ['no', 'dropout_wg32',  'dropout_wg16']
+                cond = dtype in ["fp16", "bf16"]
+                cond &= bias in ["no", "bias"]
+                cond &= dropout in ["no", "dropout_wg32", "dropout_wg16"]
                 cond &= dpad == dvpad
                 cond &= deterministic == "f"
                 if not cond:
                     continue
             # Aiter (mha_bwd) integration
             elif receipt == 300:
-                cond = dtype in ['fp16', 'bf16']
+                cond = dtype in ["fp16", "bf16"]
                 cond &= mode == "batch"
-                cond &= dropout in ['no', 'dropout_wg32',  'dropout_wg16']
+                cond &= dropout in ["no", "dropout_wg32", "dropout_wg16"]
                 if not cond:
                     continue
             # Aiter (mha_varlen_bwd) integration
             elif receipt == 400:
-                cond = dtype in ['fp16', 'bf16']
+                cond = dtype in ["fp16", "bf16"]
                 cond &= mode == "group"
-                cond &= dropout in ['no', 'dropout_wg32',  'dropout_wg16']
+                cond &= dropout in ["no", "dropout_wg32", "dropout_wg16"]
                 if not cond:
                     continue
             # aiter::mha_bwd C++ api integration
             elif receipt == 600:
-                cond = dtype in ['fp16', 'bf16']
+                cond = dtype in ["fp16", "bf16"]
                 if not cond:
                     continue
 
             # fp32 only, all variations
             if receipt == 800:
-                cond = dtype == 'fp32'
+                cond = dtype == "fp32"
                 cond &= dpad == dvpad
                 if not cond:
                     continue
             # fp32 only, minimal set of parameters
             elif receipt == 801:
-                cond = dtype == 'fp32'
+                cond = dtype == "fp32"
                 cond &= hdim in [64, 128]
                 cond &= dpad == dvpad
-                cond &= mode == 'batch'
-                cond &= bias == 'no'
-                cond &= dropout == 'no'
-                cond &= mask == 's_no'
+                cond &= mode == "batch"
+                cond &= bias == "no"
+                cond &= dropout == "no"
+                cond &= mask == "s_no"
                 cond &= deterministic == "f"
                 if not cond:
                     continue
             else:
                 # Don't build fp32 by default
-                if dtype == 'fp32':
+                if dtype == "fp32":
                     continue
 
             gen_dot_do_o[t.dot_do_o_kernel] = True
@@ -908,10 +1407,20 @@ def get_bwd_blobs(filter_list: str, receipt, mask_impl, optdim_list) -> Tuple[Fm
                 gen_convert_dq[t.convert_dq_kernel] = True
             api_pool.register_dq_dk_dv_traits(t)
 
-    return api_pool, list(gen_dot_do_o.keys()), list(gen_dq_dk_dv.keys()), list(gen_convert_dq.keys())
+    return (
+        api_pool,
+        list(gen_dot_do_o.keys()),
+        list(gen_dq_dk_dv.keys()),
+        list(gen_convert_dq.keys()),
+    )
 
-def write_blobs(output_dir : Path, filter_list : str, receipt, optdim_list, mask_impl) -> None:
-    api_pool, kernels_dot_do_o,  kernels_dq_dk_dv,  kernels_convert_dq = get_bwd_blobs(filter_list, receipt, mask_impl, optdim_list)
+
+def write_blobs(
+    output_dir: Path, filter_list: str, receipt, optdim_list, mask_impl
+) -> None:
+    api_pool, kernels_dot_do_o, kernels_dq_dk_dv, kernels_convert_dq = get_bwd_blobs(
+        filter_list, receipt, mask_impl, optdim_list
+    )
     update_file(output_dir / FMHA_BWD_API_FILENAME, api_pool.api)
     for k in kernels_dot_do_o:
         update_file(output_dir / k.filename, k.template)
@@ -921,7 +1430,9 @@ def write_blobs(output_dir : Path, filter_list : str, receipt, optdim_list, mask
         update_file(output_dir / k.filename, k.template)
 
 
-def list_blobs(file_path: Path, filter_list: str, receipt, optdim_list, mask_impl) -> None:
+def list_blobs(
+    file_path: Path, filter_list: str, receipt, optdim_list, mask_impl
+) -> None:
     _, kernels_dot_do_o, kernels_dq_dk_dv, kernels_convert_dq = get_bwd_blobs(
         filter_list, receipt, mask_impl, optdim_list
     )
diff --git a/example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py b/example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py
index f898d5f7b2..cc77718c88 100644
--- a/example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py
+++ b/example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py
@@ -10,28 +10,25 @@ import os
 from pathlib import Path
 from typing import List, Optional, Tuple
 
-from codegen.cmake_config import *
-from codegen.cpp_symbol_map import *
+from codegen.cmake_config import GEN_DIR
+from codegen.cpp_symbol_map import (
+    LAYOUT_MAP,
+    BIAS_CHECK_MAP,
+    get_mask_check_map,
+    BOOL_MAP,
+    PIPELINE_MAP,
+    PIPELINE_ENUM_MAP,
+    MODE_MAP,
+    FWD_DTYPE_MAP,
+    BIAS_MAP,
+    get_mask_map,
+)
 from codegen.utils import update_file
 
 
-DTYPE_BITS = {
-    "fp32": 32,
-    "fp16": 16,
-    "bf16": 16,
-    "fp8" : 8,
-    "bf8" : 8
-}
+DTYPE_BITS = {"fp32": 32, "fp16": 16, "bf16": 16, "fp8": 8, "bf8": 8}
 
-K0_MAX_SUBMAX_MAP = {
-    32 : 32,
-    48 : 48,
-    64 : 64,
-    96 : 128,
-    128: 128,
-    192: 192,
-    256: 256
-}
+K0_MAX_SUBMAX_MAP = {32: 32, 48: 48, 64: 64, 96: 128, 128: 128, 192: 192, 256: 256}
 
 FMHA_FWD_KERNEL_HEADER = """// SPDX-License-Identifier: MIT
 // Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.\n
@@ -40,7 +37,7 @@ FMHA_FWD_KERNEL_HEADER = """// SPDX-License-Identifier: MIT
 #include "fmha_fwd.hpp"
 """
 
-FMHA_FWD_KERNEL_BODY="""
+FMHA_FWD_KERNEL_BODY = """
 using fmha_dtype_{F_idx} = {F_dtype};
 
 using fmha_block_tile_{F_idx} = ck_tile::sequence<{F_bm0}, {F_bn0}, {F_bk0}, {F_bn1}, {F_bk1}, {F_bk0max}>;
@@ -117,8 +114,8 @@ float fmha_fwd_<trait_{F_idx}>(const ck_tile::stream_config& s, fmha_fwd_args a)
 }}
 """
 
-FMHA_FWD_API_FILENAME="fmha_fwd_api.cpp"
-FMHA_FWD_API="""
+FMHA_FWD_API_FILENAME = "fmha_fwd_api.cpp"
+FMHA_FWD_API = """
 #include <cstdio>
 
 #include <hip/hip_runtime.h>
@@ -172,197 +169,254 @@ float fmha_fwd(fmha_fwd_traits t, fmha_fwd_args a, const ck_tile::stream_config&
 }}
 """
 
-FMHA_FWD_API_PER_TRLOAD="""    {F_if}({F_trload_cond}){{
+FMHA_FWD_API_PER_TRLOAD = """    {F_if}({F_trload_cond}){{
 {F_dtype_case}
     }}
 """
 
-FMHA_FWD_API_PER_DTYPE="""    {F_if}(t.data_type.compare(\"{F_dtype}\") == 0){{
+FMHA_FWD_API_PER_DTYPE = """    {F_if}(t.data_type.compare(\"{F_dtype}\") == 0){{
 {F_hdim_case}
     }}
 """
-FMHA_FWD_API_PER_HDIM_CASE="""        {F_if} (t.hdim_q <= {F_hdim} && t.hdim_v <= {F_hdim_v}) {{
+FMHA_FWD_API_PER_HDIM_CASE = """        {F_if} (t.hdim_q <= {F_hdim} && t.hdim_v <= {F_hdim_v}) {{
 {F_inner_dispatch}
         }}
 """
 
-FMHA_FWD_API_INNER_DISPATCH="""            {F_if}((t.is_group_mode == {F_mode}) && (t.is_v_rowmajor == {F_vlayout}) && (t.has_logits_soft_cap == {F_logits}) && ({F_mask_check}) && (t.bias_type == {F_bias_check}) && (t.has_lse == {F_lse})  && (t.has_dropout == {F_dropout}) && (t.do_fp8_static_quant == {F_squant}) && (t.skip_min_seqlen_q == {F_skip}) &&
+FMHA_FWD_API_INNER_DISPATCH = """            {F_if}((t.is_group_mode == {F_mode}) && (t.is_v_rowmajor == {F_vlayout}) && (t.has_logits_soft_cap == {F_logits}) && ({F_mask_check}) && (t.bias_type == {F_bias_check}) && (t.has_lse == {F_lse})  && (t.has_dropout == {F_dropout}) && (t.do_fp8_static_quant == {F_squant}) && (t.skip_min_seqlen_q == {F_skip}) &&
                         ({F_scheck}) && ({F_seqtune}) && ({F_skcheck}) && ({F_dcheck}) && ({F_dvcheck}) && ({F_constraint})) {{
                 using trait_ = fmha_fwd_traits_<{F_hdim}, {F_dtype}, {F_mode}, {F_bm0}, {F_bn0}, {F_bk0}, {F_bn1}, {F_bk1}, {F_bk0max}, {F_vlayout}, {F_pipeline_enum}, {F_logits}, {F_mask}, {F_bias}, {F_lse}, {F_dropout}, {F_squant}, {F_spad}, {F_skpad}, {F_dpad}, {F_dvpad}, {F_trload}, {F_skip}>;
                 return fmha_fwd_<trait_>(s, a);
             }}
 """
 
+
 @dataclass
 class CppConstraint:
     bool_expr: str = None
 
     def __str__(self):
         if self.bool_expr is None:
-            return 'true'
+            return "true"
         else:
-            return f'{self.bool_expr}'
+            return f"{self.bool_expr}"
 
     def __and__(self, other):
-        return CppConstraint(f'({str(self)}) && ({str(other)})')
+        return CppConstraint(f"({str(self)}) && ({str(other)})")
+
 
 @dataclass
 class FmhaFwdApiTrait:
-    pipeline_tag : str
+    pipeline_tag: str
     # sync with fmha_fwd_traits<>, to generate fallback calls
-    hdim       : str
-    dtype      : str  # data type
-    mode       : str  # value from MODE_MAP
-    bm0        : int  # tile size along q seqlen (block size)
-    bn0        : int  # tile size along qk seqlen
-    bk0        : int  # tile size along qk gemm unroll
-    bn1        : int  # tile size along v head_dim
-    bk1        : int  # tile size along kv gemm unroll
-    bk0max     : int
-    vlayout    : str
-    logits     : str
-    mask       : str
-    bias       : str  #
-    lse        : str  #
-    dropout    : str
-    squant     : str  #
-    spad       : str
-    skpad      : str
-    dpad       : str
-    dvpad      : str
-    skip       : str
-    tr_load    : str
-    constraint : CppConstraint
+    hdim: str
+    dtype: str  # data type
+    mode: str  # value from MODE_MAP
+    bm0: int  # tile size along q seqlen (block size)
+    bn0: int  # tile size along qk seqlen
+    bk0: int  # tile size along qk gemm unroll
+    bn1: int  # tile size along v head_dim
+    bk1: int  # tile size along kv gemm unroll
+    bk0max: int
+    vlayout: str
+    logits: str
+    mask: str
+    bias: str  #
+    lse: str  #
+    dropout: str
+    squant: str  #
+    spad: str
+    skpad: str
+    dpad: str
+    dvpad: str
+    skip: str
+    tr_load: str
+    constraint: CppConstraint
 
     @property
     def name(self) -> str:
-        return f'{self.hdim}-{self.dtype}-{self.mode}-{self.bm0}-{self.bn0}-{self.bk0}-{self.bn0}-{self.bk1}-{self.bk0max}-'+\
-                    f'{self.vlayout}-{self.logits}-{self.mask}-{self.bias}-{self.lse}-{self.dropout}-{self.squant}-{self.spad}-{self.skpad}-{self.dpad}-{self.dvpad}-{self.skip}'
+        return (
+            f"{self.hdim}-{self.dtype}-{self.mode}-{self.bm0}-{self.bn0}-{self.bk0}-{self.bn0}-{self.bk1}-{self.bk0max}-"
+            + f"{self.vlayout}-{self.logits}-{self.mask}-{self.bias}-{self.lse}-{self.dropout}-{self.squant}-{self.spad}-{self.skpad}-{self.dpad}-{self.dvpad}-{self.skip}"
+        )
 
     @property
     def scheck(self) -> str:
-        if self.mode == 'group': return 'true/*group mode spad always true*/'                  # group mode only generate spad/skpad == true
-        if self.pipeline_tag in ['qr_async', 'qr_async_trload']:
-            if self.spad == 't' : return 'true' # always support
-            else :                return 'true'
-        elif self.pipeline_tag in ['qr', 'qs']:
-            if self.spad == 't' : return f'true /*a.seqlen_q % {self.bm0} != 0*/'  # TODO: order of get_pipelines() matters! (ugly)
-            else :                return f'a.seqlen_q % {self.bm0} == 0'
-        else: assert False
-
-    def seqtune(self, max_bm0 : int) -> str:
-        if self.bm0 == max_bm0: return 'true/*fall back to largest tile*/'
+        if self.mode == "group":
+            return "true/*group mode spad always true*/"  # group mode only generate spad/skpad == true
+        if self.pipeline_tag in ["qr_async", "qr_async_trload"]:
+            if self.spad == "t":
+                return "true"  # always support
+            else:
+                return "true"
+        elif self.pipeline_tag in ["qr", "qs"]:
+            if self.spad == "t":
+                return f"true /*a.seqlen_q % {self.bm0} != 0*/"  # TODO: order of get_pipelines() matters! (ugly)
+            else:
+                return f"a.seqlen_q % {self.bm0} == 0"
         else:
-            return f'a.seqlen_q <= {self.bm0}'
+            assert False
+
+    def seqtune(self, max_bm0: int) -> str:
+        if self.bm0 == max_bm0:
+            return "true/*fall back to largest tile*/"
+        else:
+            return f"a.seqlen_q <= {self.bm0}"
 
     @property
     def skcheck(self) -> str:
-        if self.mode == 'group': return 'true/*group mode skpad always true*/'                  # group mode only generate spad/skpad == true
-        if self.pipeline_tag == 'qr_async':
-            if self.skpad == 't' : return f'(a.cu_seqlen_kv_ptr != nullptr) || (a.seqlen_k == 0 || a.seqlen_k % {self.bn0} != 0)'
-            else :                 return f'(a.cu_seqlen_kv_ptr == nullptr) && (a.seqlen_k != 0 && a.seqlen_k % {self.bn0} == 0)'
-        elif self.pipeline_tag in ['qr', 'qs']:
-            if self.skpad == 't' : return f'true /*a.seqlen_k % {self.bn0} != 0*/' # TODO: order of get_pipelines() matters! (ugly)
-            else :                 return f'(a.cu_seqlen_kv_ptr == nullptr) && (a.seqlen_k != 0 && a.seqlen_k % {self.bn0} == 0)'
-        elif self.pipeline_tag == 'qr_async_trload':
-            if self.skpad == 't' : return 'true'
-            else:                  return 'true'
-        else: assert False
+        if self.mode == "group":
+            return "true/*group mode skpad always true*/"  # group mode only generate spad/skpad == true
+        if self.pipeline_tag == "qr_async":
+            if self.skpad == "t":
+                return f"(a.cu_seqlen_kv_ptr != nullptr) || (a.seqlen_k == 0 || a.seqlen_k % {self.bn0} != 0)"
+            else:
+                return f"(a.cu_seqlen_kv_ptr == nullptr) && (a.seqlen_k != 0 && a.seqlen_k % {self.bn0} == 0)"
+        elif self.pipeline_tag in ["qr", "qs"]:
+            if self.skpad == "t":
+                return f"true /*a.seqlen_k % {self.bn0} != 0*/"  # TODO: order of get_pipelines() matters! (ugly)
+            else:
+                return f"(a.cu_seqlen_kv_ptr == nullptr) && (a.seqlen_k != 0 && a.seqlen_k % {self.bn0} == 0)"
+        elif self.pipeline_tag == "qr_async_trload":
+            if self.skpad == "t":
+                return "true"
+            else:
+                return "true"
+        else:
+            assert False
 
     @property
     def dcheck(self) -> str:
-        if self.pipeline_tag == 'qr_async':
+        if self.pipeline_tag == "qr_async":
             vec = int((32 * 4) / DTYPE_BITS[self.dtype])
-            if self.dpad == 't': return f'a.hdim_q % {vec} == 0'
-            else :               assert False
-        elif self.pipeline_tag in ['qr', 'qs', 'qr_async_trload']:
+            if self.dpad == "t":
+                return f"a.hdim_q % {vec} == 0"
+            else:
+                assert False
+        elif self.pipeline_tag in ["qr", "qs", "qr_async_trload"]:
             bk0submax = K0_MAX_SUBMAX_MAP[self.bk0max]
-            if self.dpad == 't': return f'true /*a.hdim_q % {bk0submax} != 0*/' # TODO: order of get_pipelines() matters! (ugly)
-            else :               return f'a.hdim_q % {bk0submax} == 0'
-        else:   assert False
+            if self.dpad == "t":
+                return f"true /*a.hdim_q % {bk0submax} != 0*/"  # TODO: order of get_pipelines() matters! (ugly)
+            else:
+                return f"a.hdim_q % {bk0submax} == 0"
+        else:
+            assert False
 
     @property
     def dvcheck(self) -> str:
-        if self.pipeline_tag == 'qr_async':
+        if self.pipeline_tag == "qr_async":
             vec = int((32 * 4) / DTYPE_BITS[self.dtype])
-            if self.dvpad == 't': return f'a.hdim_v % {vec} == 0'
-            else :                assert False
-        elif self.pipeline_tag in ['qr', 'qs', 'qr_async_trload']:
+            if self.dvpad == "t":
+                return f"a.hdim_v % {vec} == 0"
+            else:
+                assert False
+        elif self.pipeline_tag in ["qr", "qs", "qr_async_trload"]:
             bk0submax = K0_MAX_SUBMAX_MAP[self.bk0max]
-            if self.dvpad == 't': return f'true /*a.hdim_v % {bk0submax} != 0*/' # TODO: order of get_pipelines() matters! (ugly)
-            else :                return f'a.hdim_v % {bk0submax} == 0'
-        else:   assert False
+            if self.dvpad == "t":
+                return f"true /*a.hdim_v % {bk0submax} != 0*/"  # TODO: order of get_pipelines() matters! (ugly)
+            else:
+                return f"a.hdim_v % {bk0submax} == 0"
+        else:
+            assert False
+
 
 @dataclass
 class FmhaFwdPipeline:
-    tag : str
+    tag: str
 
-    F_vlayout    : str  # row/col
-    F_spad       : str  # true/false
-    F_skpad      : str  #
-    F_dpad       : str  #
-    F_dvpad      : str  #
-    F_logits     : str  # t/f
-    F_bias       : str  # true/false
-    F_lse        : str  #
-    F_dropout    : str  #
-    F_squant     : str  #
-    F_mask       : str  # value from MASK_MAP
-    F_skip       : str  # true/false
-    F_trload     : str  # true/false
-    F_constraint : CppConstraint = field(default_factory=lambda: CppConstraint())
+    F_vlayout: str  # row/col
+    F_spad: str  # true/false
+    F_skpad: str  #
+    F_dpad: str  #
+    F_dvpad: str  #
+    F_logits: str  # t/f
+    F_bias: str  # true/false
+    F_lse: str  #
+    F_dropout: str  #
+    F_squant: str  #
+    F_mask: str  # value from MASK_MAP
+    F_skip: str  # true/false
+    F_trload: str  # true/false
+    F_constraint: CppConstraint = field(default_factory=lambda: CppConstraint())
 
     @property
     def name(self) -> str:
         def pad_name() -> str:
-            n = ''
-            if self.F_spad == 't': n += 's'
-            if self.F_skpad == 't' : n += 'sk'
-            if self.F_dpad == 't' : n += 'd'
-            if self.F_dvpad == 't' : n += 'dv'
-            if n != '' : n = 'p' + n
+            n = ""
+            if self.F_spad == "t":
+                n += "s"
+            if self.F_skpad == "t":
+                n += "sk"
+            if self.F_dpad == "t":
+                n += "d"
+            if self.F_dvpad == "t":
+                n += "dv"
+            if n != "":
+                n = "p" + n
             return n
+
         pn = pad_name()
-        n = f'{self.tag}_v{self.F_vlayout[0]}'
-        if pn != '' : n += f'_{pn}'
-        else: n += '_npad'
-
-        if self.F_logits == 't' : n += '_logits'
-        else: n += '_nlogits'
-
-        if self.F_bias != 'no' : n += f'_{self.F_bias}'
-        else: n += '_nbias'
-
-        if self.F_mask[0:2] == 's_':
-            if self.F_mask == 's_mask': n += f'_mask'
-            else: n += '_nmask'
+        n = f"{self.tag}_v{self.F_vlayout[0]}"
+        if pn != "":
+            n += f"_{pn}"
         else:
-            if self.F_mask != 'no' : n += f'_m{self.F_mask[0]}'
-            else: n += '_nmask'
+            n += "_npad"
 
-        if self.F_lse == 't' : n += '_lse'
-        else: n += '_nlse'
+        if self.F_logits == "t":
+            n += "_logits"
+        else:
+            n += "_nlogits"
 
-        if self.F_dropout == 't' : n += '_dropout'
-        else: n += '_ndropout'
+        if self.F_bias != "no":
+            n += f"_{self.F_bias}"
+        else:
+            n += "_nbias"
 
-        if self.F_skip == 't' : n += '_skip'
-        else: n += '_nskip'
+        if self.F_mask[0:2] == "s_":
+            if self.F_mask == "s_mask":
+                n += "_mask"
+            else:
+                n += "_nmask"
+        else:
+            if self.F_mask != "no":
+                n += f"_m{self.F_mask[0]}"
+            else:
+                n += "_nmask"
 
-        if self.F_squant == 't' : n += '_squant'
-        else: n += '_nsquant'
+        if self.F_lse == "t":
+            n += "_lse"
+        else:
+            n += "_nlse"
 
-        if self.F_trload == 't' : n += '_trload'
-        else: n += '_ntrload'
+        if self.F_dropout == "t":
+            n += "_dropout"
+        else:
+            n += "_ndropout"
+
+        if self.F_skip == "t":
+            n += "_skip"
+        else:
+            n += "_nskip"
+
+        if self.F_squant == "t":
+            n += "_squant"
+        else:
+            n += "_nsquant"
+
+        if self.F_trload == "t":
+            n += "_trload"
+        else:
+            n += "_ntrload"
 
         return n
 
+
 class FmhaFwdApiPool:
     def __init__(self, mask_impl):
         self.pool = dict()
         self.mask_impl = mask_impl
 
-    def register_traits(self, trait : FmhaFwdApiTrait) -> None:
+    def register_traits(self, trait: FmhaFwdApiTrait) -> None:
         # TODO: do we need to check duplication?
         if trait.dtype not in self.pool.keys():
             self.pool[trait.dtype] = dict()
@@ -374,130 +428,171 @@ class FmhaFwdApiPool:
 
     @property
     def api(self) -> str:
-        tr_load_cond_map = {
-            "t": "has_load_tr",
-            "f": "true"
-        }
+        tr_load_cond_map = {"t": "has_load_tr", "f": "true"}
 
-        per_tr_load =str()
+        per_tr_load = str()
         for tr_load in ["t", "f"]:
-            per_dtypes=str()
+            per_dtypes = str()
             for i, dtype in enumerate(self.pool.keys()):
-                per_hdim_case=str()
+                per_hdim_case = str()
                 for j, (hdim, hdim_v) in enumerate(self.pool[dtype].keys()):
-                    traits=[t for t in self.pool[dtype][(hdim, hdim_v)] if tr_load == t.tr_load]
+                    traits = [
+                        t
+                        for t in self.pool[dtype][(hdim, hdim_v)]
+                        if tr_load == t.tr_load
+                    ]
                     max_bm0 = max((t.bm0 for t in traits), default=0)
-                    inners=str()
+                    inners = str()
                     for k, trait in enumerate(traits):
-                        if_k = 'if' if k == 0 else 'else if'
-                        inners = inners + FMHA_FWD_API_INNER_DISPATCH.format(F_if=if_k, F_mode=MODE_MAP[trait.mode], F_vlayout=LAYOUT_MAP[trait.vlayout],
-                                       F_pipeline_enum=PIPELINE_ENUM_MAP[trait.pipeline_tag], F_logits=BOOL_MAP[trait.logits], F_mask=get_mask_map(self.mask_impl)[trait.mask],
-                                       F_mask_check=get_mask_check_map(self.mask_impl)[trait.mask], F_bias_check=BIAS_CHECK_MAP[trait.bias], F_bias=BIAS_MAP[trait.bias],
-                                       F_lse=BOOL_MAP[trait.lse], F_dropout=BOOL_MAP[trait.dropout], F_skip=BOOL_MAP[trait.skip], F_trload=BOOL_MAP[trait.tr_load],
-                                       F_squant=BOOL_MAP[trait.squant], F_scheck=trait.scheck, F_seqtune=trait.seqtune(max_bm0), F_skcheck=trait.skcheck, F_dcheck=trait.dcheck, F_dvcheck=trait.dvcheck,
-                                       F_constraint=trait.constraint,
-                                       F_spad=BOOL_MAP[trait.spad], F_skpad=BOOL_MAP[trait.skpad], F_dpad=BOOL_MAP[trait.dpad], F_dvpad=BOOL_MAP[trait.dvpad],
-                                       F_bm0=trait.bm0, F_bn0=trait.bn0, F_bk0=trait.bk0, F_bn1=trait.bn1, F_bk1=trait.bk1, F_bk0max=trait.bk0max,
-                                       F_hdim=hdim, F_dtype=FWD_DTYPE_MAP[dtype])
-                    if_j = 'if' if j == 0 else 'else if'
-                    per_hdim_case = per_hdim_case + FMHA_FWD_API_PER_HDIM_CASE.format(F_if=if_j, F_hdim=hdim, F_hdim_v=hdim_v, F_inner_dispatch=inners)
-                if_i = 'if' if i == 0 else 'else if'
-                per_dtypes = per_dtypes + FMHA_FWD_API_PER_DTYPE.format(F_if=if_i, F_dtype=dtype, F_hdim_case=per_hdim_case)
-            per_tr_load += FMHA_FWD_API_PER_TRLOAD.format(F_if='if', F_trload_cond=tr_load_cond_map[tr_load], F_dtype_case=per_dtypes)
+                        if_k = "if" if k == 0 else "else if"
+                        inners = inners + FMHA_FWD_API_INNER_DISPATCH.format(
+                            F_if=if_k,
+                            F_mode=MODE_MAP[trait.mode],
+                            F_vlayout=LAYOUT_MAP[trait.vlayout],
+                            F_pipeline_enum=PIPELINE_ENUM_MAP[trait.pipeline_tag],
+                            F_logits=BOOL_MAP[trait.logits],
+                            F_mask=get_mask_map(self.mask_impl)[trait.mask],
+                            F_mask_check=get_mask_check_map(self.mask_impl)[trait.mask],
+                            F_bias_check=BIAS_CHECK_MAP[trait.bias],
+                            F_bias=BIAS_MAP[trait.bias],
+                            F_lse=BOOL_MAP[trait.lse],
+                            F_dropout=BOOL_MAP[trait.dropout],
+                            F_skip=BOOL_MAP[trait.skip],
+                            F_trload=BOOL_MAP[trait.tr_load],
+                            F_squant=BOOL_MAP[trait.squant],
+                            F_scheck=trait.scheck,
+                            F_seqtune=trait.seqtune(max_bm0),
+                            F_skcheck=trait.skcheck,
+                            F_dcheck=trait.dcheck,
+                            F_dvcheck=trait.dvcheck,
+                            F_constraint=trait.constraint,
+                            F_spad=BOOL_MAP[trait.spad],
+                            F_skpad=BOOL_MAP[trait.skpad],
+                            F_dpad=BOOL_MAP[trait.dpad],
+                            F_dvpad=BOOL_MAP[trait.dvpad],
+                            F_bm0=trait.bm0,
+                            F_bn0=trait.bn0,
+                            F_bk0=trait.bk0,
+                            F_bn1=trait.bn1,
+                            F_bk1=trait.bk1,
+                            F_bk0max=trait.bk0max,
+                            F_hdim=hdim,
+                            F_dtype=FWD_DTYPE_MAP[dtype],
+                        )
+                    if_j = "if" if j == 0 else "else if"
+                    per_hdim_case = per_hdim_case + FMHA_FWD_API_PER_HDIM_CASE.format(
+                        F_if=if_j, F_hdim=hdim, F_hdim_v=hdim_v, F_inner_dispatch=inners
+                    )
+                if_i = "if" if i == 0 else "else if"
+                per_dtypes = per_dtypes + FMHA_FWD_API_PER_DTYPE.format(
+                    F_if=if_i, F_dtype=dtype, F_hdim_case=per_hdim_case
+                )
+            per_tr_load += FMHA_FWD_API_PER_TRLOAD.format(
+                F_if="if",
+                F_trload_cond=tr_load_cond_map[tr_load],
+                F_dtype_case=per_dtypes,
+            )
         if not per_tr_load:
             # empty string we add some ignore to suppress warning in api
-            per_tr_load += '    (void)t ; (void)s ; (void)a;'
-        return FMHA_FWD_KERNEL_HEADER + FMHA_FWD_API.format(F_dispatch = per_tr_load)
+            per_tr_load += "    (void)t ; (void)s ; (void)a;"
+        return FMHA_FWD_KERNEL_HEADER + FMHA_FWD_API.format(F_dispatch=per_tr_load)
+
 
 @dataclass
 class FmhaFwdTileSize:
-    F_bm0        : int  # tile size along q seqlen (block size)
-    F_bn0        : int  # tile size along k seqlen
-    F_bk0        : int  # tile size along qk gemm unroll
-    F_bn1        : int  # tile size along v head_dim
-    F_bk1        : int  # tile size along kv gemm unroll
-    F_bk0max     : int  # total length of K0, used for pipeline that need load Q at once (or repeately load Q as a whole tile)
-    F_rm0        : int  # number of warps for gemm0 along q seqlen
-    F_rn0        : int  # number of warps for gemm0 along k seqlen
-    F_rk0        : int  # number of warps for gemm0 along head dim q (not used)
-    F_rm1        : int  # number of warps for gemm1 along q seqlen
-    F_rn1        : int  # number of warps for gemm1 along head dim v
-    F_rk1        : int  # number of warps for gemm1 along k seqlen (not used)
-    F_wm0        : int  # gemm0 warp size along m
-    F_wn0        : int  # gemm0 warp size along n
-    F_wk0        : int  # gemm0 warp size along k
-    F_wm1        : int  # gemm1 warp size along m
-    F_wn1        : int  # gemm1 warp size along n
-    F_wk1        : int  # gemm1 warp size along k
-    F_occupancy  : int  # occupancy, -1 will let pipeline decide the occupancy, other value will overwrite occupancy
-    F_constraint : CppConstraint = field(default_factory=lambda: CppConstraint())
+    F_bm0: int  # tile size along q seqlen (block size)
+    F_bn0: int  # tile size along k seqlen
+    F_bk0: int  # tile size along qk gemm unroll
+    F_bn1: int  # tile size along v head_dim
+    F_bk1: int  # tile size along kv gemm unroll
+    F_bk0max: int  # total length of K0, used for pipeline that need load Q at once (or repeately load Q as a whole tile)
+    F_rm0: int  # number of warps for gemm0 along q seqlen
+    F_rn0: int  # number of warps for gemm0 along k seqlen
+    F_rk0: int  # number of warps for gemm0 along head dim q (not used)
+    F_rm1: int  # number of warps for gemm1 along q seqlen
+    F_rn1: int  # number of warps for gemm1 along head dim v
+    F_rk1: int  # number of warps for gemm1 along k seqlen (not used)
+    F_wm0: int  # gemm0 warp size along m
+    F_wn0: int  # gemm0 warp size along n
+    F_wk0: int  # gemm0 warp size along k
+    F_wm1: int  # gemm1 warp size along m
+    F_wn1: int  # gemm1 warp size along n
+    F_wk1: int  # gemm1 warp size along k
+    F_occupancy: int  # occupancy, -1 will let pipeline decide the occupancy, other value will overwrite occupancy
+    F_constraint: CppConstraint = field(default_factory=lambda: CppConstraint())
 
     @property
     def name(self) -> str:
-        return f"b{self.F_bm0}x{self.F_bn0}x{self.F_bk0}x{self.F_bn1}x{self.F_bk1}x{self.F_bk0max}" +\
-        f"_r{self.F_rm0}x{self.F_rn0}x{self.F_rk0}_r{self.F_rm1}x{self.F_rn1}x{self.F_rk1}" +\
-        f"_w{self.F_wm0}x{self.F_wn0}x{self.F_wk0}_w{self.F_wm1}x{self.F_wn1}x{self.F_wk1}" +\
-        ("" if self.F_occupancy == -1 else f"_o{self.F_occupancy}")
+        return (
+            f"b{self.F_bm0}x{self.F_bn0}x{self.F_bk0}x{self.F_bn1}x{self.F_bk1}x{self.F_bk0max}"
+            + f"_r{self.F_rm0}x{self.F_rn0}x{self.F_rk0}_r{self.F_rm1}x{self.F_rn1}x{self.F_rk1}"
+            + f"_w{self.F_wm0}x{self.F_wn0}x{self.F_wk0}_w{self.F_wm1}x{self.F_wn1}x{self.F_wk1}"
+            + ("" if self.F_occupancy == -1 else f"_o{self.F_occupancy}")
+        )
+
 
 @dataclass
 class FmhaFwdKernel:
-    F_idx           : int  # this is not a tunable, but a counter to differentiate symbol
-    F_hdim          : int  # hdim
-    F_dtype         : str  # data type
-    F_mode          : str  # value from MODE_MAP
-    F_tile          : FmhaFwdTileSize
-    F_pipeline      : FmhaFwdPipeline
-    mask_impl       : str
+    F_idx: int  # this is not a tunable, but a counter to differentiate symbol
+    F_hdim: int  # hdim
+    F_dtype: str  # data type
+    F_mode: str  # value from MODE_MAP
+    F_tile: FmhaFwdTileSize
+    F_pipeline: FmhaFwdPipeline
+    mask_impl: str
 
     @property
     def template(self) -> str:
-        kernel_body = str()
-        return FMHA_FWD_KERNEL_HEADER + \
-            FMHA_FWD_KERNEL_BODY.format(
-                F_idx           = self.F_idx,
-                F_hdim          = self.F_hdim,
-                F_dtype         = FWD_DTYPE_MAP[self.F_dtype],
-                F_bm0           = self.F_tile.F_bm0,
-                F_bn0           = self.F_tile.F_bn0,
-                F_bk0           = self.F_tile.F_bk0,
-                F_bn1           = self.F_tile.F_bn1,
-                F_bk1           = self.F_tile.F_bk1,
-                F_bk0max        = self.F_tile.F_bk0max,
-                F_rm0           = self.F_tile.F_rm0,
-                F_rn0           = self.F_tile.F_rn0,
-                F_rk0           = self.F_tile.F_rk0,
-                F_rm1           = self.F_tile.F_rm1,
-                F_rn1           = self.F_tile.F_rn1,
-                F_rk1           = self.F_tile.F_rk1,
-                F_wm0           = self.F_tile.F_wm0,
-                F_wn0           = self.F_tile.F_wn0,
-                F_wk0           = self.F_tile.F_wk0,
-                F_wm1           = self.F_tile.F_wm1,
-                F_wn1           = self.F_tile.F_wn1,
-                F_wk1           = self.F_tile.F_wk1,
-                F_vlayout       = LAYOUT_MAP[self.F_pipeline.F_vlayout],
-                F_spad          = BOOL_MAP[self.F_pipeline.F_spad],
-                F_skpad         = BOOL_MAP[self.F_pipeline.F_skpad],
-                F_dpad          = BOOL_MAP[self.F_pipeline.F_dpad],
-                F_dvpad         = BOOL_MAP[self.F_pipeline.F_dvpad],
-                F_logits        = BOOL_MAP[self.F_pipeline.F_logits],
-                F_bias          = BIAS_MAP[self.F_pipeline.F_bias],
-                F_lse           = BOOL_MAP[self.F_pipeline.F_lse],
-                F_dropout       = BOOL_MAP[self.F_pipeline.F_dropout],
-                F_squant        = BOOL_MAP[self.F_pipeline.F_squant],
-                F_skip          = BOOL_MAP[self.F_pipeline.F_skip],
-                F_occupancy     = self.F_tile.F_occupancy,
-                F_pipeline_enum = PIPELINE_ENUM_MAP[self.F_pipeline.tag],
-                F_mask          = get_mask_map(self.mask_impl)[self.F_pipeline.F_mask],
-                F_mode          = MODE_MAP[self.F_mode],
-                F_pipeline      = PIPELINE_MAP[self.F_pipeline.tag],
-                F_trload        = BOOL_MAP[self.F_pipeline.F_trload])
+        return FMHA_FWD_KERNEL_HEADER + FMHA_FWD_KERNEL_BODY.format(
+            F_idx=self.F_idx,
+            F_hdim=self.F_hdim,
+            F_dtype=FWD_DTYPE_MAP[self.F_dtype],
+            F_bm0=self.F_tile.F_bm0,
+            F_bn0=self.F_tile.F_bn0,
+            F_bk0=self.F_tile.F_bk0,
+            F_bn1=self.F_tile.F_bn1,
+            F_bk1=self.F_tile.F_bk1,
+            F_bk0max=self.F_tile.F_bk0max,
+            F_rm0=self.F_tile.F_rm0,
+            F_rn0=self.F_tile.F_rn0,
+            F_rk0=self.F_tile.F_rk0,
+            F_rm1=self.F_tile.F_rm1,
+            F_rn1=self.F_tile.F_rn1,
+            F_rk1=self.F_tile.F_rk1,
+            F_wm0=self.F_tile.F_wm0,
+            F_wn0=self.F_tile.F_wn0,
+            F_wk0=self.F_tile.F_wk0,
+            F_wm1=self.F_tile.F_wm1,
+            F_wn1=self.F_tile.F_wn1,
+            F_wk1=self.F_tile.F_wk1,
+            F_vlayout=LAYOUT_MAP[self.F_pipeline.F_vlayout],
+            F_spad=BOOL_MAP[self.F_pipeline.F_spad],
+            F_skpad=BOOL_MAP[self.F_pipeline.F_skpad],
+            F_dpad=BOOL_MAP[self.F_pipeline.F_dpad],
+            F_dvpad=BOOL_MAP[self.F_pipeline.F_dvpad],
+            F_logits=BOOL_MAP[self.F_pipeline.F_logits],
+            F_bias=BIAS_MAP[self.F_pipeline.F_bias],
+            F_lse=BOOL_MAP[self.F_pipeline.F_lse],
+            F_dropout=BOOL_MAP[self.F_pipeline.F_dropout],
+            F_squant=BOOL_MAP[self.F_pipeline.F_squant],
+            F_skip=BOOL_MAP[self.F_pipeline.F_skip],
+            F_occupancy=self.F_tile.F_occupancy,
+            F_pipeline_enum=PIPELINE_ENUM_MAP[self.F_pipeline.tag],
+            F_mask=get_mask_map(self.mask_impl)[self.F_pipeline.F_mask],
+            F_mode=MODE_MAP[self.F_mode],
+            F_pipeline=PIPELINE_MAP[self.F_pipeline.tag],
+            F_trload=BOOL_MAP[self.F_pipeline.F_trload],
+        )
 
     @property
     def name(self) -> str:
         # TODO: we don't encode idx here
-        return f"fmha_fwd_d{self.F_hdim}_{self.F_dtype}_{self.F_mode}_" + \
-                self.F_tile.name + '_' + self.F_pipeline.name
+        return (
+            f"fmha_fwd_d{self.F_hdim}_{self.F_dtype}_{self.F_mode}_"
+            + self.F_tile.name
+            + "_"
+            + self.F_pipeline.name
+        )
 
     @property
     def filename(self) -> str:
@@ -505,74 +600,612 @@ class FmhaFwdKernel:
 
     def api_trait(self) -> FmhaFwdApiTrait:
         return FmhaFwdApiTrait(
-                pipeline_tag=self.F_pipeline.tag,
-                hdim=str(self.F_hdim),
-                dtype=self.F_dtype,
-                mode=self.F_mode,
-                bm0=self.F_tile.F_bm0,
-                bn0=self.F_tile.F_bn0,
-                bk0=self.F_tile.F_bk0,
-                bn1=self.F_tile.F_bn1,
-                bk1=self.F_tile.F_bk1,
-                bk0max=self.F_tile.F_bk0max,
-                vlayout=self.F_pipeline.F_vlayout,
-                mask=self.F_pipeline.F_mask,
-                logits=self.F_pipeline.F_logits,
-                bias=self.F_pipeline.F_bias,
-                lse=self.F_pipeline.F_lse,
-                dropout=self.F_pipeline.F_dropout,
-                squant=self.F_pipeline.F_squant,
-                spad=self.F_pipeline.F_spad,
-                skpad=self.F_pipeline.F_skpad,
-                dpad=self.F_pipeline.F_dpad,
-                dvpad=self.F_pipeline.F_dvpad,
-                skip=self.F_pipeline.F_skip,
-                tr_load=self.F_pipeline.F_trload,
-                constraint=self.F_tile.F_constraint & self.F_pipeline.F_constraint)
+            pipeline_tag=self.F_pipeline.tag,
+            hdim=str(self.F_hdim),
+            dtype=self.F_dtype,
+            mode=self.F_mode,
+            bm0=self.F_tile.F_bm0,
+            bn0=self.F_tile.F_bn0,
+            bk0=self.F_tile.F_bk0,
+            bn1=self.F_tile.F_bn1,
+            bk1=self.F_tile.F_bk1,
+            bk0max=self.F_tile.F_bk0max,
+            vlayout=self.F_pipeline.F_vlayout,
+            mask=self.F_pipeline.F_mask,
+            logits=self.F_pipeline.F_logits,
+            bias=self.F_pipeline.F_bias,
+            lse=self.F_pipeline.F_lse,
+            dropout=self.F_pipeline.F_dropout,
+            squant=self.F_pipeline.F_squant,
+            spad=self.F_pipeline.F_spad,
+            skpad=self.F_pipeline.F_skpad,
+            dpad=self.F_pipeline.F_dpad,
+            dvpad=self.F_pipeline.F_dvpad,
+            skip=self.F_pipeline.F_skip,
+            tr_load=self.F_pipeline.F_trload,
+            constraint=self.F_tile.F_constraint & self.F_pipeline.F_constraint,
+        )
+
 
 class KernelComponentFactory:
     # TODO: design a more practical way to do it
     # this is current supported tile size per hdim
     @staticmethod
-    def get_hdim_tile_size_dict(dtype : str) -> Optional[dict]:
-        if dtype == 'fp32':
+    def get_hdim_tile_size_dict(dtype: str) -> Optional[dict]:
+        if dtype == "fp32":
             return {
                 #                             bm0, bn0, bk0, bn1, bk1,
-                ( 32,  32) : [FmhaFwdTileSize( 64,  64,  16,  32,  32,   32,  4, 1, 1,  4, 1, 1,  16, 16, 16,  16, 16, 16,  -1)],
-                ( 48,  48) : [FmhaFwdTileSize( 32, 128,  16,  48,  16,   48,  2, 1, 1,  2, 1, 1,  16, 16, 16,  16, 16, 16,  -1),
-                              FmhaFwdTileSize(128,  64,  16,  48,  32,   48,  4, 1, 1,  4, 1, 1,  16, 16, 16,  16, 16, 16,  -1)],
-                ( 64,  64) : [FmhaFwdTileSize( 64,  64,  32,  64,  32,   64,  4, 1, 1,  4, 1, 1,  16, 16, 16,  16, 16, 16,  -1)],
-                ( 96, 128) : [FmhaFwdTileSize(128,  64,  32, 128,  32,   96,  4, 1, 1,  4, 1, 1,  16, 16, 16,  16, 16, 16,  -1)],
-                (128, 128) : [FmhaFwdTileSize( 32, 128,  32, 128,  16,  128,  2, 1, 1,  2, 1, 1,  16, 16, 16,  16, 16, 16,  -1),
-                              FmhaFwdTileSize(128,  64,  32, 128,  32,  128,  4, 1, 1,  4, 1, 1,  16, 16, 16,  16, 16, 16,  -1)],
-                (192, 192) : [FmhaFwdTileSize( 64,  64,  32, 192,  32,  192,  4, 1, 1,  4, 1, 1,  16, 16, 16,  16, 16, 16,  -1)],
-                (256, 256) : [FmhaFwdTileSize( 64,  64,  32, 256,  32,  256,  4, 1, 1,  4, 1, 1,  16, 16, 16,  16, 16, 16,  -1)],
+                (32, 32): [
+                    FmhaFwdTileSize(
+                        64,
+                        64,
+                        16,
+                        32,
+                        32,
+                        32,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        -1,
+                    )
+                ],
+                (48, 48): [
+                    FmhaFwdTileSize(
+                        32,
+                        128,
+                        16,
+                        48,
+                        16,
+                        48,
+                        2,
+                        1,
+                        1,
+                        2,
+                        1,
+                        1,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        -1,
+                    ),
+                    FmhaFwdTileSize(
+                        128,
+                        64,
+                        16,
+                        48,
+                        32,
+                        48,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        -1,
+                    ),
+                ],
+                (64, 64): [
+                    FmhaFwdTileSize(
+                        64,
+                        64,
+                        32,
+                        64,
+                        32,
+                        64,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        -1,
+                    )
+                ],
+                (96, 128): [
+                    FmhaFwdTileSize(
+                        128,
+                        64,
+                        32,
+                        128,
+                        32,
+                        96,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        -1,
+                    )
+                ],
+                (128, 128): [
+                    FmhaFwdTileSize(
+                        32,
+                        128,
+                        32,
+                        128,
+                        16,
+                        128,
+                        2,
+                        1,
+                        1,
+                        2,
+                        1,
+                        1,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        -1,
+                    ),
+                    FmhaFwdTileSize(
+                        128,
+                        64,
+                        32,
+                        128,
+                        32,
+                        128,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        -1,
+                    ),
+                ],
+                (192, 192): [
+                    FmhaFwdTileSize(
+                        64,
+                        64,
+                        32,
+                        192,
+                        32,
+                        192,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        -1,
+                    )
+                ],
+                (256, 256): [
+                    FmhaFwdTileSize(
+                        64,
+                        64,
+                        32,
+                        256,
+                        32,
+                        256,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        -1,
+                    )
+                ],
             }
-        elif dtype == 'fp16' or dtype == 'bf16':
+        elif dtype == "fp16" or dtype == "bf16":
             return {
-                (32, 32)  : [FmhaFwdTileSize(128, 64,  16, 32,  32,  32,   4, 1, 1,  4, 1, 1,  32, 32, 16,  32, 32, 16,  -1)],
-                (64, 64)  : [FmhaFwdTileSize(16, 32,  64, 64,  32,  64,   1, 1, 1,  1, 1, 1,  16, 16, 32,  16, 16, 32,  -1),
-                             FmhaFwdTileSize(32, 32,  64, 64,  32,  64,   1, 1, 1,  1, 1, 1,  32, 32, 16,  32, 32, 16,  -1),
-                             FmhaFwdTileSize(128, 64,  32, 64,  32,  64,   4, 1, 1,  4, 1, 1,  32, 32, 16,  32, 32, 16,  -1)],
-                (96, 128) : [FmhaFwdTileSize(128, 128, 32, 128, 32,  96,   4, 1, 1,  4, 1, 1,  32, 32, 16,  32, 32, 16,  -1)],
-                (128,128) : [FmhaFwdTileSize(16, 32, 64, 128, 32,  128,  1, 1, 1,  1, 1, 1,  16, 16, 32,  16, 16, 32,  -1),
-                             FmhaFwdTileSize(32, 32, 128, 128, 32,  128,  1, 1, 1,  1, 1, 1,  32, 32, 16,  32, 32, 16,  -1),
-                             FmhaFwdTileSize(128, 64, 32, 128, 16,  128,  4, 1, 1,  4, 1, 1,  32, 32, 16,  32, 32, 16,  -1),
-                             FmhaFwdTileSize(128, 128, 32, 128, 32,  128,  4, 1, 1,  4, 1, 1,  32, 32, 16,  32, 32, 16,  -1)],
+                (32, 32): [
+                    FmhaFwdTileSize(
+                        128,
+                        64,
+                        16,
+                        32,
+                        32,
+                        32,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        32,
+                        32,
+                        16,
+                        32,
+                        32,
+                        16,
+                        -1,
+                    )
+                ],
+                (64, 64): [
+                    FmhaFwdTileSize(
+                        16,
+                        32,
+                        64,
+                        64,
+                        32,
+                        64,
+                        1,
+                        1,
+                        1,
+                        1,
+                        1,
+                        1,
+                        16,
+                        16,
+                        32,
+                        16,
+                        16,
+                        32,
+                        -1,
+                    ),
+                    FmhaFwdTileSize(
+                        32,
+                        32,
+                        64,
+                        64,
+                        32,
+                        64,
+                        1,
+                        1,
+                        1,
+                        1,
+                        1,
+                        1,
+                        32,
+                        32,
+                        16,
+                        32,
+                        32,
+                        16,
+                        -1,
+                    ),
+                    FmhaFwdTileSize(
+                        128,
+                        64,
+                        32,
+                        64,
+                        32,
+                        64,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        32,
+                        32,
+                        16,
+                        32,
+                        32,
+                        16,
+                        -1,
+                    ),
+                ],
+                (96, 128): [
+                    FmhaFwdTileSize(
+                        128,
+                        128,
+                        32,
+                        128,
+                        32,
+                        96,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        32,
+                        32,
+                        16,
+                        32,
+                        32,
+                        16,
+                        -1,
+                    )
+                ],
+                (128, 128): [
+                    FmhaFwdTileSize(
+                        16,
+                        32,
+                        64,
+                        128,
+                        32,
+                        128,
+                        1,
+                        1,
+                        1,
+                        1,
+                        1,
+                        1,
+                        16,
+                        16,
+                        32,
+                        16,
+                        16,
+                        32,
+                        -1,
+                    ),
+                    FmhaFwdTileSize(
+                        32,
+                        32,
+                        128,
+                        128,
+                        32,
+                        128,
+                        1,
+                        1,
+                        1,
+                        1,
+                        1,
+                        1,
+                        32,
+                        32,
+                        16,
+                        32,
+                        32,
+                        16,
+                        -1,
+                    ),
+                    FmhaFwdTileSize(
+                        128,
+                        64,
+                        32,
+                        128,
+                        16,
+                        128,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        32,
+                        32,
+                        16,
+                        32,
+                        32,
+                        16,
+                        -1,
+                    ),
+                    FmhaFwdTileSize(
+                        128,
+                        128,
+                        32,
+                        128,
+                        32,
+                        128,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        32,
+                        32,
+                        16,
+                        32,
+                        32,
+                        16,
+                        -1,
+                    ),
+                ],
                 # (160,160) : [FmhaFwdTileSize(128, 128, 32, 160, 32,  160,  4, 1, 1,  4, 1, 1,  32, 32, 16,  32, 32, 16,   1)],
-                (192,128) : [FmhaFwdTileSize(128, 128, 32, 128, 32,  192,  4, 1, 1,  4, 1, 1,  32, 32, 16,  32, 32, 16,  -1)],
-                (192,192) : [FmhaFwdTileSize(128, 128, 32, 192, 32,  192,  4, 1, 1,  4, 1, 1,  32, 32, 16,  32, 32, 16,   1)],
-                (256,256) : [FmhaFwdTileSize(128, 128, 32, 256, 32,  256,  4, 1, 1,  4, 1, 1,  32, 32, 16,  32, 32, 16,  -1)],
+                (192, 128): [
+                    FmhaFwdTileSize(
+                        128,
+                        128,
+                        32,
+                        128,
+                        32,
+                        192,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        32,
+                        32,
+                        16,
+                        32,
+                        32,
+                        16,
+                        -1,
+                    )
+                ],
+                (192, 192): [
+                    FmhaFwdTileSize(
+                        128,
+                        128,
+                        32,
+                        192,
+                        32,
+                        192,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        32,
+                        32,
+                        16,
+                        32,
+                        32,
+                        16,
+                        1,
+                    )
+                ],
+                (256, 256): [
+                    FmhaFwdTileSize(
+                        128,
+                        128,
+                        32,
+                        256,
+                        32,
+                        256,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        32,
+                        32,
+                        16,
+                        32,
+                        32,
+                        16,
+                        -1,
+                    )
+                ],
             }
-        elif dtype == 'fp8' or dtype == 'fp8bf16':
+        elif dtype == "fp8" or dtype == "fp8bf16":
             return {
-                (64,64 )  : [FmhaFwdTileSize(128, 64,  32, 64,  32,  64,   2, 1, 1,  2, 1, 1,  32, 32, 32,  32, 32, 32,  -1)],
-                (128,128) : [FmhaFwdTileSize(128, 128, 32, 128, 32,  128,  4, 1, 1,  4, 1, 1,  32, 32, 32,  32, 32, 32,  -1)],
-                (256,256) : [FmhaFwdTileSize(128, 128, 32, 256, 32,  256,  4, 1, 1,  4, 1, 1,  32, 32, 32,  32, 32, 32,  -1)],
+                (64, 64): [
+                    FmhaFwdTileSize(
+                        128,
+                        64,
+                        32,
+                        64,
+                        32,
+                        64,
+                        2,
+                        1,
+                        1,
+                        2,
+                        1,
+                        1,
+                        32,
+                        32,
+                        32,
+                        32,
+                        32,
+                        32,
+                        -1,
+                    )
+                ],
+                (128, 128): [
+                    FmhaFwdTileSize(
+                        128,
+                        128,
+                        32,
+                        128,
+                        32,
+                        128,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        32,
+                        32,
+                        32,
+                        32,
+                        32,
+                        32,
+                        -1,
+                    )
+                ],
+                (256, 256): [
+                    FmhaFwdTileSize(
+                        128,
+                        128,
+                        32,
+                        256,
+                        32,
+                        256,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        32,
+                        32,
+                        32,
+                        32,
+                        32,
+                        32,
+                        -1,
+                    )
+                ],
             }
-        elif dtype == 'fp8fp32':
+        elif dtype == "fp8fp32":
             return {
-                (128,128) : [FmhaFwdTileSize(128, 128, 32, 128, 32,  128,  4, 1, 1,  4, 1, 1,  32, 32, 32,  32, 32, 32,  -1)],
+                (128, 128): [
+                    FmhaFwdTileSize(
+                        128,
+                        128,
+                        32,
+                        128,
+                        32,
+                        128,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        32,
+                        32,
+                        32,
+                        32,
+                        32,
+                        32,
+                        -1,
+                    )
+                ],
             }
         else:
             return None
@@ -586,95 +1219,425 @@ class KernelComponentFactory:
         # TODO: currently for qr pipeline, let 't' padding to appear later!!
         # TODO: how to design this more generic?
         pipelines = []
-        if dtype in ['fp32']:
-            squant = 'f'
-            for logits, mask, bias, lse, dropout, skip in itertools.product(["t", "f"], get_mask_map(mask_impl).keys(), BIAS_MAP.keys(), ["t", "f"], ["t", "f"], ["t", "f"]):
-                pipelines.append(FmhaFwdPipeline('qr', 'row', 'f', 'f', 'f', 'f', logits, bias, lse, dropout, squant, mask, skip, 'f'))
-                pipelines.append(FmhaFwdPipeline('qr', 'row', 'f', 't', 'f', 'f', logits, bias, lse, dropout, squant, mask, skip, 'f'))
-                pipelines.append(FmhaFwdPipeline('qr', 'row', 't', 't', 't', 't', logits, bias, lse, dropout, squant, mask, skip, 'f'))
-        elif dtype in ['fp16', 'bf16']:
-            squant = 'f'
-            for logits, mask, bias, lse, dropout, skip in itertools.product(["t", "f"], get_mask_map(mask_impl).keys(), BIAS_MAP.keys(), ["t", "f"], ["t", "f"], ["t", "f"]):
+        if dtype in ["fp32"]:
+            squant = "f"
+            for logits, mask, bias, lse, dropout, skip in itertools.product(
+                ["t", "f"],
+                get_mask_map(mask_impl).keys(),
+                BIAS_MAP.keys(),
+                ["t", "f"],
+                ["t", "f"],
+                ["t", "f"],
+            ):
+                pipelines.append(
+                    FmhaFwdPipeline(
+                        "qr",
+                        "row",
+                        "f",
+                        "f",
+                        "f",
+                        "f",
+                        logits,
+                        bias,
+                        lse,
+                        dropout,
+                        squant,
+                        mask,
+                        skip,
+                        "f",
+                    )
+                )
+                pipelines.append(
+                    FmhaFwdPipeline(
+                        "qr",
+                        "row",
+                        "f",
+                        "t",
+                        "f",
+                        "f",
+                        logits,
+                        bias,
+                        lse,
+                        dropout,
+                        squant,
+                        mask,
+                        skip,
+                        "f",
+                    )
+                )
+                pipelines.append(
+                    FmhaFwdPipeline(
+                        "qr",
+                        "row",
+                        "t",
+                        "t",
+                        "t",
+                        "t",
+                        logits,
+                        bias,
+                        lse,
+                        dropout,
+                        squant,
+                        mask,
+                        skip,
+                        "f",
+                    )
+                )
+        elif dtype in ["fp16", "bf16"]:
+            squant = "f"
+            for logits, mask, bias, lse, dropout, skip in itertools.product(
+                ["t", "f"],
+                get_mask_map(mask_impl).keys(),
+                BIAS_MAP.keys(),
+                ["t", "f"],
+                ["t", "f"],
+                ["t", "f"],
+            ):
                 if hdim == 256 and hdim_v == 256:
-                    pipelines.append(FmhaFwdPipeline('qr', 'row', 'f', 'f', 'f', 'f', logits, bias, lse, dropout, squant, mask, skip, 'f'))
+                    pipelines.append(
+                        FmhaFwdPipeline(
+                            "qr",
+                            "row",
+                            "f",
+                            "f",
+                            "f",
+                            "f",
+                            logits,
+                            bias,
+                            lse,
+                            dropout,
+                            squant,
+                            mask,
+                            skip,
+                            "f",
+                        )
+                    )
                     # the below two is used for hdim vectorize load
-                    pipelines.append(FmhaFwdPipeline('qr', 'row', 't', 't', 'f', 'f', logits, bias, lse, dropout, squant, mask, skip, 'f'))
-                    pipelines.append(FmhaFwdPipeline('qr', 'row', 't', 't', 't', 't', logits, bias, lse, dropout, squant, mask, skip, 'f'))
+                    pipelines.append(
+                        FmhaFwdPipeline(
+                            "qr",
+                            "row",
+                            "t",
+                            "t",
+                            "f",
+                            "f",
+                            logits,
+                            bias,
+                            lse,
+                            dropout,
+                            squant,
+                            mask,
+                            skip,
+                            "f",
+                        )
+                    )
+                    pipelines.append(
+                        FmhaFwdPipeline(
+                            "qr",
+                            "row",
+                            "t",
+                            "t",
+                            "t",
+                            "t",
+                            logits,
+                            bias,
+                            lse,
+                            dropout,
+                            squant,
+                            mask,
+                            skip,
+                            "f",
+                        )
+                    )
                 else:
                     if bias == "bias":
                         # TODO: rocm 6.2 compiler problem if using qr_async for bias case
-                        pipelines.append(FmhaFwdPipeline('qr', 'row', 'f', 'f', 'f', 'f', logits, bias, lse, dropout, squant, mask, skip, 'f'))
-                        pipelines.append(FmhaFwdPipeline('qr', 'row', 't', 't', 't', 't', logits, bias, lse, dropout, squant, mask, skip, 'f'))
+                        pipelines.append(
+                            FmhaFwdPipeline(
+                                "qr",
+                                "row",
+                                "f",
+                                "f",
+                                "f",
+                                "f",
+                                logits,
+                                bias,
+                                lse,
+                                dropout,
+                                squant,
+                                mask,
+                                skip,
+                                "f",
+                            )
+                        )
+                        pipelines.append(
+                            FmhaFwdPipeline(
+                                "qr",
+                                "row",
+                                "t",
+                                "t",
+                                "t",
+                                "t",
+                                logits,
+                                bias,
+                                lse,
+                                dropout,
+                                squant,
+                                mask,
+                                skip,
+                                "f",
+                            )
+                        )
                     else:
-                        pipelines.append(FmhaFwdPipeline('qr_async', 'row', 't', 'f', 't', 't', logits, bias, lse, dropout, squant, mask, skip, 'f'))
-                        pipelines.append(FmhaFwdPipeline('qr_async', 'row', 't', 't', 't', 't', logits, bias, lse, dropout, squant, mask, skip, 'f'))
-                        if (hdim, hdim_v) in [(64, 64), (128, 128)] and logits == "f" and bias == "no" and dropout == "f" and lse == "f" and skip == "f":
-                            pipelines.append(FmhaFwdPipeline('qr_async_trload', 'row', 'f', 'f', 'f', 'f', logits, bias, lse, dropout, squant, mask, skip, 't'))
-                            pipelines.append(FmhaFwdPipeline('qr_async_trload', 'row', 'f', 'f', 't', 't', logits, bias, lse, dropout, squant, mask, skip, 't'))
+                        pipelines.append(
+                            FmhaFwdPipeline(
+                                "qr_async",
+                                "row",
+                                "t",
+                                "f",
+                                "t",
+                                "t",
+                                logits,
+                                bias,
+                                lse,
+                                dropout,
+                                squant,
+                                mask,
+                                skip,
+                                "f",
+                            )
+                        )
+                        pipelines.append(
+                            FmhaFwdPipeline(
+                                "qr_async",
+                                "row",
+                                "t",
+                                "t",
+                                "t",
+                                "t",
+                                logits,
+                                bias,
+                                lse,
+                                dropout,
+                                squant,
+                                mask,
+                                skip,
+                                "f",
+                            )
+                        )
+                        if (
+                            (hdim, hdim_v) in [(64, 64), (128, 128)]
+                            and logits == "f"
+                            and bias == "no"
+                            and dropout == "f"
+                            and lse == "f"
+                            and skip == "f"
+                        ):
+                            pipelines.append(
+                                FmhaFwdPipeline(
+                                    "qr_async_trload",
+                                    "row",
+                                    "f",
+                                    "f",
+                                    "f",
+                                    "f",
+                                    logits,
+                                    bias,
+                                    lse,
+                                    dropout,
+                                    squant,
+                                    mask,
+                                    skip,
+                                    "t",
+                                )
+                            )
+                            pipelines.append(
+                                FmhaFwdPipeline(
+                                    "qr_async_trload",
+                                    "row",
+                                    "f",
+                                    "f",
+                                    "t",
+                                    "t",
+                                    logits,
+                                    bias,
+                                    lse,
+                                    dropout,
+                                    squant,
+                                    mask,
+                                    skip,
+                                    "t",
+                                )
+                            )
+
                     if receipt == 1 and bias != "bias":
-                        pipelines.append(FmhaFwdPipeline('qr', 'row', 't', 't', 't', 't', logits, bias, lse, dropout, squant, mask, skip, 'f')) # TODO: cover arbitraty hdim
-        elif dtype in ['fp8', 'fp8bf16', 'fp8fp32']:
+                        pipelines.append(
+                            FmhaFwdPipeline(
+                                "qr",
+                                "row",
+                                "t",
+                                "t",
+                                "t",
+                                "t",
+                                logits,
+                                bias,
+                                lse,
+                                dropout,
+                                squant,
+                                mask,
+                                skip,
+                                "f",
+                            )
+                        )  # TODO: cover arbitraty hdim
+        elif dtype in ["fp8", "fp8bf16", "fp8fp32"]:
             # no need lse/dropout kernels
-            for logits, squant, mask, bias in itertools.product(["f"], ["t", "f"], get_mask_map(mask_impl).keys(), BIAS_MAP.keys()):
-                pipelines.append(FmhaFwdPipeline('qr', 'row', 'f', 'f', 'f', 'f', logits, bias, 'f', 'f', squant, mask, 'f', 'f'))
-                pipelines.append(FmhaFwdPipeline('qr', 'row', 't', 't', 'f', 'f', logits, bias, 'f', 'f', squant, mask, 'f', 'f'))
-        elif dtype in ['fp8fp16', 'bf8']:
+            for logits, squant, mask, bias in itertools.product(
+                ["f"], ["t", "f"], get_mask_map(mask_impl).keys(), BIAS_MAP.keys()
+            ):
+                pipelines.append(
+                    FmhaFwdPipeline(
+                        "qr",
+                        "row",
+                        "f",
+                        "f",
+                        "f",
+                        "f",
+                        logits,
+                        bias,
+                        "f",
+                        "f",
+                        squant,
+                        mask,
+                        "f",
+                        "f",
+                    )
+                )
+                pipelines.append(
+                    FmhaFwdPipeline(
+                        "qr",
+                        "row",
+                        "t",
+                        "t",
+                        "f",
+                        "f",
+                        logits,
+                        bias,
+                        "f",
+                        "f",
+                        squant,
+                        mask,
+                        "f",
+                        "f",
+                    )
+                )
+        elif dtype in ["fp8fp16", "bf8"]:
             # TODO
             None
         else:
             assert False
         return pipelines
 
+
 class CustomFactory(KernelComponentFactory):
     @staticmethod
-    def get_hdim_tile_size_dict(dtype : str) -> Optional[dict]:
+    def get_hdim_tile_size_dict(dtype: str) -> Optional[dict]:
         result = KernelComponentFactory.get_hdim_tile_size_dict(dtype)
-        if dtype == 'fp16' or dtype == 'bf16':
+        if dtype == "fp16" or dtype == "bf16":
             if (128, 128) in result.keys():
-                result[(128, 128)].insert(0, FmhaFwdTileSize( 64, 128, 64, 128, 64,  128,  4, 1, 1,  4, 1, 1,  16, 16, 16,  16, 16, 16,  -1, CppConstraint('get_num_blocks(128) < num_cus * min_cu_util_rate')))
+                result[(128, 128)].insert(
+                    0,
+                    FmhaFwdTileSize(
+                        64,
+                        128,
+                        64,
+                        128,
+                        64,
+                        128,
+                        4,
+                        1,
+                        1,
+                        4,
+                        1,
+                        1,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        16,
+                        -1,
+                        CppConstraint(
+                            "get_num_blocks(128) < num_cus * min_cu_util_rate"
+                        ),
+                    ),
+                )
         return result
 
-def get_fwd_blobs(kernel_filter : Optional[str], receipt, optdim_list, mask_impl) -> Tuple[FmhaFwdApiPool, List[FmhaFwdKernel]]:
+
+def get_fwd_blobs(
+    kernel_filter: Optional[str], receipt, optdim_list, mask_impl
+) -> Tuple[FmhaFwdApiPool, List[FmhaFwdKernel]]:
     gen = list()
     api_pool = FmhaFwdApiPool(mask_impl)
 
-    factory = CustomFactory if os.environ.get('CK_TILE_FMHA_FWD_CUSTOM_FACTORY', '0') == '1' else KernelComponentFactory
+    factory = (
+        CustomFactory
+        if os.environ.get("CK_TILE_FMHA_FWD_CUSTOM_FACTORY", "0") == "1"
+        else KernelComponentFactory
+    )
 
     for dtype in FWD_DTYPE_MAP.keys():
         d = factory.get_hdim_tile_size_dict(dtype)
-        if d == None:
+        if d is None:
             continue
-        #for hdim_str, mode, mask, bias, lse in itertools.product(d.keys(), MODE_MAP.keys(), MASK_MAP.keys(), ["t", "f"], ["t", "f"]):
-        for ((hdim, hdim_v), tiles), mode in itertools.product(d.items(), MODE_MAP.keys()):
+        # for hdim_str, mode, mask, bias, lse in itertools.product(d.keys(), MODE_MAP.keys(), MASK_MAP.keys(), ["t", "f"], ["t", "f"]):
+        for ((hdim, hdim_v), tiles), mode in itertools.product(
+            d.items(), MODE_MAP.keys()
+        ):
             for tile, next_tile in zip(tiles, tiles[1:]):
-                assert next_tile.F_bm0 >= tile.F_bm0, 'Tiles must be ordered by increasing bm0'
-            for tile, pipeline in itertools.product(tiles, factory.get_pipelines(dtype, hdim, hdim_v, receipt, mask_impl)):
+                assert next_tile.F_bm0 >= tile.F_bm0, (
+                    "Tiles must be ordered by increasing bm0"
+                )
+            for tile, pipeline in itertools.product(
+                tiles, factory.get_pipelines(dtype, hdim, hdim_v, receipt, mask_impl)
+            ):
                 if mode == "group":
-                    if pipeline.F_spad != 't' or pipeline.F_skpad != 't':
+                    if pipeline.F_spad != "t" or pipeline.F_skpad != "t":
                         # in group mode, spad/skpad must be true, since we can't predict if seqlen of current batch need pad or not
                         continue
                 if (hdim, hdim_v) == (192, 128):
                     # NOTE: this is used to speedup deepseek prefill case, we don't gen training
-                    if pipeline.F_bias != 'no' or pipeline.F_dropout == 't':
+                    if pipeline.F_bias != "no" or pipeline.F_dropout == "t":
                         continue
-                if dtype != 'fp32':
-                    if pipeline.tag != 'qr_async_trload' and (((hdim, hdim_v) == (128, 128) and tile.F_bn0 != 128) or ((hdim, hdim_v) != (128, 128) and tile.F_bm0 != 128)):
+                if dtype != "fp32":
+                    if pipeline.tag != "qr_async_trload" and (
+                        ((hdim, hdim_v) == (128, 128) and tile.F_bn0 != 128)
+                        or ((hdim, hdim_v) != (128, 128) and tile.F_bm0 != 128)
+                    ):
                         # non qr_async_trload only support km0=128 tile size when hdim is not 128
                         # non qr_async only support kn0=128 tile size when hdim is 128
                         continue
-                    if pipeline.tag == 'qr_async_trload' and (((hdim, hdim_v) == (128, 128) and tile.F_bn0 == 128) or ((hdim, hdim_v) not in [(64, 64), (128, 128)])):
+                    if pipeline.tag == "qr_async_trload" and (
+                        ((hdim, hdim_v) == (128, 128) and tile.F_bn0 == 128)
+                        or ((hdim, hdim_v) not in [(64, 64), (128, 128)])
+                    ):
                         continue
                 # logits_soft_cap is only allowed if no bias
-                if not ((pipeline.F_logits == 't' and pipeline.F_bias == 'no') or pipeline.F_logits == 'f'):
+                if not (
+                    (pipeline.F_logits == "t" and pipeline.F_bias == "no")
+                    or pipeline.F_logits == "f"
+                ):
                     continue
-                k = FmhaFwdKernel(F_idx=0,
-                                  F_hdim=hdim,
-                                  F_dtype=dtype,
-                                  F_mode=mode,
-                                  F_tile=tile,
-                                  F_pipeline=pipeline,
-                                  mask_impl=mask_impl)
-                if kernel_filter != '':
+                k = FmhaFwdKernel(
+                    F_idx=0,
+                    F_hdim=hdim,
+                    F_dtype=dtype,
+                    F_mode=mode,
+                    F_tile=tile,
+                    F_pipeline=pipeline,
+                    mask_impl=mask_impl,
+                )
+                if kernel_filter != "":
                     if not fnmatch.fnmatch(k.name, kernel_filter):
                         continue
                 if optdim_list != [-1]:
@@ -682,80 +1645,80 @@ def get_fwd_blobs(kernel_filter : Optional[str], receipt, optdim_list, mask_impl
                         continue
                 # 2 - Flash attention integration
                 if receipt in (2, 3):
-                    cond = dtype in ['fp16', 'bf16']
-                    cond &= pipeline.F_vlayout == 'row'
-                    cond &= pipeline.F_bias in ['no', 'alibi']
-                    cond &= pipeline.F_squant == 'f'
-                    cond &= pipeline.F_skip == 'f'
+                    cond = dtype in ["fp16", "bf16"]
+                    cond &= pipeline.F_vlayout == "row"
+                    cond &= pipeline.F_bias in ["no", "alibi"]
+                    cond &= pipeline.F_squant == "f"
+                    cond &= pipeline.F_skip == "f"
                     if not cond:
                         continue
                 # PyTorch integration
                 elif receipt == 4:
-                    cond = dtype in ['fp16', 'bf16']
-                    cond &= pipeline.F_vlayout == 'row'
-                    cond &= pipeline.F_bias in ['no', 'bias']
-                    cond &= pipeline.F_squant == 'f'
-                    cond &= mode == 'batch'
-                    cond &= pipeline.F_skip == 'f'
-                    cond &= pipeline.F_logits == 'f'
+                    cond = dtype in ["fp16", "bf16"]
+                    cond &= pipeline.F_vlayout == "row"
+                    cond &= pipeline.F_bias in ["no", "bias"]
+                    cond &= pipeline.F_squant == "f"
+                    cond &= mode == "batch"
+                    cond &= pipeline.F_skip == "f"
+                    cond &= pipeline.F_logits == "f"
                     if not cond:
                         continue
                 # Aiter(mha_fwd) integration
                 elif receipt == 100:
-                    cond = dtype in ['fp16', 'bf16', 'fp8bf16']
-                    cond &= mode == 'batch'
-                    cond &= pipeline.F_vlayout == 'row'
-                    if dtype == 'fp8bf16':
+                    cond = dtype in ["fp16", "bf16", "fp8bf16"]
+                    cond &= mode == "batch"
+                    cond &= pipeline.F_vlayout == "row"
+                    if dtype == "fp8bf16":
                         cond &= hdim == 128
                     if not cond:
                         continue
                 # Aiter(mha_varlen_fwd) integration
                 elif receipt == 200:
-                    cond = dtype in ['fp16', 'bf16', 'fp8bf16']
-                    cond &= mode == 'group'
-                    cond &= pipeline.F_vlayout == 'row'
-                    if dtype == 'fp8bf16':
+                    cond = dtype in ["fp16", "bf16", "fp8bf16"]
+                    cond &= mode == "group"
+                    cond &= pipeline.F_vlayout == "row"
+                    if dtype == "fp8bf16":
                         cond &= hdim == 128
                     if not cond:
                         continue
                 # aiter::mha_fwd C++ api integration
                 elif receipt == 600:
-                    cond = dtype in ['fp16', 'bf16', 'fp8bf16']
-                    cond &= pipeline.F_vlayout == 'row'
-                    if dtype == 'fp8bf16':
+                    cond = dtype in ["fp16", "bf16", "fp8bf16"]
+                    cond &= pipeline.F_vlayout == "row"
+                    if dtype == "fp8bf16":
                         cond &= hdim == 128
                     if not cond:
                         continue
                 elif receipt == 888:
-                    cond = dtype in ['fp8', 'fp8bf16', 'fp8fp32']
-                    cond &= pipeline.F_vlayout == 'row'
+                    cond = dtype in ["fp8", "fp8bf16", "fp8fp32"]
+                    cond &= pipeline.F_vlayout == "row"
                     cond &= hdim == 128
                     if not cond:
                         continue
 
                 # fp32 only, all variations
                 if receipt == 800:
-                    cond = dtype == 'fp32'
-                    cond &= pipeline.F_skip == 'f'
-                    cond &= pipeline.F_logits == 'f'
+                    cond = dtype == "fp32"
+                    cond &= pipeline.F_skip == "f"
+                    cond &= pipeline.F_logits == "f"
                     if not cond:
                         continue
                 # fp32 only, minimal set of parameters
                 elif receipt == 801:
-                    cond = dtype == 'fp32'
+                    cond = dtype == "fp32"
                     cond &= hdim in [48, 128]
-                    cond &= mode == 'batch'
-                    cond &= pipeline.F_bias == 'no'
-                    cond &= pipeline.F_lse == 'f'
-                    cond &= pipeline.F_dropout == 'f'
-                    cond &= pipeline.F_skip == 'f'
-                    cond &= pipeline.F_logits == 'f'
-                    cond &= pipeline.F_mask == 's_no'
+                    cond &= mode == "batch"
+                    cond &= pipeline.F_bias == "no"
+                    cond &= pipeline.F_lse == "f"
+                    cond &= pipeline.F_dropout == "f"
+                    cond &= pipeline.F_skip == "f"
+                    cond &= pipeline.F_logits == "f"
+                    cond &= pipeline.F_mask == "s_no"
                     if not cond:
                         continue
                 else:
                     # Don't build fp32 by default
-                    if dtype == 'fp32':
+                    if dtype == "fp32":
                         continue
 
                 api_pool.register_traits(k.api_trait())
@@ -763,20 +1726,28 @@ def get_fwd_blobs(kernel_filter : Optional[str], receipt, optdim_list, mask_impl
 
     return (api_pool, gen)
 
+
 def write_single_fwd_kernel(kernel: FmhaFwdKernel, autogen_dir: Path) -> None:
     update_file(autogen_dir / kernel.filename, kernel.template)
 
-def write_fwd_api(api_pool : FmhaFwdApiPool, autogen_dir: Path) -> None:
+
+def write_fwd_api(api_pool: FmhaFwdApiPool, autogen_dir: Path) -> None:
     update_file(autogen_dir / FMHA_FWD_API_FILENAME, api_pool.api)
 
-def write_blobs(output_dir : Path, kernel_filter : str, receipt, optdim_list, mask_impl) -> None:
+
+def write_blobs(
+    output_dir: Path, kernel_filter: str, receipt, optdim_list, mask_impl
+) -> None:
     api_pool, kernels = get_fwd_blobs(kernel_filter, receipt, optdim_list, mask_impl)
     for kernel in kernels:
         write_single_fwd_kernel(kernel, output_dir)
     write_fwd_api(api_pool, output_dir)
 
-def list_blobs(file_path : Path, kernel_filter : str, receipt, optdim_list, mask_impl) -> None:
-    with file_path.open('a') as f:
+
+def list_blobs(
+    file_path: Path, kernel_filter: str, receipt, optdim_list, mask_impl
+) -> None:
+    with file_path.open("a") as f:
         _, kernels = get_fwd_blobs(kernel_filter, receipt, optdim_list, mask_impl)
         for kernel in kernels:
             f.write(str(file_path.parent / GEN_DIR / kernel.filename) + "\n")
diff --git a/example/ck_tile/01_fmha/codegen/ops/fmha_fwd_appendkv.py b/example/ck_tile/01_fmha/codegen/ops/fmha_fwd_appendkv.py
index 38491b56c4..9e107062e1 100644
--- a/example/ck_tile/01_fmha/codegen/ops/fmha_fwd_appendkv.py
+++ b/example/ck_tile/01_fmha/codegen/ops/fmha_fwd_appendkv.py
@@ -5,23 +5,27 @@
 import copy
 from dataclasses import dataclass
 import fnmatch
-import itertools
 from pathlib import Path
 from typing import List, Optional, Tuple
 
-from codegen.cmake_config import *
-from codegen.cpp_symbol_map import *
+from codegen.cmake_config import GEN_DIR
+from codegen.cpp_symbol_map import (
+    FWD_DTYPE_MAP,
+    BOOL_MAP,
+    ROPE_MAP,
+    LAYOUT_MAP,
+    ROPE_CHECK_MAP,
+)
 
 from codegen.ops.fmha_fwd import (
     FmhaFwdApiTrait,
-    DTYPE_BITS,
     FMHA_FWD_KERNEL_HEADER,
     FMHA_FWD_API_PER_DTYPE,
     FMHA_FWD_API_PER_HDIM_CASE,
 )
 
 
-FMHA_FWD_APPENDKV_KERNEL_BODY="""
+FMHA_FWD_APPENDKV_KERNEL_BODY = """
 using fmha_dtype_{F_idx} = {F_dtype};
 
 using fmha_trait_{F_idx} = ck_tile::TileFmhaFwdAppendKVTraits<{F_spad},
@@ -66,8 +70,8 @@ float fmha_fwd_appendkv_<trait_{F_idx}>(const ck_tile::stream_config& s, fmha_fw
 }}
 """
 
-FMHA_FWD_APPENDKV_API_FILENAME="fmha_fwd_appendkv_api.cpp"
-FMHA_FWD_APPENDKV_API="""
+FMHA_FWD_APPENDKV_API_FILENAME = "fmha_fwd_appendkv_api.cpp"
+FMHA_FWD_APPENDKV_API = """
 float fmha_fwd_appendkv(fmha_fwd_appendkv_traits t, fmha_fwd_appendkv_args a, const ck_tile::stream_config& s){{
     float r = -1;
 {F_dispatch}
@@ -75,7 +79,7 @@ float fmha_fwd_appendkv(fmha_fwd_appendkv_traits t, fmha_fwd_appendkv_args a, co
 }}
 """
 
-FMHA_FWD_APPENDKV_API_INNER_DISPATCH="""            {F_if}((t.is_v_rowmajor == {F_vlayout}) &&
+FMHA_FWD_APPENDKV_API_INNER_DISPATCH = """            {F_if}((t.is_v_rowmajor == {F_vlayout}) &&
                         ({F_scheck}) && ({F_skcheck}) && ({F_dcheck}) && ({F_dvcheck}) && (t.rope_type == {F_rope_check}) &&
                         ((a.block_table_ptr != nullptr) == {F_pagedkv})) {{
                 using trait_ = fmha_fwd_appendkv_traits_<{F_hdim}, {F_dtype}, {F_bs}, {F_bsk}, {F_bd}, {F_bdv}, {F_vlayout}, {F_spad}, {F_skpad}, {F_dpad}, {F_dvpad}, {F_rope}, {F_pagedkv}>;
@@ -83,81 +87,101 @@ FMHA_FWD_APPENDKV_API_INNER_DISPATCH="""            {F_if}((t.is_v_rowmajor == {
             }}
 """
 
+
 @dataclass
 class FmhaFwdAppendKVApiTrait:
     # sync with fmha_fwd_traits<>, to generate fallback calls
-    hdim      : str
-    dtype     : str  # data type
-    bs        : int  # tile size along q seqlen
-    bsk       : int  # tile size along k seqlen
-    bd        : int  # tile size along qk gemm unroll
-    bdv       : int  # tile size along kv gemm unroll
-    vlayout   : str
-    spad      : str
-    skpad     : str
-    dpad      : str
-    dvpad     : str
-    rope      : str # key from ROPE_MAP
-    pagedkv   : str
+    hdim: str
+    dtype: str  # data type
+    bs: int  # tile size along q seqlen
+    bsk: int  # tile size along k seqlen
+    bd: int  # tile size along qk gemm unroll
+    bdv: int  # tile size along kv gemm unroll
+    vlayout: str
+    spad: str
+    skpad: str
+    dpad: str
+    dvpad: str
+    rope: str  # key from ROPE_MAP
+    pagedkv: str
 
     @property
     def name(self) -> str:
-        return f'{self.hdim}-{self.dtype}-{self.bs}-{self.bsk}-{self.bd}-{self.bdv}-{self.vlayout}-'+\
-               f'{self.spad}-{self.skpad}-{self.dpad}-{self.dvpad}-{self.rope}-{self.pagedkv}'
+        return (
+            f"{self.hdim}-{self.dtype}-{self.bs}-{self.bsk}-{self.bd}-{self.bdv}-{self.vlayout}-"
+            + f"{self.spad}-{self.skpad}-{self.dpad}-{self.dvpad}-{self.rope}-{self.pagedkv}"
+        )
 
     @property
     def scheck(self) -> str:
-        if self.spad == 't' : return f'true /*a.seqlen_q % {self.bs} != 0*/'
-        else :                return f'a.seqlen_q % {self.bs} == 0'
+        if self.spad == "t":
+            return f"true /*a.seqlen_q % {self.bs} != 0*/"
+        else:
+            return f"a.seqlen_q % {self.bs} == 0"
 
     @property
     def skcheck(self) -> str:
         # we do not check all the values in a.seqlen_k_ptr
-        return 'true'
+        return "true"
 
     @property
     def dcheck(self) -> str:
-        if self.dpad == 't': return f'true /*a.hdim_q % {self.bd} != 0*/' # TODO: order of get_pipelines() matters! (ugly)
-        else :               return f'a.hdim_q % {self.bd} == 0'
+        if self.dpad == "t":
+            return f"true /*a.hdim_q % {self.bd} != 0*/"  # TODO: order of get_pipelines() matters! (ugly)
+        else:
+            return f"a.hdim_q % {self.bd} == 0"
 
     @property
     def dvcheck(self) -> str:
-        if self.dvpad == 't': return f'true /*a.hdim_v % {self.bdv} != 0*/' # TODO: order of get_pipelines() matters! (ugly)
-        else :                return f'a.hdim_v % {self.bdv} == 0'
+        if self.dvpad == "t":
+            return f"true /*a.hdim_v % {self.bdv} != 0*/"  # TODO: order of get_pipelines() matters! (ugly)
+        else:
+            return f"a.hdim_v % {self.bdv} == 0"
+
 
 @dataclass
 class FmhaFwdAppendKVPipeline:
-    F_vlayout   : str  # row/col
-    F_spad      : str  # true/false
-    F_skpad     : str  #
-    F_dpad      : str  #
-    F_dvpad     : str  #
-    F_rope      : str  # key from ROPE_MAP
-    F_pagedkv   : str  # t/f
+    F_vlayout: str  # row/col
+    F_spad: str  # true/false
+    F_skpad: str  #
+    F_dpad: str  #
+    F_dvpad: str  #
+    F_rope: str  # key from ROPE_MAP
+    F_pagedkv: str  # t/f
 
     @property
     def name(self) -> str:
         def pad_name() -> str:
-            n = ''
-            if self.F_spad == 't': n += 's'
-            if self.F_skpad == 't' : n += 'sk'
-            if self.F_dpad == 't' : n += 'd'
-            if self.F_dvpad == 't' : n += 'dv'
-            if n != '' : n = 'p' + n
+            n = ""
+            if self.F_spad == "t":
+                n += "s"
+            if self.F_skpad == "t":
+                n += "sk"
+            if self.F_dpad == "t":
+                n += "d"
+            if self.F_dvpad == "t":
+                n += "dv"
+            if n != "":
+                n = "p" + n
             return n
+
         pn = pad_name()
-        n = f'v{self.F_vlayout[0]}'
-        if pn != '' : n += f'_{pn}'
-        if self.F_rope != 'no': n += f'_{self.F_rope}'
-        if self.F_pagedkv == 't': n += '_pagedkv'
+        n = f"v{self.F_vlayout[0]}"
+        if pn != "":
+            n += f"_{pn}"
+        if self.F_rope != "no":
+            n += f"_{self.F_rope}"
+        if self.F_pagedkv == "t":
+            n += "_pagedkv"
         return n
 
+
 class FmhaFwdAppendKVApiPool:
     def __init__(self, mask_impl):
         self.pool = dict()
         self.mask_impl = mask_impl
 
-    def register_traits(self, trait : FmhaFwdApiTrait) -> None:
+    def register_traits(self, trait: FmhaFwdApiTrait) -> None:
         # TODO: do we need to check duplication?
         if trait.dtype not in self.pool.keys():
             self.pool[trait.dtype] = dict()
@@ -168,74 +192,104 @@ class FmhaFwdAppendKVApiPool:
 
     @property
     def api(self) -> str:
-        per_dtypes=str()
+        per_dtypes = str()
         for i, dtype in enumerate(self.pool.keys()):
-            per_hdim_case=str()
+            per_hdim_case = str()
             for j, hdim in enumerate(self.pool[dtype].keys()):
-                traits=self.pool[dtype][hdim]
-                inners=str()
+                traits = self.pool[dtype][hdim]
+                inners = str()
                 for k, trait in enumerate(traits):
-                    if_k = 'if' if k == 0 else 'else if'
-                    inners = inners + FMHA_FWD_APPENDKV_API_INNER_DISPATCH.format(F_if=if_k, F_vlayout=LAYOUT_MAP[trait.vlayout],
-                                   F_scheck=trait.scheck, F_skcheck=trait.skcheck, F_dcheck=trait.dcheck, F_dvcheck=trait.dvcheck, F_rope_check=ROPE_CHECK_MAP[trait.rope],
-                                   F_pagedkv=BOOL_MAP[trait.pagedkv], F_spad=BOOL_MAP[trait.spad], F_skpad=BOOL_MAP[trait.skpad], F_dpad=BOOL_MAP[trait.dpad], F_dvpad=BOOL_MAP[trait.dvpad],
-                                   F_rope=ROPE_MAP[trait.rope], F_bs=trait.bs, F_bsk=trait.bsk, F_bd=trait.bd, F_bdv=trait.bdv, F_hdim=hdim, F_dtype=FWD_DTYPE_MAP[dtype])
-                if_j = 'if' if j == 0 else 'else if'
-                per_hdim_case = per_hdim_case + FMHA_FWD_API_PER_HDIM_CASE.format(F_if=if_j, F_hdim=hdim, F_hdim_v=hdim, F_inner_dispatch=inners)
-            if_i = 'if' if i == 0 else 'else if'
-            per_dtypes = per_dtypes + FMHA_FWD_API_PER_DTYPE.format(F_if=if_i, F_dtype=dtype, F_hdim_case=per_hdim_case)
+                    if_k = "if" if k == 0 else "else if"
+                    inners = inners + FMHA_FWD_APPENDKV_API_INNER_DISPATCH.format(
+                        F_if=if_k,
+                        F_vlayout=LAYOUT_MAP[trait.vlayout],
+                        F_scheck=trait.scheck,
+                        F_skcheck=trait.skcheck,
+                        F_dcheck=trait.dcheck,
+                        F_dvcheck=trait.dvcheck,
+                        F_rope_check=ROPE_CHECK_MAP[trait.rope],
+                        F_pagedkv=BOOL_MAP[trait.pagedkv],
+                        F_spad=BOOL_MAP[trait.spad],
+                        F_skpad=BOOL_MAP[trait.skpad],
+                        F_dpad=BOOL_MAP[trait.dpad],
+                        F_dvpad=BOOL_MAP[trait.dvpad],
+                        F_rope=ROPE_MAP[trait.rope],
+                        F_bs=trait.bs,
+                        F_bsk=trait.bsk,
+                        F_bd=trait.bd,
+                        F_bdv=trait.bdv,
+                        F_hdim=hdim,
+                        F_dtype=FWD_DTYPE_MAP[dtype],
+                    )
+                if_j = "if" if j == 0 else "else if"
+                per_hdim_case = per_hdim_case + FMHA_FWD_API_PER_HDIM_CASE.format(
+                    F_if=if_j, F_hdim=hdim, F_hdim_v=hdim, F_inner_dispatch=inners
+                )
+            if_i = "if" if i == 0 else "else if"
+            per_dtypes = per_dtypes + FMHA_FWD_API_PER_DTYPE.format(
+                F_if=if_i, F_dtype=dtype, F_hdim_case=per_hdim_case
+            )
         if not per_dtypes:
             # empty string we add some ignore to suppress warning in api
-            per_dtypes += '    (void)t ; (void)s ; (void)a;'
-        return FMHA_FWD_KERNEL_HEADER + FMHA_FWD_APPENDKV_API.format(F_dispatch = per_dtypes)
+            per_dtypes += "    (void)t ; (void)s ; (void)a;"
+        return FMHA_FWD_KERNEL_HEADER + FMHA_FWD_APPENDKV_API.format(
+            F_dispatch=per_dtypes
+        )
+
 
 @dataclass
 class FmhaFwdAppendKVTileSize:
-    F_bs        : int  # tile size along q seqlen
-    F_bsk       : int  # tile size along k seqlen
-    F_bd        : int  # tile size along qk gemm unroll
-    F_bdv       : int  # tile size along kv gemm unroll
-    F_occupancy : int  # occupancy, -1 will let pipeline decide the occupancy, other value will overwrite occupancy
+    F_bs: int  # tile size along q seqlen
+    F_bsk: int  # tile size along k seqlen
+    F_bd: int  # tile size along qk gemm unroll
+    F_bdv: int  # tile size along kv gemm unroll
+    F_occupancy: int  # occupancy, -1 will let pipeline decide the occupancy, other value will overwrite occupancy
+
     @property
     def name(self) -> str:
-        return f"b{self.F_bs}x{self.F_bsk}x{self.F_bd}x{self.F_bdv}" +\
-            ("" if self.F_occupancy == -1 else f"_o{self.F_occupancy}")
+        return f"b{self.F_bs}x{self.F_bsk}x{self.F_bd}x{self.F_bdv}" + (
+            "" if self.F_occupancy == -1 else f"_o{self.F_occupancy}"
+        )
+
 
 @dataclass
 class FmhaFwdAppendKVKernel:
-    F_idx           : int  # this is not a tunable, but a counter to differentiate symbol
-    F_hdim          : int  # hdim
-    F_dtype         : str  # data type
-    F_tile          : FmhaFwdAppendKVTileSize
-    F_pipeline      : FmhaFwdAppendKVPipeline
-    mask_impl       : str
+    F_idx: int  # this is not a tunable, but a counter to differentiate symbol
+    F_hdim: int  # hdim
+    F_dtype: str  # data type
+    F_tile: FmhaFwdAppendKVTileSize
+    F_pipeline: FmhaFwdAppendKVPipeline
+    mask_impl: str
 
     @property
     def template(self) -> str:
-        kernel_body = str()
-        return FMHA_FWD_KERNEL_HEADER + \
-            FMHA_FWD_APPENDKV_KERNEL_BODY.format(
-                F_idx           = self.F_idx,
-                F_hdim          = self.F_hdim,
-                F_dtype         = FWD_DTYPE_MAP[self.F_dtype],
-                F_bs            = self.F_tile.F_bs,
-                F_bsk           = self.F_tile.F_bsk,
-                F_bd            = self.F_tile.F_bd,
-                F_bdv           = self.F_tile.F_bdv,
-                F_vlayout       = LAYOUT_MAP[self.F_pipeline.F_vlayout],
-                F_spad          = BOOL_MAP[self.F_pipeline.F_spad],
-                F_skpad         = BOOL_MAP[self.F_pipeline.F_skpad],
-                F_dpad          = BOOL_MAP[self.F_pipeline.F_dpad],
-                F_dvpad         = BOOL_MAP[self.F_pipeline.F_dvpad],
-                F_rope          = ROPE_MAP[self.F_pipeline.F_rope],
-                F_pagedkv       = BOOL_MAP[self.F_pipeline.F_pagedkv],
-                F_occupancy     = self.F_tile.F_occupancy)
+        return FMHA_FWD_KERNEL_HEADER + FMHA_FWD_APPENDKV_KERNEL_BODY.format(
+            F_idx=self.F_idx,
+            F_hdim=self.F_hdim,
+            F_dtype=FWD_DTYPE_MAP[self.F_dtype],
+            F_bs=self.F_tile.F_bs,
+            F_bsk=self.F_tile.F_bsk,
+            F_bd=self.F_tile.F_bd,
+            F_bdv=self.F_tile.F_bdv,
+            F_vlayout=LAYOUT_MAP[self.F_pipeline.F_vlayout],
+            F_spad=BOOL_MAP[self.F_pipeline.F_spad],
+            F_skpad=BOOL_MAP[self.F_pipeline.F_skpad],
+            F_dpad=BOOL_MAP[self.F_pipeline.F_dpad],
+            F_dvpad=BOOL_MAP[self.F_pipeline.F_dvpad],
+            F_rope=ROPE_MAP[self.F_pipeline.F_rope],
+            F_pagedkv=BOOL_MAP[self.F_pipeline.F_pagedkv],
+            F_occupancy=self.F_tile.F_occupancy,
+        )
 
     @property
     def name(self) -> str:
         # TODO: we don't encode idx here
-        return f"fmha_fwd_appendkv_d{self.F_hdim}_{self.F_dtype}_" + \
-                self.F_tile.name + '_' + self.F_pipeline.name
+        return (
+            f"fmha_fwd_appendkv_d{self.F_hdim}_{self.F_dtype}_"
+            + self.F_tile.name
+            + "_"
+            + self.F_pipeline.name
+        )
 
     @property
     def filename(self) -> str:
@@ -243,40 +297,45 @@ class FmhaFwdAppendKVKernel:
 
     def api_trait(self) -> FmhaFwdAppendKVApiTrait:
         return FmhaFwdAppendKVApiTrait(
-                hdim=str(self.F_hdim),
-                dtype=self.F_dtype,
-                bs=self.F_tile.F_bs,
-                bsk=self.F_tile.F_bsk,
-                bd=self.F_tile.F_bd,
-                bdv=self.F_tile.F_bdv,
-                vlayout=self.F_pipeline.F_vlayout,
-                spad=self.F_pipeline.F_spad,
-                skpad=self.F_pipeline.F_skpad,
-                dpad=self.F_pipeline.F_dpad,
-                dvpad=self.F_pipeline.F_dvpad,
-                rope=self.F_pipeline.F_rope,
-                pagedkv=self.F_pipeline.F_pagedkv)
+            hdim=str(self.F_hdim),
+            dtype=self.F_dtype,
+            bs=self.F_tile.F_bs,
+            bsk=self.F_tile.F_bsk,
+            bd=self.F_tile.F_bd,
+            bdv=self.F_tile.F_bdv,
+            vlayout=self.F_pipeline.F_vlayout,
+            spad=self.F_pipeline.F_spad,
+            skpad=self.F_pipeline.F_skpad,
+            dpad=self.F_pipeline.F_dpad,
+            dvpad=self.F_pipeline.F_dvpad,
+            rope=self.F_pipeline.F_rope,
+            pagedkv=self.F_pipeline.F_pagedkv,
+        )
+
 
 # TODO: design a more practical way to do it
 # this is current supported tile size per hdim
-def get_fmha_fwd_appendkv_tile_dict_from_dtype(dtype : str) -> Optional[dict]:
-    if dtype == 'fp16' or dtype == 'bf16':
+def get_fmha_fwd_appendkv_tile_dict_from_dtype(dtype: str) -> Optional[dict]:
+    if dtype == "fp16" or dtype == "bf16":
         return {
-            '32'  : FmhaFwdAppendKVTileSize(64, 64,  32,  32, -1),
-            '64'  : FmhaFwdAppendKVTileSize(64, 64,  64,  64, -1),
-            '128' : FmhaFwdAppendKVTileSize(64, 64, 128, 128, -1),
-            '256' : FmhaFwdAppendKVTileSize(64, 64, 256, 256, -1),
+            "32": FmhaFwdAppendKVTileSize(64, 64, 32, 32, -1),
+            "64": FmhaFwdAppendKVTileSize(64, 64, 64, 64, -1),
+            "128": FmhaFwdAppendKVTileSize(64, 64, 128, 128, -1),
+            "256": FmhaFwdAppendKVTileSize(64, 64, 256, 256, -1),
         }
-    elif dtype == 'fp8' or dtype == 'bf8':
+    elif dtype == "fp8" or dtype == "bf8":
         return {
-            '64'  : FmhaFwdAppendKVTileSize(64, 64,  64,  64, -1),
-            '128' : FmhaFwdAppendKVTileSize(64, 64, 128, 128, -1),
-            '256' : FmhaFwdAppendKVTileSize(64, 64, 256, 256, -1)
+            "64": FmhaFwdAppendKVTileSize(64, 64, 64, 64, -1),
+            "128": FmhaFwdAppendKVTileSize(64, 64, 128, 128, -1),
+            "256": FmhaFwdAppendKVTileSize(64, 64, 256, 256, -1),
         }
     else:
         return None
 
-def get_fwd_appendkv_blobs(kernel_filter : Optional[str], receipt, mask_impl, optdim_list) -> Tuple[FmhaFwdAppendKVApiPool, List[FmhaFwdAppendKVKernel]]:
+
+def get_fwd_appendkv_blobs(
+    kernel_filter: Optional[str], receipt, mask_impl, optdim_list
+) -> Tuple[FmhaFwdAppendKVApiPool, List[FmhaFwdAppendKVKernel]]:
     # TODO: we don't support tuning yet, so pick up one value for vlayout/pipeline/pad
     #       support this in future
     def get_pipelines(dtype, hdim) -> List[FmhaFwdAppendKVPipeline]:
@@ -284,25 +343,50 @@ def get_fwd_appendkv_blobs(kernel_filter : Optional[str], receipt, mask_impl, op
         # TODO: the order of List matters! the later in this list will be also be checked later
         # TODO: currently for qr pipeline, let 't' padding to appear later!!
         # TODO: how to design this more generic?
-        squant = 't' if dtype == 'fp8' else 'f'
         pipelines = []
-        if dtype in ['fp16', 'bf16']:
+        if dtype in ["fp16", "bf16"]:
             # NOTICE: it will be very complicated if we consider all the hdim_q padding cases while
             #         applying rotary embedding, so I just use 't' in inter/half pipelines
-            for vlayout in ['row', 'col']:
+            for vlayout in ["row", "col"]:
                 for pagedkv in ["t", "f"]:
-                    pipelines.append(FmhaFwdAppendKVPipeline(vlayout, 'f', 't', 'f', 'f', 'no', pagedkv))
-                    pipelines.append(FmhaFwdAppendKVPipeline(vlayout, 't', 't', 't', 't', 'no', pagedkv))
+                    pipelines.append(
+                        FmhaFwdAppendKVPipeline(
+                            vlayout, "f", "t", "f", "f", "no", pagedkv
+                        )
+                    )
+                    pipelines.append(
+                        FmhaFwdAppendKVPipeline(
+                            vlayout, "t", "t", "t", "t", "no", pagedkv
+                        )
+                    )
 
-                    pipelines.append(FmhaFwdAppendKVPipeline(vlayout, 'f', 't', 't', 'f', 'inter', pagedkv))
-                    pipelines.append(FmhaFwdAppendKVPipeline(vlayout, 't', 't', 't', 't', 'inter', pagedkv))
+                    pipelines.append(
+                        FmhaFwdAppendKVPipeline(
+                            vlayout, "f", "t", "t", "f", "inter", pagedkv
+                        )
+                    )
+                    pipelines.append(
+                        FmhaFwdAppendKVPipeline(
+                            vlayout, "t", "t", "t", "t", "inter", pagedkv
+                        )
+                    )
 
-                    pipelines.append(FmhaFwdAppendKVPipeline(vlayout, 'f', 't', 't', 'f', 'half', pagedkv))
-                    pipelines.append(FmhaFwdAppendKVPipeline(vlayout, 't', 't', 't', 't', 'half', pagedkv))
-        elif dtype in ['fp8', 'bf8']:
+                    pipelines.append(
+                        FmhaFwdAppendKVPipeline(
+                            vlayout, "f", "t", "t", "f", "half", pagedkv
+                        )
+                    )
+                    pipelines.append(
+                        FmhaFwdAppendKVPipeline(
+                            vlayout, "t", "t", "t", "t", "half", pagedkv
+                        )
+                    )
+        elif dtype in ["fp8", "bf8"]:
             # rope/paged-kv is not supported
-            pipelines.append(FmhaFwdAppendKVPipeline('col', 't', 't', 't', 't', 'no', 'f'))
-        elif dtype in ['fp8fp16', 'fp8bf16']:
+            pipelines.append(
+                FmhaFwdAppendKVPipeline("col", "t", "t", "t", "t", "no", "f")
+            )
+        elif dtype in ["fp8fp16", "fp8bf16"]:
             # TODO
             None
         else:
@@ -314,19 +398,21 @@ def get_fwd_appendkv_blobs(kernel_filter : Optional[str], receipt, mask_impl, op
 
     for dtype in FWD_DTYPE_MAP.keys():
         d = get_fmha_fwd_appendkv_tile_dict_from_dtype(dtype)
-        if d == None:
+        if d is None:
             continue
         for hdim_str in d.keys():
             tile = d[hdim_str]
             hdim = int(hdim_str)
             for pipeline in get_pipelines(dtype, hdim):
-                k = FmhaFwdAppendKVKernel(F_idx=0,
-                                  F_hdim=hdim,
-                                  F_dtype=dtype,
-                                  F_tile=tile,
-                                  F_pipeline=pipeline,
-                                  mask_impl=mask_impl)
-                if kernel_filter != '':
+                k = FmhaFwdAppendKVKernel(
+                    F_idx=0,
+                    F_hdim=hdim,
+                    F_dtype=dtype,
+                    F_tile=tile,
+                    F_pipeline=pipeline,
+                    mask_impl=mask_impl,
+                )
+                if kernel_filter != "":
                     if not fnmatch.fnmatch(k.name, kernel_filter):
                         continue
                 if optdim_list != [-1]:
@@ -334,20 +420,20 @@ def get_fwd_appendkv_blobs(kernel_filter : Optional[str], receipt, mask_impl, op
                         continue
                 # 2 - Flash attention integration
                 if receipt == 2:
-                    cond = dtype in ['fp16', 'bf16']
-                    cond &= pipeline.F_vlayout == 'row'
+                    cond = dtype in ["fp16", "bf16"]
+                    cond &= pipeline.F_vlayout == "row"
                     if not cond:
                         continue
                 # PyTorch integration
                 elif receipt == 4:
-                    cond = dtype in ['fp16', 'bf16']
-                    cond &= pipeline.F_vlayout == 'row'
+                    cond = dtype in ["fp16", "bf16"]
+                    cond &= pipeline.F_vlayout == "row"
                     if not cond:
                         continue
 
                 # fp32 only
                 if receipt == 800 or receipt == 801:
-                    cond = dtype == 'fp32'
+                    cond = dtype == "fp32"
                     if not cond:
                         continue
 
@@ -356,21 +442,33 @@ def get_fwd_appendkv_blobs(kernel_filter : Optional[str], receipt, mask_impl, op
 
     return (api_pool, gen)
 
+
 def write_single_kernel(kernel: FmhaFwdAppendKVKernel, autogen_dir: Path) -> None:
     (autogen_dir / kernel.filename).write_text(kernel.template)
 
-def write_fwd_appendkv_api(api_pool : FmhaFwdAppendKVApiPool, autogen_dir: Path) -> None:
+
+def write_fwd_appendkv_api(api_pool: FmhaFwdAppendKVApiPool, autogen_dir: Path) -> None:
     (autogen_dir / FMHA_FWD_APPENDKV_API_FILENAME).write_text(api_pool.api)
 
-def write_blobs(output_dir : Path, kernel_filter : Optional[str], receipt, optdim_list, mask_impl) -> None:
-    api_pool, kernels = get_fwd_appendkv_blobs(kernel_filter, receipt, mask_impl, optdim_list)
+
+def write_blobs(
+    output_dir: Path, kernel_filter: Optional[str], receipt, optdim_list, mask_impl
+) -> None:
+    api_pool, kernels = get_fwd_appendkv_blobs(
+        kernel_filter, receipt, mask_impl, optdim_list
+    )
     for kernel in kernels:
         write_single_kernel(kernel, output_dir)
     write_fwd_appendkv_api(api_pool, output_dir)
 
-def list_blobs(file_path : Path, kernel_filter : Optional[str], receipt, optdim_list, mask_impl) -> None:
-    with file_path.open('a') as f:
-        _, kernels = get_fwd_appendkv_blobs(kernel_filter, receipt, mask_impl, optdim_list)
+
+def list_blobs(
+    file_path: Path, kernel_filter: Optional[str], receipt, optdim_list, mask_impl
+) -> None:
+    with file_path.open("a") as f:
+        _, kernels = get_fwd_appendkv_blobs(
+            kernel_filter, receipt, mask_impl, optdim_list
+        )
         for kernel in kernels:
             f.write(str(file_path.parent / GEN_DIR / kernel.filename) + "\n")
         f.write(str(file_path.parent / GEN_DIR / FMHA_FWD_APPENDKV_API_FILENAME) + "\n")
diff --git a/example/ck_tile/01_fmha/codegen/ops/fmha_fwd_splitkv.py b/example/ck_tile/01_fmha/codegen/ops/fmha_fwd_splitkv.py
index 281357ef1e..9a77bc8e94 100644
--- a/example/ck_tile/01_fmha/codegen/ops/fmha_fwd_splitkv.py
+++ b/example/ck_tile/01_fmha/codegen/ops/fmha_fwd_splitkv.py
@@ -9,41 +9,44 @@ import itertools
 from pathlib import Path
 from typing import List, Optional, Tuple, Union
 
-from codegen.cmake_config import *
-from codegen.cpp_symbol_map import *
+from codegen.cmake_config import GEN_DIR
+from codegen.cpp_symbol_map import (
+    PIPELINE_ENUM_MAP,
+    get_mask_check_map,
+    LAYOUT_MAP,
+    BIAS_CHECK_MAP,
+    MODE_MAP,
+    FWD_DTYPE_MAP,
+    BIAS_MAP,
+    get_mask_map,
+    BOOL_MAP,
+)
 
 from codegen.ops.fmha_fwd import (
     FmhaFwdTileSize,
-    FmhaFwdApiTrait,
     FMHA_FWD_KERNEL_HEADER,
     FMHA_FWD_API_PER_DTYPE,
     FMHA_FWD_API_PER_HDIM_CASE,
 )
 
 
-DTYPE_BITS = {
-    "fp32": 32,
-    "fp16": 16,
-    "bf16": 16,
-    "fp8" : 8,
-    "bf8" : 8
-}
+DTYPE_BITS = {"fp32": 32, "fp16": 16, "bf16": 16, "fp8": 8, "bf8": 8}
 
 K0_MAX_SUBMAX_MAP = {
-    32 : 32,
-    64 : 64,
-    96 : 128,
+    32: 32,
+    64: 64,
+    96: 128,
     128: 128,
     # 160: 160,
-    256: 256
+    256: 256,
 }
 
 FMHA_FWD_SPLITKV_PIPELINE_MAP = {
-    "qr" : "ck_tile::BlockFmhaFwdSplitKVPipelineQRKSVS",
-    "qr_nwarp_sshuffle" : "ck_tile::BlockFmhaFwdSplitKVPipelineNWarpSShuffleQRKSVS",
+    "qr": "ck_tile::BlockFmhaFwdSplitKVPipelineQRKSVS",
+    "qr_nwarp_sshuffle": "ck_tile::BlockFmhaFwdSplitKVPipelineNWarpSShuffleQRKSVS",
 }
 
-FMHA_FWD_SPLITKV_KERNEL_BODY="""
+FMHA_FWD_SPLITKV_KERNEL_BODY = """
 using fmha_dtype_{F_idx} = {F_dtype};
 using fmha_variant_{F_idx} = ck_tile::ComposedAttention<{F_logits} * ck_tile::LOGITS_SOFT_CAP, CK_TILE_FMHA_FWD_FAST_EXP2>;
 using fmha_mask_{F_idx} = {F_mask};
@@ -169,7 +172,7 @@ std::string fmha_fwd_splitkv_get_name_<trait_{F_idx}>()
 }}
 """
 
-FMHA_FWD_SPLITKV_COMBINE_KERNEL_BODY="""
+FMHA_FWD_SPLITKV_COMBINE_KERNEL_BODY = """
 using fmha_dtype_{F_idx} = {F_dtype};
 
 namespace {{
@@ -244,8 +247,8 @@ std::string fmha_fwd_splitkv_combine_get_name_<trait_{F_idx}>()
 }}
 """
 
-FMHA_FWD_SPLITKV_API_FILENAME="fmha_fwd_splitkv_api.cpp"
-FMHA_FWD_SPLITKV_API="""
+FMHA_FWD_SPLITKV_API_FILENAME = "fmha_fwd_splitkv_api.cpp"
+FMHA_FWD_SPLITKV_API = """
 #include <iostream>
 
 template<typename fmha_fwd_splitkv_traits_, typename fmha_fwd_splitkv_combine_traits_>
@@ -270,7 +273,7 @@ float fmha_fwd_splitkv(fmha_fwd_splitkv_traits t, fmha_fwd_splitkv_args a, const
 }}
 """
 
-FMHA_FWD_SPLITKV_API_INNER_DISPATCH="""            {F_if}((t.is_group_mode == {F_mode}) && (t.is_v_rowmajor == {F_vlayout}) && (t.has_logits_soft_cap == {F_logits}) && ({F_mask_check}) && (t.bias_type == {F_bias_check}) && (t.do_fp8_static_quant == {F_squant}) &&
+FMHA_FWD_SPLITKV_API_INNER_DISPATCH = """            {F_if}((t.is_group_mode == {F_mode}) && (t.is_v_rowmajor == {F_vlayout}) && (t.has_logits_soft_cap == {F_logits}) && ({F_mask_check}) && (t.bias_type == {F_bias_check}) && (t.do_fp8_static_quant == {F_squant}) &&
                         ((a.block_table_ptr != nullptr) == {F_pagedkv}) && ({F_scheck}) && ({F_skcheck}) && ({F_dcheck}) && ({F_dvcheck})) {{
                 using traits_ = fmha_fwd_splitkv_traits_<{F_hdim}, {F_dtype}, {F_mode}, {F_bm0}, {F_bn0}, {F_bk0}, {F_bn1}, {F_bk1}, {F_bk0max}, {F_vlayout}, {F_pipeline_enum}, {F_logits}, {F_mask}, {F_bias}, true, {F_squant}, {F_pagedkv}, {F_spad}, {F_skpad}, {F_dpad}, {F_dvpad}>;
 
@@ -298,172 +301,232 @@ FMHA_FWD_SPLITKV_API_INNER_DISPATCH="""            {F_if}((t.is_group_mode == {F
             }}
 """
 
+
 @dataclass
 class FmhaFwdSplitKVApiTrait:
-    pipeline_tag : str
+    pipeline_tag: str
     # sync with fmha_fwd_traits<>, to generate fallback calls
-    hdim      : str
-    dtype     : str  # data type
-    mode      : str  # value from MODE_MAP
-    bm0       : int  # tile size along q seqlen (block size)
-    bn0       : int  # tile size along qk seqlen
-    bk0       : int  # tile size along qk gemm unroll
-    bn1       : int  # tile size along v head_dim
-    bk1       : int  # tile size along kv gemm unroll
-    bk0max    : int
-    vlayout   : str
-    mask      : str
-    logits    : str
-    bias      : str  #
-    lse       : str  #
-    squant    : str  #
-    spad      : str
-    skpad     : str
-    dpad      : str
-    dvpad     : str
-    pagedkv   : str
+    hdim: str
+    dtype: str  # data type
+    mode: str  # value from MODE_MAP
+    bm0: int  # tile size along q seqlen (block size)
+    bn0: int  # tile size along qk seqlen
+    bk0: int  # tile size along qk gemm unroll
+    bn1: int  # tile size along v head_dim
+    bk1: int  # tile size along kv gemm unroll
+    bk0max: int
+    vlayout: str
+    mask: str
+    logits: str
+    bias: str  #
+    lse: str  #
+    squant: str  #
+    spad: str
+    skpad: str
+    dpad: str
+    dvpad: str
+    pagedkv: str
 
     @property
     def name(self) -> str:
-        return f'{self.hdim}-{self.dtype}-{self.mode}-{self.bm0}-{self.bn0}-{self.bk0}-{self.bn0}-{self.bk1}-{self.bk0max}-'+\
-                    f'{self.vlayout}-{self.logits}-{self.mask}-{self.bias}-{self.lse}-{self.squant}-{self.spad}-{self.skpad}-{self.dpad}-'+\
-                    f'{self.dvpad}-{self.pagedkv}'
+        return (
+            f"{self.hdim}-{self.dtype}-{self.mode}-{self.bm0}-{self.bn0}-{self.bk0}-{self.bn0}-{self.bk1}-{self.bk0max}-"
+            + f"{self.vlayout}-{self.logits}-{self.mask}-{self.bias}-{self.lse}-{self.squant}-{self.spad}-{self.skpad}-{self.dpad}-"
+            + f"{self.dvpad}-{self.pagedkv}"
+        )
 
     @property
     def scheck(self) -> str:
-        if self.mode == 'group': return 'true/*group mode spad always true*/'                  # group mode only generate spad/skpad == true
-        if self.pipeline_tag == 'qr_async':
-            if self.spad == 't' : return 'true' # always support
-            else :                return 'true'
-        elif self.pipeline_tag in ['qr', 'qr_nwarp_sshuffle']:
-            if self.spad == 't' : return f'true /*a.seqlen_q % {self.bm0} != 0*/'  # TODO: order of get_pipelines() matters! (ugly)
-            else :                return f'a.seqlen_q % {self.bm0} == 0'
-        else: assert False
+        if self.mode == "group":
+            return "true/*group mode spad always true*/"  # group mode only generate spad/skpad == true
+        if self.pipeline_tag == "qr_async":
+            if self.spad == "t":
+                return "true"  # always support
+            else:
+                return "true"
+        elif self.pipeline_tag in ["qr", "qr_nwarp_sshuffle"]:
+            if self.spad == "t":
+                return f"true /*a.seqlen_q % {self.bm0} != 0*/"  # TODO: order of get_pipelines() matters! (ugly)
+            else:
+                return f"a.seqlen_q % {self.bm0} == 0"
+        else:
+            assert False
 
     @property
     def skcheck(self) -> str:
-        if self.mode == 'group': return 'true/*group mode skpad always true*/'                  # group mode only generate spad/skpad == true
-        if self.pipeline_tag == 'qr_async':
-            if self.skpad == 't' : return f'a.seqlen_k == 0 || a.seqlen_k % {self.bn0} != 0'
-            else :                 return f'a.seqlen_k != 0 && a.seqlen_k % {self.bn0} == 0'
-        elif self.pipeline_tag in ['qr', 'qr_nwarp_sshuffle']:
-            if self.skpad == 't' : return f'true /*a.seqlen_k_ptr != nullptr || a.seqlen_k % {self.bn0} != 0*/' # TODO: order of get_pipelines() matters! (ugly)
-            else :                return f'a.seqlen_k_ptr == nullptr && a.seqlen_k % {self.bn0} == 0'
-        else: assert False
+        if self.mode == "group":
+            return "true/*group mode skpad always true*/"  # group mode only generate spad/skpad == true
+        if self.pipeline_tag == "qr_async":
+            if self.skpad == "t":
+                return f"a.seqlen_k == 0 || a.seqlen_k % {self.bn0} != 0"
+            else:
+                return f"a.seqlen_k != 0 && a.seqlen_k % {self.bn0} == 0"
+        elif self.pipeline_tag in ["qr", "qr_nwarp_sshuffle"]:
+            if self.skpad == "t":
+                return f"true /*a.seqlen_k_ptr != nullptr || a.seqlen_k % {self.bn0} != 0*/"  # TODO: order of get_pipelines() matters! (ugly)
+            else:
+                return f"a.seqlen_k_ptr == nullptr && a.seqlen_k % {self.bn0} == 0"
+        else:
+            assert False
 
     @property
     def dcheck(self) -> str:
-        if self.pipeline_tag == 'qr_async':
+        if self.pipeline_tag == "qr_async":
             vec = int((32 * 4) / DTYPE_BITS[self.dtype])
-            if self.dpad == 't': return f'a.hdim_q % {vec} == 0'
-            else :               assert False
-        elif self.pipeline_tag in ['qr', 'qr_nwarp_sshuffle']:
+            if self.dpad == "t":
+                return f"a.hdim_q % {vec} == 0"
+            else:
+                assert False
+        elif self.pipeline_tag in ["qr", "qr_nwarp_sshuffle"]:
             bk0submax = K0_MAX_SUBMAX_MAP[self.bk0max]
-            if self.dpad == 't': return f'true /*a.hdim_q % {bk0submax} != 0*/' # TODO: order of get_pipelines() matters! (ugly)
-            else :               return f'a.hdim_q % {bk0submax} == 0'
-        else:   assert False
+            if self.dpad == "t":
+                return f"true /*a.hdim_q % {bk0submax} != 0*/"  # TODO: order of get_pipelines() matters! (ugly)
+            else:
+                return f"a.hdim_q % {bk0submax} == 0"
+        else:
+            assert False
 
     @property
     def dvcheck(self) -> str:
-        if self.pipeline_tag == 'qr_async':
+        if self.pipeline_tag == "qr_async":
             vec = int((32 * 4) / DTYPE_BITS[self.dtype])
-            if self.dvpad == 't': return f'a.hdim_v % {vec} == 0'
-            else :                assert False
-        elif self.pipeline_tag in ['qr', 'qr_nwarp_sshuffle']:
+            if self.dvpad == "t":
+                return f"a.hdim_v % {vec} == 0"
+            else:
+                assert False
+        elif self.pipeline_tag in ["qr", "qr_nwarp_sshuffle"]:
             bk0submax = K0_MAX_SUBMAX_MAP[self.bk0max]
-            if self.dvpad == 't': return f'true /*a.hdim_v % {bk0submax} != 0*/' # TODO: order of get_pipelines() matters! (ugly)
-            else :                return f'a.hdim_v % {bk0submax} == 0'
-        else:   assert False
+            if self.dvpad == "t":
+                return f"true /*a.hdim_v % {bk0submax} != 0*/"  # TODO: order of get_pipelines() matters! (ugly)
+            else:
+                return f"a.hdim_v % {bk0submax} == 0"
+        else:
+            assert False
+
 
 @dataclass
 class FmhaFwdSplitKVPipeline:
-    tag : str
+    tag: str
 
-    F_vlayout   : str  # row/col
-    F_spad      : str  # true/false
-    F_skpad     : str  #
-    F_dpad      : str  #
-    F_dvpad     : str  #
-    F_logits    : str  # t/f
-    F_bias      : str  # true/false
-    F_lse       : str  #
-    F_squant    : str  #
-    F_pagedkv   : str  # t/f
-    F_mask      : str  # value from MASK_MAP
+    F_vlayout: str  # row/col
+    F_spad: str  # true/false
+    F_skpad: str  #
+    F_dpad: str  #
+    F_dvpad: str  #
+    F_logits: str  # t/f
+    F_bias: str  # true/false
+    F_lse: str  #
+    F_squant: str  #
+    F_pagedkv: str  # t/f
+    F_mask: str  # value from MASK_MAP
 
     @property
     def name(self) -> str:
         def pad_name() -> str:
-            n = ''
-            if self.F_spad == 't': n += 's'
-            if self.F_skpad == 't' : n += 'sk'
-            if self.F_dpad == 't' : n += 'd'
-            if self.F_dvpad == 't' : n += 'dv'
-            if n != '' : n = 'p' + n
+            n = ""
+            if self.F_spad == "t":
+                n += "s"
+            if self.F_skpad == "t":
+                n += "sk"
+            if self.F_dpad == "t":
+                n += "d"
+            if self.F_dvpad == "t":
+                n += "dv"
+            if n != "":
+                n = "p" + n
             return n
+
         pn = pad_name()
-        n = f'{self.tag}_v{self.F_vlayout[0]}'
-        if pn != '' : n += f'_{pn}'
-        else: n += '_npad'
-
-        if self.F_logits == 't' : n += '_logits'
-        else: n += '_nlogits'
-
-        if self.F_bias != 'no' : n += f'_{self.F_bias}'
-        else: n += '_nbias'
-
-        if self.F_mask[0:2] == 's_':
-            if self.F_mask == 's_mask': n += f'_mask'
-            else: n += '_nmask'
+        n = f"{self.tag}_v{self.F_vlayout[0]}"
+        if pn != "":
+            n += f"_{pn}"
         else:
-            if self.F_mask != 'no' : n += f'_m{self.F_mask[0]}'
-            else: n += '_nmask'
+            n += "_npad"
 
-        if self.F_lse == 't' : n += '_lse'
-        else: n += '_nlse'
+        if self.F_logits == "t":
+            n += "_logits"
+        else:
+            n += "_nlogits"
 
-        if self.F_squant == 't' : n += '_squant'
-        else: n += '_nsquant'
+        if self.F_bias != "no":
+            n += f"_{self.F_bias}"
+        else:
+            n += "_nbias"
 
-        if self.F_pagedkv == 't' : n += '_pagedkv'
-        else: n += '_npagedkv'
+        if self.F_mask[0:2] == "s_":
+            if self.F_mask == "s_mask":
+                n += "_mask"
+            else:
+                n += "_nmask"
+        else:
+            if self.F_mask != "no":
+                n += f"_m{self.F_mask[0]}"
+            else:
+                n += "_nmask"
+
+        if self.F_lse == "t":
+            n += "_lse"
+        else:
+            n += "_nlse"
+
+        if self.F_squant == "t":
+            n += "_squant"
+        else:
+            n += "_nsquant"
+
+        if self.F_pagedkv == "t":
+            n += "_pagedkv"
+        else:
+            n += "_npagedkv"
         return n
 
+
 @dataclass
 class FmhaFwdSplitKVCombinePipeline:
-    tag : str
+    tag: str
 
-    F_spad      : str  # true/false
-    F_dvpad     : str  #
-    F_lse       : str  #
-    F_squant    : str  #
+    F_spad: str  # true/false
+    F_dvpad: str  #
+    F_lse: str  #
+    F_squant: str  #
 
     @property
     def name(self) -> str:
         def pad_name() -> str:
-            n = ''
-            if self.F_spad == 't': n += 's'
-            if self.F_dvpad == 't' : n += 'dv'
-            if n != '' : n = 'p' + n
+            n = ""
+            if self.F_spad == "t":
+                n += "s"
+            if self.F_dvpad == "t":
+                n += "dv"
+            if n != "":
+                n = "p" + n
             return n
+
         pn = pad_name()
-        n = f'{self.tag}'
-        if pn != '' : n += f'_{pn}'
-        else: n += '_npad'
+        n = f"{self.tag}"
+        if pn != "":
+            n += f"_{pn}"
+        else:
+            n += "_npad"
 
-        if self.F_lse == 't' : n += '_lse'
-        else: n += '_nlse'
+        if self.F_lse == "t":
+            n += "_lse"
+        else:
+            n += "_nlse"
 
-        if self.F_squant == 't' : n += '_squant'
-        else: n += '_nsquant'
+        if self.F_squant == "t":
+            n += "_squant"
+        else:
+            n += "_nsquant"
         return n
 
+
 class FmhaFwdSplitKVApiPool:
     def __init__(self, mask_impl):
         self.pool = dict()
         self.mask_impl = mask_impl
 
-    def register_traits(self, trait : FmhaFwdSplitKVApiTrait) -> None:
+    def register_traits(self, trait: FmhaFwdSplitKVApiTrait) -> None:
         # TODO: do we need to check duplication?
         if trait.dtype not in self.pool.keys():
             self.pool[trait.dtype] = dict()
@@ -474,97 +537,132 @@ class FmhaFwdSplitKVApiPool:
 
     @property
     def api(self) -> str:
-        per_dtypes=str()
+        per_dtypes = str()
         for i, dtype in enumerate(self.pool.keys()):
-            per_hdim_case=str()
+            per_hdim_case = str()
             for j, hdim in enumerate(self.pool[dtype].keys()):
-                traits=self.pool[dtype][hdim]
-                inners=str()
+                traits = self.pool[dtype][hdim]
+                inners = str()
                 for k, trait in enumerate(traits):
-                    if_k = 'if' if k == 0 else 'else if'
-                    inners = inners + FMHA_FWD_SPLITKV_API_INNER_DISPATCH.format(F_if=if_k, F_mode=MODE_MAP[trait.mode], F_vlayout=LAYOUT_MAP[trait.vlayout],
-                                   F_pipeline_enum=PIPELINE_ENUM_MAP[trait.pipeline_tag], F_logits=BOOL_MAP[trait.logits], F_mask=get_mask_map(self.mask_impl)[trait.mask],
-                                   F_mask_check=get_mask_check_map(self.mask_impl)[trait.mask], F_bias_check=BIAS_CHECK_MAP[trait.bias], F_bias=BIAS_MAP[trait.bias],
-                                   F_lse=BOOL_MAP[trait.lse], F_squant=BOOL_MAP[trait.squant], F_pagedkv=BOOL_MAP[trait.pagedkv],
-                                   F_scheck=trait.scheck, F_skcheck=trait.skcheck, F_dcheck=trait.dcheck, F_dvcheck=trait.dvcheck,
-                                   F_spad=BOOL_MAP[trait.spad], F_skpad=BOOL_MAP[trait.skpad], F_dpad=BOOL_MAP[trait.dpad], F_dvpad=BOOL_MAP[trait.dvpad],
-                                   F_bm0=trait.bm0, F_bn0=trait.bn0, F_bk0=trait.bk0, F_bn1=trait.bn1, F_bk1=trait.bk1, F_bk0max=trait.bk0max,
-                                   F_hdim=hdim, F_dtype=FWD_DTYPE_MAP[dtype])
-                if_j = 'if' if j == 0 else 'else if'
-                per_hdim_case = per_hdim_case + FMHA_FWD_API_PER_HDIM_CASE.format(F_if=if_j, F_hdim=hdim, F_hdim_v=hdim, F_inner_dispatch=inners)
-            if_i = 'if' if i == 0 else 'else if'
-            per_dtypes = per_dtypes + FMHA_FWD_API_PER_DTYPE.format(F_if=if_i, F_dtype=dtype, F_hdim_case=per_hdim_case)
+                    if_k = "if" if k == 0 else "else if"
+                    inners = inners + FMHA_FWD_SPLITKV_API_INNER_DISPATCH.format(
+                        F_if=if_k,
+                        F_mode=MODE_MAP[trait.mode],
+                        F_vlayout=LAYOUT_MAP[trait.vlayout],
+                        F_pipeline_enum=PIPELINE_ENUM_MAP[trait.pipeline_tag],
+                        F_logits=BOOL_MAP[trait.logits],
+                        F_mask=get_mask_map(self.mask_impl)[trait.mask],
+                        F_mask_check=get_mask_check_map(self.mask_impl)[trait.mask],
+                        F_bias_check=BIAS_CHECK_MAP[trait.bias],
+                        F_bias=BIAS_MAP[trait.bias],
+                        F_lse=BOOL_MAP[trait.lse],
+                        F_squant=BOOL_MAP[trait.squant],
+                        F_pagedkv=BOOL_MAP[trait.pagedkv],
+                        F_scheck=trait.scheck,
+                        F_skcheck=trait.skcheck,
+                        F_dcheck=trait.dcheck,
+                        F_dvcheck=trait.dvcheck,
+                        F_spad=BOOL_MAP[trait.spad],
+                        F_skpad=BOOL_MAP[trait.skpad],
+                        F_dpad=BOOL_MAP[trait.dpad],
+                        F_dvpad=BOOL_MAP[trait.dvpad],
+                        F_bm0=trait.bm0,
+                        F_bn0=trait.bn0,
+                        F_bk0=trait.bk0,
+                        F_bn1=trait.bn1,
+                        F_bk1=trait.bk1,
+                        F_bk0max=trait.bk0max,
+                        F_hdim=hdim,
+                        F_dtype=FWD_DTYPE_MAP[dtype],
+                    )
+                if_j = "if" if j == 0 else "else if"
+                per_hdim_case = per_hdim_case + FMHA_FWD_API_PER_HDIM_CASE.format(
+                    F_if=if_j, F_hdim=hdim, F_hdim_v=hdim, F_inner_dispatch=inners
+                )
+            if_i = "if" if i == 0 else "else if"
+            per_dtypes = per_dtypes + FMHA_FWD_API_PER_DTYPE.format(
+                F_if=if_i, F_dtype=dtype, F_hdim_case=per_hdim_case
+            )
         if not per_dtypes:
             # empty string we add some ignore to suppress warning in api
-            per_dtypes += '    (void)t ; (void)s ; (void)a;'
-        return FMHA_FWD_KERNEL_HEADER + FMHA_FWD_SPLITKV_API.format(F_dispatch = per_dtypes)
+            per_dtypes += "    (void)t ; (void)s ; (void)a;"
+        return FMHA_FWD_KERNEL_HEADER + FMHA_FWD_SPLITKV_API.format(
+            F_dispatch=per_dtypes
+        )
+
 
 @dataclass
 class FmhaFwdSplitKVCombineTileSize:
-    F_bn1       : int  # tile size along v head_dim
-    F_occupancy : int  # occupancy, -1 will let pipeline decide the occupancy, other value will overwrite occupancy
+    F_bn1: int  # tile size along v head_dim
+    F_occupancy: int  # occupancy, -1 will let pipeline decide the occupancy, other value will overwrite occupancy
+
     @property
     def name(self) -> str:
-        return f"b{self.F_bn1}" +\
-            ("" if self.F_occupancy == -1 else f"_o{self.F_occupancy}")
+        return f"b{self.F_bn1}" + (
+            "" if self.F_occupancy == -1 else f"_o{self.F_occupancy}"
+        )
+
 
 @dataclass
 class FmhaFwdSplitKVKernel:
-    F_idx           : int  # this is not a tunable, but a counter to differentiate symbol
-    F_hdim          : int  # hdim
-    F_dtype         : str  # data type
-    F_mode          : str  # value from MODE_MAP
-    F_tile          : FmhaFwdTileSize
-    F_pipeline      : FmhaFwdSplitKVPipeline
-    mask_impl       : str
+    F_idx: int  # this is not a tunable, but a counter to differentiate symbol
+    F_hdim: int  # hdim
+    F_dtype: str  # data type
+    F_mode: str  # value from MODE_MAP
+    F_tile: FmhaFwdTileSize
+    F_pipeline: FmhaFwdSplitKVPipeline
+    mask_impl: str
 
     @property
     def template(self) -> str:
-        kernel_body = str()
-        return FMHA_FWD_KERNEL_HEADER + \
-            FMHA_FWD_SPLITKV_KERNEL_BODY.format(
-                F_idx           = self.F_idx,
-                F_hdim          = self.F_hdim,
-                F_dtype         = FWD_DTYPE_MAP[self.F_dtype],
-                F_bm0           = self.F_tile.F_bm0,
-                F_bn0           = self.F_tile.F_bn0,
-                F_bk0           = self.F_tile.F_bk0,
-                F_bn1           = self.F_tile.F_bn1,
-                F_bk1           = self.F_tile.F_bk1,
-                F_bk0max        = self.F_tile.F_bk0max,
-                F_rm0           = self.F_tile.F_rm0,
-                F_rn0           = self.F_tile.F_rn0,
-                F_rk0           = self.F_tile.F_rk0,
-                F_rm1           = self.F_tile.F_rm1,
-                F_rn1           = self.F_tile.F_rn1,
-                F_rk1           = self.F_tile.F_rk1,
-                F_wm0           = self.F_tile.F_wm0,
-                F_wn0           = self.F_tile.F_wn0,
-                F_wk0           = self.F_tile.F_wk0,
-                F_wm1           = self.F_tile.F_wm1,
-                F_wn1           = self.F_tile.F_wn1,
-                F_wk1           = self.F_tile.F_wk1,
-                F_vlayout       = LAYOUT_MAP[self.F_pipeline.F_vlayout],
-                F_spad          = BOOL_MAP[self.F_pipeline.F_spad],
-                F_skpad         = BOOL_MAP[self.F_pipeline.F_skpad],
-                F_dpad          = BOOL_MAP[self.F_pipeline.F_dpad],
-                F_dvpad         = BOOL_MAP[self.F_pipeline.F_dvpad],
-                F_logits        = BOOL_MAP[self.F_pipeline.F_logits],
-                F_bias          = BIAS_MAP[self.F_pipeline.F_bias],
-                F_lse           = BOOL_MAP[self.F_pipeline.F_lse],
-                F_squant        = BOOL_MAP[self.F_pipeline.F_squant],
-                F_pagedkv       = BOOL_MAP[self.F_pipeline.F_pagedkv],
-                F_occupancy     = self.F_tile.F_occupancy,
-                F_pipeline_enum = PIPELINE_ENUM_MAP[self.F_pipeline.tag],
-                F_mask          = get_mask_map(self.mask_impl)[self.F_pipeline.F_mask],
-                F_mode          = MODE_MAP[self.F_mode],
-                F_pipeline      = FMHA_FWD_SPLITKV_PIPELINE_MAP[self.F_pipeline.tag])
+        return FMHA_FWD_KERNEL_HEADER + FMHA_FWD_SPLITKV_KERNEL_BODY.format(
+            F_idx=self.F_idx,
+            F_hdim=self.F_hdim,
+            F_dtype=FWD_DTYPE_MAP[self.F_dtype],
+            F_bm0=self.F_tile.F_bm0,
+            F_bn0=self.F_tile.F_bn0,
+            F_bk0=self.F_tile.F_bk0,
+            F_bn1=self.F_tile.F_bn1,
+            F_bk1=self.F_tile.F_bk1,
+            F_bk0max=self.F_tile.F_bk0max,
+            F_rm0=self.F_tile.F_rm0,
+            F_rn0=self.F_tile.F_rn0,
+            F_rk0=self.F_tile.F_rk0,
+            F_rm1=self.F_tile.F_rm1,
+            F_rn1=self.F_tile.F_rn1,
+            F_rk1=self.F_tile.F_rk1,
+            F_wm0=self.F_tile.F_wm0,
+            F_wn0=self.F_tile.F_wn0,
+            F_wk0=self.F_tile.F_wk0,
+            F_wm1=self.F_tile.F_wm1,
+            F_wn1=self.F_tile.F_wn1,
+            F_wk1=self.F_tile.F_wk1,
+            F_vlayout=LAYOUT_MAP[self.F_pipeline.F_vlayout],
+            F_spad=BOOL_MAP[self.F_pipeline.F_spad],
+            F_skpad=BOOL_MAP[self.F_pipeline.F_skpad],
+            F_dpad=BOOL_MAP[self.F_pipeline.F_dpad],
+            F_dvpad=BOOL_MAP[self.F_pipeline.F_dvpad],
+            F_logits=BOOL_MAP[self.F_pipeline.F_logits],
+            F_bias=BIAS_MAP[self.F_pipeline.F_bias],
+            F_lse=BOOL_MAP[self.F_pipeline.F_lse],
+            F_squant=BOOL_MAP[self.F_pipeline.F_squant],
+            F_pagedkv=BOOL_MAP[self.F_pipeline.F_pagedkv],
+            F_occupancy=self.F_tile.F_occupancy,
+            F_pipeline_enum=PIPELINE_ENUM_MAP[self.F_pipeline.tag],
+            F_mask=get_mask_map(self.mask_impl)[self.F_pipeline.F_mask],
+            F_mode=MODE_MAP[self.F_mode],
+            F_pipeline=FMHA_FWD_SPLITKV_PIPELINE_MAP[self.F_pipeline.tag],
+        )
 
     @property
     def name(self) -> str:
         # TODO: we don't encode idx here
-        return f"fmha_fwd_splitkv_d{self.F_hdim}_{self.F_dtype}_{self.F_mode}_" + \
-                self.F_tile.name + '_' + self.F_pipeline.name
+        return (
+            f"fmha_fwd_splitkv_d{self.F_hdim}_{self.F_dtype}_{self.F_mode}_"
+            + self.F_tile.name
+            + "_"
+            + self.F_pipeline.name
+        )
 
     @property
     def filename(self) -> str:
@@ -572,103 +670,127 @@ class FmhaFwdSplitKVKernel:
 
     def api_trait(self) -> FmhaFwdSplitKVApiTrait:
         return FmhaFwdSplitKVApiTrait(
-                pipeline_tag=self.F_pipeline.tag,
-                hdim=str(self.F_hdim),
-                dtype=self.F_dtype,
-                mode=self.F_mode,
-                bm0=self.F_tile.F_bm0,
-                bn0=self.F_tile.F_bn0,
-                bk0=self.F_tile.F_bk0,
-                bn1=self.F_tile.F_bn1,
-                bk1=self.F_tile.F_bk1,
-                bk0max=self.F_tile.F_bk0max,
-                vlayout=self.F_pipeline.F_vlayout,
-                logits=self.F_pipeline.F_logits,
-                mask=self.F_pipeline.F_mask,
-                bias=self.F_pipeline.F_bias,
-                lse=self.F_pipeline.F_lse,
-                squant=self.F_pipeline.F_squant,
-                pagedkv=self.F_pipeline.F_pagedkv,
-                spad=self.F_pipeline.F_spad,
-                skpad=self.F_pipeline.F_skpad,
-                dpad=self.F_pipeline.F_dpad,
-                dvpad=self.F_pipeline.F_dvpad)
+            pipeline_tag=self.F_pipeline.tag,
+            hdim=str(self.F_hdim),
+            dtype=self.F_dtype,
+            mode=self.F_mode,
+            bm0=self.F_tile.F_bm0,
+            bn0=self.F_tile.F_bn0,
+            bk0=self.F_tile.F_bk0,
+            bn1=self.F_tile.F_bn1,
+            bk1=self.F_tile.F_bk1,
+            bk0max=self.F_tile.F_bk0max,
+            vlayout=self.F_pipeline.F_vlayout,
+            logits=self.F_pipeline.F_logits,
+            mask=self.F_pipeline.F_mask,
+            bias=self.F_pipeline.F_bias,
+            lse=self.F_pipeline.F_lse,
+            squant=self.F_pipeline.F_squant,
+            pagedkv=self.F_pipeline.F_pagedkv,
+            spad=self.F_pipeline.F_spad,
+            skpad=self.F_pipeline.F_skpad,
+            dpad=self.F_pipeline.F_dpad,
+            dvpad=self.F_pipeline.F_dvpad,
+        )
+
 
 @dataclass
 class FmhaFwdSplitKVCombineKernel:
-    F_idx           : int  # this is not a tunable, but a counter to differentiate symbol
-    F_hdim          : int  # hdim
-    F_dtype         : str  # data type
-    F_mode          : str  # value from MODE_MAP
-    F_tile          : FmhaFwdSplitKVCombineTileSize
-    F_pipeline      : FmhaFwdSplitKVCombinePipeline
+    F_idx: int  # this is not a tunable, but a counter to differentiate symbol
+    F_hdim: int  # hdim
+    F_dtype: str  # data type
+    F_mode: str  # value from MODE_MAP
+    F_tile: FmhaFwdSplitKVCombineTileSize
+    F_pipeline: FmhaFwdSplitKVCombinePipeline
 
     @property
     def template(self) -> str:
-        kernel_body = str()
-        return FMHA_FWD_KERNEL_HEADER + \
-            FMHA_FWD_SPLITKV_COMBINE_KERNEL_BODY.format(
-                F_idx           = self.F_idx,
-                F_hdim          = self.F_hdim,
-                F_dtype         = FWD_DTYPE_MAP[self.F_dtype],
-                F_bn1           = self.F_tile.F_bn1,
-                F_spad          = BOOL_MAP[self.F_pipeline.F_spad],
-                F_dvpad         = BOOL_MAP[self.F_pipeline.F_dvpad],
-                F_lse           = BOOL_MAP[self.F_pipeline.F_lse],
-                F_squant        = BOOL_MAP[self.F_pipeline.F_squant],
-                F_occupancy     = self.F_tile.F_occupancy,
-                F_mode          = MODE_MAP[self.F_mode])
+        return FMHA_FWD_KERNEL_HEADER + FMHA_FWD_SPLITKV_COMBINE_KERNEL_BODY.format(
+            F_idx=self.F_idx,
+            F_hdim=self.F_hdim,
+            F_dtype=FWD_DTYPE_MAP[self.F_dtype],
+            F_bn1=self.F_tile.F_bn1,
+            F_spad=BOOL_MAP[self.F_pipeline.F_spad],
+            F_dvpad=BOOL_MAP[self.F_pipeline.F_dvpad],
+            F_lse=BOOL_MAP[self.F_pipeline.F_lse],
+            F_squant=BOOL_MAP[self.F_pipeline.F_squant],
+            F_occupancy=self.F_tile.F_occupancy,
+            F_mode=MODE_MAP[self.F_mode],
+        )
 
     @property
     def name(self) -> str:
         # TODO: we don't encode idx here
-        return f"fmha_fwd_splitkv_combine_d{self.F_hdim}_{self.F_dtype}_{self.F_mode}_" + \
-                self.F_tile.name + '_' + self.F_pipeline.name
+        return (
+            f"fmha_fwd_splitkv_combine_d{self.F_hdim}_{self.F_dtype}_{self.F_mode}_"
+            + self.F_tile.name
+            + "_"
+            + self.F_pipeline.name
+        )
 
     @property
     def filename(self) -> str:
         return self.name + ".cpp"
 
+
 # TODO: design a more practical way to do it
 # this is current supported tile size per hdim
-def get_fmha_fwd_tile_dict_from_dtype(dtype : str) -> Optional[dict]:
-    if dtype == 'fp16' or dtype == 'bf16':
+def get_fmha_fwd_tile_dict_from_dtype(dtype: str) -> Optional[dict]:
+    if dtype == "fp16" or dtype == "bf16":
         return {
-            '32'  : FmhaFwdTileSize(32, 64,  16, 32,  32,  32,   2, 1, 1,  2, 1, 1,  16, 16, 16,  16, 16, 16,  -1),
-            '64'  : FmhaFwdTileSize(64, 64,  32, 64,  32,  64,   4, 1, 1,  4, 1, 1,  16, 16, 16,  16, 16, 16,  -1),
-            '96'  : FmhaFwdTileSize(64, 128, 32, 128, 32,  96,   4, 1, 1,  4, 1, 1,  16, 16, 16,  16, 16, 16,  -1),
-            '128' : FmhaFwdTileSize(64, 128, 32, 128, 32,  128,  4, 1, 1,  4, 1, 1,  16, 16, 16,  16, 16, 16,  -1),
+            "32": FmhaFwdTileSize(
+                32, 64, 16, 32, 32, 32, 2, 1, 1, 2, 1, 1, 16, 16, 16, 16, 16, 16, -1
+            ),
+            "64": FmhaFwdTileSize(
+                64, 64, 32, 64, 32, 64, 4, 1, 1, 4, 1, 1, 16, 16, 16, 16, 16, 16, -1
+            ),
+            "96": FmhaFwdTileSize(
+                64, 128, 32, 128, 32, 96, 4, 1, 1, 4, 1, 1, 16, 16, 16, 16, 16, 16, -1
+            ),
+            "128": FmhaFwdTileSize(
+                64, 128, 32, 128, 32, 128, 4, 1, 1, 4, 1, 1, 16, 16, 16, 16, 16, 16, -1
+            ),
             # '160' : FmhaFwdTileSize(64, 128, 32, 160, 32,  160,  4, 1, 1,  4, 1, 1,  16, 16, 16,  16, 16, 16,  -1),
-            '256' : FmhaFwdTileSize(64, 128, 32, 256, 32,  256,  4, 1, 1,  4, 1, 1,  16, 16, 16,  16, 16, 16,  -1),
+            "256": FmhaFwdTileSize(
+                64, 128, 32, 256, 32, 256, 4, 1, 1, 4, 1, 1, 16, 16, 16, 16, 16, 16, -1
+            ),
         }
-    elif dtype == 'fp8' or dtype == 'bf8':
+    elif dtype == "fp8" or dtype == "bf8":
         return {
-            '64'  : FmhaFwdTileSize(128, 64,  32, 64,  32,  64,   2, 1, 1,  2, 1, 1,  32, 32, 32,  32, 32, 32,  -1),
-            '128' : FmhaFwdTileSize(128, 128, 32, 128, 32,  128,  4, 1, 1,  4, 1, 1,  32, 32, 32,  32, 32, 32,  -1),
+            "64": FmhaFwdTileSize(
+                128, 64, 32, 64, 32, 64, 2, 1, 1, 2, 1, 1, 32, 32, 32, 32, 32, 32, -1
+            ),
+            "128": FmhaFwdTileSize(
+                128, 128, 32, 128, 32, 128, 4, 1, 1, 4, 1, 1, 32, 32, 32, 32, 32, 32, -1
+            ),
         }
     else:
         return None
 
-def get_fmha_fwd_splitkv_combine_tile_dict_from_dtype(dtype : str) -> Optional[dict]:
-    if dtype == 'fp16' or dtype == 'bf16':
+
+def get_fmha_fwd_splitkv_combine_tile_dict_from_dtype(dtype: str) -> Optional[dict]:
+    if dtype == "fp16" or dtype == "bf16":
         return {
-            '32'  : FmhaFwdSplitKVCombineTileSize(32,  -1),
-            '64'  : FmhaFwdSplitKVCombineTileSize(32,  -1),
-            '96'  : FmhaFwdSplitKVCombineTileSize(32,  -1),
-            '128' : FmhaFwdSplitKVCombineTileSize(32,  -1),
+            "32": FmhaFwdSplitKVCombineTileSize(32, -1),
+            "64": FmhaFwdSplitKVCombineTileSize(32, -1),
+            "96": FmhaFwdSplitKVCombineTileSize(32, -1),
+            "128": FmhaFwdSplitKVCombineTileSize(32, -1),
             # '160' : FmhaFwdSplitKVCombineTileSize(32,  -1),
-            '256' : FmhaFwdSplitKVCombineTileSize(32,  -1),
-    }
-    elif dtype == 'fp8' or dtype == 'bf8':
+            "256": FmhaFwdSplitKVCombineTileSize(32, -1),
+        }
+    elif dtype == "fp8" or dtype == "bf8":
         return {
-            '64'  : FmhaFwdSplitKVCombineTileSize(32,  -1),
-            '128' : FmhaFwdSplitKVCombineTileSize(32,  -1),
-            '256' : FmhaFwdSplitKVCombineTileSize(32,  -1),
+            "64": FmhaFwdSplitKVCombineTileSize(32, -1),
+            "128": FmhaFwdSplitKVCombineTileSize(32, -1),
+            "256": FmhaFwdSplitKVCombineTileSize(32, -1),
         }
     else:
         return None
 
-def get_fwd_splitkv_blobs(kernel_filter : Optional[str], receipt, mask_impl, optdim_list) -> Tuple[FmhaFwdSplitKVApiPool, List[FmhaFwdSplitKVKernel]]:
+
+def get_fwd_splitkv_blobs(
+    kernel_filter: Optional[str], receipt, mask_impl, optdim_list
+) -> Tuple[FmhaFwdSplitKVApiPool, List[FmhaFwdSplitKVKernel]]:
     Pipeline = FmhaFwdSplitKVPipeline
     Kernel = FmhaFwdSplitKVKernel
 
@@ -679,25 +801,164 @@ def get_fwd_splitkv_blobs(kernel_filter : Optional[str], receipt, mask_impl, opt
         # TODO: the order of List matters! the later in this list will be also be checked later
         # TODO: currently for qr pipeline, let 't' padding to appear later!!
         # TODO: how to design this more generic?
-        squant = 't' if dtype == 'fp8' else 'f'
+        squant = "t" if dtype == "fp8" else "f"
         pipelines = []
-        if dtype in ['fp16', 'bf16']:
-            for logits, mask, bias, pagedkv in itertools.product(["t", "f"], get_mask_map(mask_impl).keys(), BIAS_MAP.keys(), ["t", "f"]):
-                pipelines.append(Pipeline('qr', 'row', 'f', 't', 'f', 'f', logits, bias, 't', squant, pagedkv, mask))
-                pipelines.append(Pipeline('qr', 'col', 'f', 't', 'f', 'f', logits, bias, 't', squant, pagedkv, mask))
+        if dtype in ["fp16", "bf16"]:
+            for logits, mask, bias, pagedkv in itertools.product(
+                ["t", "f"], get_mask_map(mask_impl).keys(), BIAS_MAP.keys(), ["t", "f"]
+            ):
+                pipelines.append(
+                    Pipeline(
+                        "qr",
+                        "row",
+                        "f",
+                        "t",
+                        "f",
+                        "f",
+                        logits,
+                        bias,
+                        "t",
+                        squant,
+                        pagedkv,
+                        mask,
+                    )
+                )
+                pipelines.append(
+                    Pipeline(
+                        "qr",
+                        "col",
+                        "f",
+                        "t",
+                        "f",
+                        "f",
+                        logits,
+                        bias,
+                        "t",
+                        squant,
+                        pagedkv,
+                        mask,
+                    )
+                )
 
-                pipelines.append(Pipeline('qr', 'row', 't', 'f', 'f', 'f', logits, bias, 't', squant, pagedkv, mask))
-                pipelines.append(Pipeline('qr', 'col', 't', 'f', 'f', 'f', logits, bias, 't', squant, pagedkv, mask))
+                pipelines.append(
+                    Pipeline(
+                        "qr",
+                        "row",
+                        "t",
+                        "f",
+                        "f",
+                        "f",
+                        logits,
+                        bias,
+                        "t",
+                        squant,
+                        pagedkv,
+                        mask,
+                    )
+                )
+                pipelines.append(
+                    Pipeline(
+                        "qr",
+                        "col",
+                        "t",
+                        "f",
+                        "f",
+                        "f",
+                        logits,
+                        bias,
+                        "t",
+                        squant,
+                        pagedkv,
+                        mask,
+                    )
+                )
 
-                pipelines.append(Pipeline('qr', 'row', 't', 't', 'f', 'f', logits, bias, 't', squant, pagedkv, mask))
-                pipelines.append(Pipeline('qr', 'col', 't', 't', 'f', 'f', logits, bias, 't', squant, pagedkv, mask))
+                pipelines.append(
+                    Pipeline(
+                        "qr",
+                        "row",
+                        "t",
+                        "t",
+                        "f",
+                        "f",
+                        logits,
+                        bias,
+                        "t",
+                        squant,
+                        pagedkv,
+                        mask,
+                    )
+                )
+                pipelines.append(
+                    Pipeline(
+                        "qr",
+                        "col",
+                        "t",
+                        "t",
+                        "f",
+                        "f",
+                        logits,
+                        bias,
+                        "t",
+                        squant,
+                        pagedkv,
+                        mask,
+                    )
+                )
 
-                pipelines.append(Pipeline('qr', 'row', 't', 't', 't', 't', logits, bias, 't', squant, pagedkv, mask))
-                pipelines.append(Pipeline('qr', 'col', 't', 't', 't', 't', logits, bias, 't', squant, pagedkv, mask))
-        elif dtype in ['fp8', 'bf8']:
-            for logits, mask, bias in itertools.product(["t", "f"], get_mask_map(mask_impl).keys(), BIAS_MAP.keys()):
-                pipelines.append(Pipeline('qr', 'col', 'f', 'f', 'f', 'f', logits, bias, 't', squant, 'f', mask))
-        elif dtype in ['fp8fp16', 'fp8bf16']:
+                pipelines.append(
+                    Pipeline(
+                        "qr",
+                        "row",
+                        "t",
+                        "t",
+                        "t",
+                        "t",
+                        logits,
+                        bias,
+                        "t",
+                        squant,
+                        pagedkv,
+                        mask,
+                    )
+                )
+                pipelines.append(
+                    Pipeline(
+                        "qr",
+                        "col",
+                        "t",
+                        "t",
+                        "t",
+                        "t",
+                        logits,
+                        bias,
+                        "t",
+                        squant,
+                        pagedkv,
+                        mask,
+                    )
+                )
+        elif dtype in ["fp8", "bf8"]:
+            for logits, mask, bias in itertools.product(
+                ["t", "f"], get_mask_map(mask_impl).keys(), BIAS_MAP.keys()
+            ):
+                pipelines.append(
+                    Pipeline(
+                        "qr",
+                        "col",
+                        "f",
+                        "f",
+                        "f",
+                        "f",
+                        logits,
+                        bias,
+                        "t",
+                        squant,
+                        "f",
+                        mask,
+                    )
+                )
+        elif dtype in ["fp8fp16", "fp8bf16"]:
             # TODO
             None
         else:
@@ -709,28 +970,33 @@ def get_fwd_splitkv_blobs(kernel_filter : Optional[str], receipt, mask_impl, opt
 
     for dtype in FWD_DTYPE_MAP.keys():
         d = get_fmha_fwd_tile_dict_from_dtype(dtype)
-        if d == None:
+        if d is None:
             continue
-        #for hdim_str, mode, mask, bias, lse in itertools.product(d.keys(), MODE_MAP.keys(), MASK_MAP.keys(), ["t", "f"], ["t", "f"]):
+        # for hdim_str, mode, mask, bias, lse in itertools.product(d.keys(), MODE_MAP.keys(), MASK_MAP.keys(), ["t", "f"], ["t", "f"]):
         for hdim_str, mode in itertools.product(d.keys(), MODE_MAP.keys()):
             tile = d[hdim_str]
             hdim = int(hdim_str)
             for pipeline in get_pipelines(dtype, hdim):
                 if mode == "group":
-                    if pipeline.F_spad != 't' or pipeline.F_skpad != 't':
+                    if pipeline.F_spad != "t" or pipeline.F_skpad != "t":
                         # in group mode, spad/skpad must be true, since we can't predict if seqlen of current batch need pad or not
                         continue
                 # logits_soft_cap is only allowed if no bias
-                if not ((pipeline.F_logits == 't' and pipeline.F_bias == 'no') or pipeline.F_logits == 'f'):
+                if not (
+                    (pipeline.F_logits == "t" and pipeline.F_bias == "no")
+                    or pipeline.F_logits == "f"
+                ):
                     continue
-                k = Kernel(F_idx=0,
-                           F_hdim=hdim,
-                           F_dtype=dtype,
-                           F_mode=mode,
-                           F_tile=tile,
-                           F_pipeline=pipeline,
-                           mask_impl=mask_impl)
-                if kernel_filter != '':
+                k = Kernel(
+                    F_idx=0,
+                    F_hdim=hdim,
+                    F_dtype=dtype,
+                    F_mode=mode,
+                    F_tile=tile,
+                    F_pipeline=pipeline,
+                    mask_impl=mask_impl,
+                )
+                if kernel_filter != "":
                     if not fnmatch.fnmatch(k.name, kernel_filter):
                         continue
                 if optdim_list != [-1]:
@@ -738,40 +1004,40 @@ def get_fwd_splitkv_blobs(kernel_filter : Optional[str], receipt, mask_impl, opt
                         continue
                 # Flash attention integration
                 if receipt == 2:
-                    cond = dtype in ['fp16', 'bf16']
-                    cond &= pipeline.F_vlayout == 'row'
-                    cond &= pipeline.F_bias in ['no', 'alibi']
-                    cond &= pipeline.F_squant == 'f'
+                    cond = dtype in ["fp16", "bf16"]
+                    cond &= pipeline.F_vlayout == "row"
+                    cond &= pipeline.F_bias in ["no", "alibi"]
+                    cond &= pipeline.F_squant == "f"
                     if not cond:
                         continue
                 # PyTorch integration
                 elif receipt == 4:
-                    cond = dtype in ['fp16, bf16']
-                    cond &= pipeline.F_vlayout == 'row'
-                    cond &= pipeline.F_bias in ['no', 'bias']
-                    cond &= pipeline.F_squant == 'f'
-                    cond &= mode == 'batch'
+                    cond = dtype in ["fp16, bf16"]
+                    cond &= pipeline.F_vlayout == "row"
+                    cond &= pipeline.F_bias in ["no", "bias"]
+                    cond &= pipeline.F_squant == "f"
+                    cond &= mode == "batch"
                     if not cond:
                         continue
                 # Aiter(mha_varlen_fwd) integration
                 elif receipt == 200:
-                    cond = dtype in ['fp16', 'bf16']
+                    cond = dtype in ["fp16", "bf16"]
                     cond &= mode == "group"
-                    cond &= pipeline.F_vlayout == 'row'
-                    cond &= pipeline.F_squant == 'f'
+                    cond &= pipeline.F_vlayout == "row"
+                    cond &= pipeline.F_squant == "f"
                     if not cond:
                         continue
                 # aiter::mha_fwd_splikv C++ api integration
                 elif receipt == 600:
-                    cond = dtype in ['fp16', 'bf16']
-                    cond &= pipeline.F_vlayout == 'row'
-                    cond &= pipeline.F_squant == 'f'
+                    cond = dtype in ["fp16", "bf16"]
+                    cond &= pipeline.F_vlayout == "row"
+                    cond &= pipeline.F_squant == "f"
                     if not cond:
                         continue
 
                 # fp32 only
                 if receipt == 800 or receipt == 801:
-                    cond = dtype == 'fp32'
+                    cond = dtype == "fp32"
                     if not cond:
                         continue
 
@@ -780,7 +1046,10 @@ def get_fwd_splitkv_blobs(kernel_filter : Optional[str], receipt, mask_impl, opt
 
     return (api_pool, gen)
 
-def get_fwd_splitkv_combine_blobs(kernel_filter : Optional[str], receipt, optdim_list) -> List[FmhaFwdSplitKVCombineKernel]:
+
+def get_fwd_splitkv_combine_blobs(
+    kernel_filter: Optional[str], receipt, optdim_list
+) -> List[FmhaFwdSplitKVCombineKernel]:
     Pipeline = FmhaFwdSplitKVCombinePipeline
     Kernel = FmhaFwdSplitKVCombineKernel
 
@@ -791,14 +1060,16 @@ def get_fwd_splitkv_combine_blobs(kernel_filter : Optional[str], receipt, optdim
         # TODO: the order of List matters! the later in this list will be also be checked later
         # TODO: currently for qr pipeline, let 't' padding to appear later!!
         # TODO: how to design this more generic?
-        squant = 't' if dtype == 'fp8' else 'f'
+        squant = "t" if dtype == "fp8" else "f"
         pipelines = []
-        if dtype in ['fp16', 'bf16']:
-            for spad, dvpad, lse in itertools.product(["t", "f"], ["t", "f"], ["t", "f"]):
-                pipelines.append(Pipeline('unused', spad, dvpad, lse, squant))
-        elif dtype in ['fp8', 'bf8']:
+        if dtype in ["fp16", "bf16"]:
+            for spad, dvpad, lse in itertools.product(
+                ["t", "f"], ["t", "f"], ["t", "f"]
+            ):
+                pipelines.append(Pipeline("unused", spad, dvpad, lse, squant))
+        elif dtype in ["fp8", "bf8"]:
             # no need lse kernels
-            pipelines.append(Pipeline('unused', 'f', 'f', 'f', squant))
+            pipelines.append(Pipeline("unused", "f", "f", "f", squant))
         else:
             assert False
         return pipelines
@@ -807,24 +1078,26 @@ def get_fwd_splitkv_combine_blobs(kernel_filter : Optional[str], receipt, optdim
 
     for dtype in FWD_DTYPE_MAP.keys():
         d = get_fmha_fwd_splitkv_combine_tile_dict_from_dtype(dtype)
-        if d == None:
+        if d is None:
             continue
-        #for hdim_str, mode, mask, bias, lse in itertools.product(d.keys(), MODE_MAP.keys(), MASK_MAP.keys(), ["t", "f"], ["t", "f"]):
+        # for hdim_str, mode, mask, bias, lse in itertools.product(d.keys(), MODE_MAP.keys(), MASK_MAP.keys(), ["t", "f"], ["t", "f"]):
         for hdim_str, mode in itertools.product(d.keys(), MODE_MAP.keys()):
             tile = d[hdim_str]
             hdim = int(hdim_str)
             for pipeline in get_pipelines(dtype, hdim):
                 if mode == "group":
-                    if pipeline.F_spad != 't':
+                    if pipeline.F_spad != "t":
                         # in group mode, spad/skpad must be true, since we can't predict if seqlen of current batch need pad or not
                         continue
-                k = Kernel(F_idx=0,
-                           F_hdim=hdim,
-                           F_dtype=dtype,
-                           F_mode=mode,
-                           F_tile=tile,
-                           F_pipeline=pipeline)
-                if kernel_filter != '':
+                k = Kernel(
+                    F_idx=0,
+                    F_hdim=hdim,
+                    F_dtype=dtype,
+                    F_mode=mode,
+                    F_tile=tile,
+                    F_pipeline=pipeline,
+                )
+                if kernel_filter != "":
                     if not fnmatch.fnmatch(k.name, kernel_filter):
                         continue
                 if optdim_list != [-1]:
@@ -832,19 +1105,19 @@ def get_fwd_splitkv_combine_blobs(kernel_filter : Optional[str], receipt, optdim
                         continue
                 # Aiter(mha_varlen_fwd) integration
                 if receipt == 200:
-                    cond = dtype in ['fp16', 'bf16']
+                    cond = dtype in ["fp16", "bf16"]
                     cond &= mode == "group"
                     if not cond:
                         continue
                 # aiter::mha_fwd_splikv C++ api integration
                 elif receipt == 600:
-                    cond = dtype in ['fp16', 'bf16']
+                    cond = dtype in ["fp16", "bf16"]
                     if not cond:
                         continue
 
                 # fp32 only
                 if receipt == 800 or receipt == 801:
-                    cond = dtype == 'fp32'
+                    cond = dtype == "fp32"
                     if not cond:
                         continue
 
@@ -852,34 +1125,48 @@ def get_fwd_splitkv_combine_blobs(kernel_filter : Optional[str], receipt, optdim
 
     return gen
 
-def write_single_kernel(kernel: Union[FmhaFwdSplitKVKernel, FmhaFwdSplitKVCombineKernel], autogen_dir: Path) -> None:
+
+def write_single_kernel(
+    kernel: Union[FmhaFwdSplitKVKernel, FmhaFwdSplitKVCombineKernel], autogen_dir: Path
+) -> None:
     (autogen_dir / kernel.filename).write_text(kernel.template)
 
-def write_fwd_splitkv_api(api_pool : FmhaFwdSplitKVApiPool, autogen_dir: Path) -> None:
+
+def write_fwd_splitkv_api(api_pool: FmhaFwdSplitKVApiPool, autogen_dir: Path) -> None:
     file_path = autogen_dir / FMHA_FWD_SPLITKV_API_FILENAME
     file_path.write_text(api_pool.api)
 
-def write_blobs(output_dir : Path, filter_list : str, receipt, optdim_list, mask_impl) -> None:
-    filter_list = filter_list.split('@')
-    filter_list.extend([''] * (2 - len(filter_list)))
+
+def write_blobs(
+    output_dir: Path, filter_list: str, receipt, optdim_list, mask_impl
+) -> None:
+    filter_list = filter_list.split("@")
+    filter_list.extend([""] * (2 - len(filter_list)))
 
     kernels = get_fwd_splitkv_combine_blobs(filter_list[0], receipt, optdim_list)
     for kernel in kernels:
         write_single_kernel(kernel, output_dir)
-    api_pool, kernels = get_fwd_splitkv_blobs(filter_list[1], receipt, mask_impl, optdim_list)
+    api_pool, kernels = get_fwd_splitkv_blobs(
+        filter_list[1], receipt, mask_impl, optdim_list
+    )
     for kernel in kernels:
         write_single_kernel(kernel, output_dir)
     write_fwd_splitkv_api(api_pool, output_dir)
 
-def list_blobs(file_path : Path, filter_list : str, receipt, optdim_list, mask_impl) -> None:
-    filter_list = filter_list.split('@')
-    filter_list.extend([''] * (2 - len(filter_list)))
 
-    with file_path.open('a') as f:
+def list_blobs(
+    file_path: Path, filter_list: str, receipt, optdim_list, mask_impl
+) -> None:
+    filter_list = filter_list.split("@")
+    filter_list.extend([""] * (2 - len(filter_list)))
+
+    with file_path.open("a") as f:
         kernels = get_fwd_splitkv_combine_blobs(filter_list[0], receipt, optdim_list)
         for kernel in kernels:
             f.write(str(file_path.parent / GEN_DIR / kernel.filename) + "\n")
-        _, kernels = get_fwd_splitkv_blobs(filter_list[1], receipt, mask_impl, optdim_list)
+        _, kernels = get_fwd_splitkv_blobs(
+            filter_list[1], receipt, mask_impl, optdim_list
+        )
         for kernel in kernels:
             f.write(str(file_path.parent / GEN_DIR / kernel.filename) + "\n")
         f.write(str(file_path.parent / GEN_DIR / FMHA_FWD_SPLITKV_API_FILENAME) + "\n")
diff --git a/example/ck_tile/01_fmha/codegen/ops/fmha_pagedkv_prefill.py b/example/ck_tile/01_fmha/codegen/ops/fmha_pagedkv_prefill.py
index 3624b7b387..55b0160a71 100644
--- a/example/ck_tile/01_fmha/codegen/ops/fmha_pagedkv_prefill.py
+++ b/example/ck_tile/01_fmha/codegen/ops/fmha_pagedkv_prefill.py
@@ -9,28 +9,26 @@ import itertools
 from pathlib import Path
 from typing import List, Optional, Tuple
 
-from codegen.cmake_config import *
-from codegen.cpp_symbol_map import *
+from codegen.cmake_config import GEN_DIR
+from codegen.cpp_symbol_map import (
+    LAYOUT_MAP,
+    BIAS_CHECK_MAP,
+    get_mask_check_map,
+    MODE_MAP,
+    get_mask_map,
+    BIAS_MAP,
+    FWD_DTYPE_MAP,
+    BOOL_MAP,
+    PIPELINE_ENUM_MAP,
+)
 
 
-DTYPE_BITS = {
-    "fp32": 32,
-    "fp16": 16,
-    "bf16": 16,
-    "fp8" : 8,
-    "bf8" : 8
-}
+DTYPE_BITS = {"fp32": 32, "fp16": 16, "bf16": 16, "fp8": 8, "bf8": 8}
 
-K0_MAX_SUBMAX_MAP = {
-    32 : 32,
-    64 : 64,
-    96 : 128,
-    128: 128,
-    256: 256
-}
+K0_MAX_SUBMAX_MAP = {32: 32, 64: 64, 96: 128, 128: 128, 256: 256}
 
 FMHA_FWD_PAGEDKV_PIPELINE_MAP = {
-    "qr_pagedkv" : "ck_tile::BlockFmhaFwdPagedKVPipelineQRKSVS"
+    "qr_pagedkv": "ck_tile::BlockFmhaFwdPagedKVPipelineQRKSVS"
 }
 
 FMHA_FWD_KERNEL_HEADER = """// SPDX-License-Identifier: MIT
@@ -40,7 +38,7 @@ FMHA_FWD_KERNEL_HEADER = """// SPDX-License-Identifier: MIT
 #include "fmha_fwd.hpp"
 """
 
-FMHA_FWD_KERNEL_BODY="""
+FMHA_FWD_KERNEL_BODY = """
 using fmha_dtype_{F_idx} = {F_dtype};
 
 using fmha_block_tile_{F_idx} = ck_tile::sequence<{F_bm0}, {F_bn0}, {F_bk0}, {F_bn1}, {F_bk1}, {F_bk0max}>;
@@ -115,8 +113,8 @@ float fmha_fwd_pagedkv_<trait_{F_idx}>(const ck_tile::stream_config& s, fmha_fwd
 }}
 """
 
-FMHA_FWD_API_FILENAME="fmha_fwd_pagedkv_api.cpp"
-FMHA_FWD_API="""
+FMHA_FWD_API_FILENAME = "fmha_fwd_pagedkv_api.cpp"
+FMHA_FWD_API = """
 float fmha_fwd_pagedkv(fmha_fwd_pagedkv_traits& t, fmha_fwd_pagedkv_args& a, const ck_tile::stream_config& s){{
     float r = -1;
 {F_dispatch}
@@ -124,164 +122,215 @@ float fmha_fwd_pagedkv(fmha_fwd_pagedkv_traits& t, fmha_fwd_pagedkv_args& a, con
 }}
 """
 
-FMHA_FWD_API_PER_DTYPE="""    {F_if}(t.data_type.compare(\"{F_dtype}\") == 0){{
+FMHA_FWD_API_PER_DTYPE = """    {F_if}(t.data_type.compare(\"{F_dtype}\") == 0){{
 {F_hdim_case}
     }}
 """
-FMHA_FWD_API_PER_HDIM_CASE="""        {F_if} (t.hdim_q <= {F_hdim} && t.hdim_v <= {F_hdim_v}) {{
+FMHA_FWD_API_PER_HDIM_CASE = """        {F_if} (t.hdim_q <= {F_hdim} && t.hdim_v <= {F_hdim_v}) {{
 {F_inner_dispatch}
         }}
 """
 
-FMHA_FWD_API_INNER_DISPATCH="""            {F_if}((t.is_group_mode == {F_mode}) && (t.is_v_rowmajor == {F_vlayout}) && (t.has_logits_soft_cap == {F_logits}) && ({F_mask_check}) && (t.bias_type == {F_bias_check}) && (t.has_lse == {F_lse})  && (t.use_pagedkv == {F_pagedkv}) && (t.do_fp8_static_quant == {F_squant}) && (t.skip_min_seqlen_q == {F_skip}) &&
+FMHA_FWD_API_INNER_DISPATCH = """            {F_if}((t.is_group_mode == {F_mode}) && (t.is_v_rowmajor == {F_vlayout}) && (t.has_logits_soft_cap == {F_logits}) && ({F_mask_check}) && (t.bias_type == {F_bias_check}) && (t.has_lse == {F_lse})  && (t.use_pagedkv == {F_pagedkv}) && (t.do_fp8_static_quant == {F_squant}) && (t.skip_min_seqlen_q == {F_skip}) &&
                         ({F_scheck}) && ({F_skcheck}) && ({F_dcheck}) && ({F_dvcheck})) {{
                 using trait_ = fmha_fwd_pagedkv_traits_<{F_hdim}, {F_dtype}, {F_mode}, {F_bm0}, {F_bn0}, {F_bk0}, {F_bn1}, {F_bk1}, {F_bk0max}, {F_vlayout}, {F_pipeline_enum}, {F_logits}, {F_mask}, {F_bias}, {F_lse}, {F_pagedkv}, {F_squant}, {F_spad}, {F_skpad}, {F_dpad}, {F_dvpad}, {F_skip}>;
                 return fmha_fwd_pagedkv_<trait_>(s, a);
             }}
 """
 
+
 @dataclass
 class FmhaFwdApiTrait:
-    pipeline_tag : str
+    pipeline_tag: str
     # sync with fmha_fwd_traits<>, to generate fallback calls
-    hdim      : str
-    dtype     : str  # data type
-    mode      : str  # value from MODE_MAP
-    bm0       : int  # tile size along q seqlen (block size)
-    bn0       : int  # tile size along qk seqlen
-    bk0       : int  # tile size along qk gemm unroll
-    bn1       : int  # tile size along v head_dim
-    bk1       : int  # tile size along kv gemm unroll
-    bk0max    : int
-    vlayout   : str
-    logits    : str
-    mask      : str
-    bias      : str  #
-    lse       : str  #
-    pagedkv   : str
-    squant    : str  #
-    spad      : str
-    skpad     : str
-    dpad      : str
-    dvpad     : str
-    skip      : str
+    hdim: str
+    dtype: str  # data type
+    mode: str  # value from MODE_MAP
+    bm0: int  # tile size along q seqlen (block size)
+    bn0: int  # tile size along qk seqlen
+    bk0: int  # tile size along qk gemm unroll
+    bn1: int  # tile size along v head_dim
+    bk1: int  # tile size along kv gemm unroll
+    bk0max: int
+    vlayout: str
+    logits: str
+    mask: str
+    bias: str  #
+    lse: str  #
+    pagedkv: str
+    squant: str  #
+    spad: str
+    skpad: str
+    dpad: str
+    dvpad: str
+    skip: str
 
     @property
     def name(self) -> str:
-        return f'{self.hdim}-{self.dtype}-{self.mode}-{self.bm0}-{self.bn0}-{self.bk0}-{self.bn0}-{self.bk1}-{self.bk0max}-'+\
-                    f'{self.vlayout}-{self.logits}-{self.mask}-{self.bias}-{self.lse}-{self.pagedkv}-{self.squant}-{self.spad}-{self.skpad}-{self.dpad}-{self.dvpad}-{self.skip}'
+        return (
+            f"{self.hdim}-{self.dtype}-{self.mode}-{self.bm0}-{self.bn0}-{self.bk0}-{self.bn0}-{self.bk1}-{self.bk0max}-"
+            + f"{self.vlayout}-{self.logits}-{self.mask}-{self.bias}-{self.lse}-{self.pagedkv}-{self.squant}-{self.spad}-{self.skpad}-{self.dpad}-{self.dvpad}-{self.skip}"
+        )
 
     @property
     def scheck(self) -> str:
-        if self.mode == 'group': return 'true/*group mode spad always true*/'                  # group mode only generate spad/skpad == true
-        if self.pipeline_tag == 'qr_async':
-            if self.spad == 't' : return 'true' # always support
-            else :                return 'true'
-        elif self.pipeline_tag in ['qr_pagedkv', 'qs']:
-            if self.spad == 't' : return f'true /*a.seqlen_q % {self.bm0} != 0*/'  # TODO: order of get_pipelines() matters! (ugly)
-            else :                return f'a.seqlen_q % {self.bm0} == 0'
-        else: assert False
+        if self.mode == "group":
+            return "true/*group mode spad always true*/"  # group mode only generate spad/skpad == true
+        if self.pipeline_tag == "qr_async":
+            if self.spad == "t":
+                return "true"  # always support
+            else:
+                return "true"
+        elif self.pipeline_tag in ["qr_pagedkv", "qs"]:
+            if self.spad == "t":
+                return f"true /*a.seqlen_q % {self.bm0} != 0*/"  # TODO: order of get_pipelines() matters! (ugly)
+            else:
+                return f"a.seqlen_q % {self.bm0} == 0"
+        else:
+            assert False
 
     @property
     def skcheck(self) -> str:
-        if self.mode == 'group': return 'true/*group mode skpad always true*/'                  # group mode only generate spad/skpad == true
-        if self.pipeline_tag == 'qr_async':
-            if self.skpad == 't' : return f'a.seqlen_k == 0 || a.seqlen_k % {self.bn0} != 0'
-            else :                 return f'a.seqlen_k != 0 && a.seqlen_k % {self.bn0} == 0'
-        elif self.pipeline_tag in ['qr_pagedkv', 'qs']:
-            if self.skpad == 't' : return f'true /*a.seqlen_k_ptr != nullptr || a.seqlen_k % {self.bn0} != 0*/' # TODO: order of get_pipelines() matters! (ugly)
-            else :                return f'a.seqlen_k_ptr == nullptr && a.seqlen_k % {self.bn0} == 0'
-        else: assert False
+        if self.mode == "group":
+            return "true/*group mode skpad always true*/"  # group mode only generate spad/skpad == true
+        if self.pipeline_tag == "qr_async":
+            if self.skpad == "t":
+                return f"a.seqlen_k == 0 || a.seqlen_k % {self.bn0} != 0"
+            else:
+                return f"a.seqlen_k != 0 && a.seqlen_k % {self.bn0} == 0"
+        elif self.pipeline_tag in ["qr_pagedkv", "qs"]:
+            if self.skpad == "t":
+                return f"true /*a.seqlen_k_ptr != nullptr || a.seqlen_k % {self.bn0} != 0*/"  # TODO: order of get_pipelines() matters! (ugly)
+            else:
+                return f"a.seqlen_k_ptr == nullptr && a.seqlen_k % {self.bn0} == 0"
+        else:
+            assert False
 
     @property
     def dcheck(self) -> str:
-        if self.pipeline_tag == 'qr_async':
+        if self.pipeline_tag == "qr_async":
             vec = int((32 * 4) / DTYPE_BITS[self.dtype])
-            if self.dpad == 't': return f'a.hdim_q % {vec} == 0'
-            else :               assert False
-        elif self.pipeline_tag in ['qr_pagedkv', 'qs']:
+            if self.dpad == "t":
+                return f"a.hdim_q % {vec} == 0"
+            else:
+                assert False
+        elif self.pipeline_tag in ["qr_pagedkv", "qs"]:
             bk0submax = K0_MAX_SUBMAX_MAP[self.bk0max]
-            if self.dpad == 't': return f'true /*a.hdim_q % {bk0submax} != 0*/' # TODO: order of get_pipelines() matters! (ugly)
-            else :               return f'a.hdim_q % {bk0submax} == 0'
-        else:   assert False
+            if self.dpad == "t":
+                return f"true /*a.hdim_q % {bk0submax} != 0*/"  # TODO: order of get_pipelines() matters! (ugly)
+            else:
+                return f"a.hdim_q % {bk0submax} == 0"
+        else:
+            assert False
 
     @property
     def dvcheck(self) -> str:
-        if self.pipeline_tag == 'qr_async':
+        if self.pipeline_tag == "qr_async":
             vec = int((32 * 4) / DTYPE_BITS[self.dtype])
-            if self.dvpad == 't': return f'a.hdim_v % {vec} == 0'
-            else :                assert False
-        elif self.pipeline_tag in ['qr_pagedkv', 'qs']:
+            if self.dvpad == "t":
+                return f"a.hdim_v % {vec} == 0"
+            else:
+                assert False
+        elif self.pipeline_tag in ["qr_pagedkv", "qs"]:
             bk0submax = K0_MAX_SUBMAX_MAP[self.bk0max]
-            if self.dvpad == 't': return f'true /*a.hdim_v % {bk0submax} != 0*/' # TODO: order of get_pipelines() matters! (ugly)
-            else :                return f'a.hdim_v % {bk0submax} == 0'
-        else:   assert False
+            if self.dvpad == "t":
+                return f"true /*a.hdim_v % {bk0submax} != 0*/"  # TODO: order of get_pipelines() matters! (ugly)
+            else:
+                return f"a.hdim_v % {bk0submax} == 0"
+        else:
+            assert False
+
 
 @dataclass
 class FmhaFwdPipeline:
-    tag : str
+    tag: str
 
-    F_vlayout   : str  # row/col
-    F_spad      : str  # true/false
-    F_skpad     : str  #
-    F_dpad      : str  #
-    F_dvpad     : str  #
-    F_logits    : str  # t/f
-    F_bias      : str  # true/false
-    F_lse       : str  #
-    F_pagedkv   : str  #
-    F_squant    : str  #
-    F_mask      : str  # value from MASK_MAP
-    F_skip      : str  # true/false
+    F_vlayout: str  # row/col
+    F_spad: str  # true/false
+    F_skpad: str  #
+    F_dpad: str  #
+    F_dvpad: str  #
+    F_logits: str  # t/f
+    F_bias: str  # true/false
+    F_lse: str  #
+    F_pagedkv: str  #
+    F_squant: str  #
+    F_mask: str  # value from MASK_MAP
+    F_skip: str  # true/false
 
     @property
     def name(self) -> str:
         def pad_name() -> str:
-            n = ''
-            if self.F_spad == 't': n += 's'
-            if self.F_skpad == 't' : n += 'sk'
-            if self.F_dpad == 't' : n += 'd'
-            if self.F_dvpad == 't' : n += 'dv'
-            if n != '' : n = 'p' + n
+            n = ""
+            if self.F_spad == "t":
+                n += "s"
+            if self.F_skpad == "t":
+                n += "sk"
+            if self.F_dpad == "t":
+                n += "d"
+            if self.F_dvpad == "t":
+                n += "dv"
+            if n != "":
+                n = "p" + n
             return n
+
         pn = pad_name()
-        n = f'{self.tag}_v{self.F_vlayout[0]}'
-        if pn != '' : n += f'_{pn}'
-        else: n += '_npad'
-
-        if self.F_logits == 't' : n += '_logits'
-        else: n += '_nlogits'
-
-        if self.F_bias != 'no' : n += f'_{self.F_bias}'
-        else: n += '_nbias'
-
-        if self.F_mask[0:2] == 's_':
-            if self.F_mask == 's_mask': n += f'_mask'
-            else: n += '_nmask'
+        n = f"{self.tag}_v{self.F_vlayout[0]}"
+        if pn != "":
+            n += f"_{pn}"
         else:
-            if self.F_mask != 'no' : n += f'_m{self.F_mask[0]}'
-            else: n += '_nmask'
+            n += "_npad"
 
-        if self.F_lse == 't' : n += '_lse'
-        else: n += '_nlse'
+        if self.F_logits == "t":
+            n += "_logits"
+        else:
+            n += "_nlogits"
 
-        if self.F_skip == 't' : n += '_skip'
-        else: n += '_nskip'
+        if self.F_bias != "no":
+            n += f"_{self.F_bias}"
+        else:
+            n += "_nbias"
 
-        if self.F_squant == 't' : n += '_squant'
-        else: n += '_nsquant'
+        if self.F_mask[0:2] == "s_":
+            if self.F_mask == "s_mask":
+                n += "_mask"
+            else:
+                n += "_nmask"
+        else:
+            if self.F_mask != "no":
+                n += f"_m{self.F_mask[0]}"
+            else:
+                n += "_nmask"
 
-        if self.F_pagedkv == 't' : n += '_pagedkv'
-        else: n += '_npagedkv'
+        if self.F_lse == "t":
+            n += "_lse"
+        else:
+            n += "_nlse"
+
+        if self.F_skip == "t":
+            n += "_skip"
+        else:
+            n += "_nskip"
+
+        if self.F_squant == "t":
+            n += "_squant"
+        else:
+            n += "_nsquant"
+
+        if self.F_pagedkv == "t":
+            n += "_pagedkv"
+        else:
+            n += "_npagedkv"
 
         return n
 
+
 class FmhaFwdApiPool:
     def __init__(self, mask_impl):
         self.pool = dict()
         self.mask_impl = mask_impl
 
-    def register_traits(self, trait : FmhaFwdApiTrait) -> None:
+    def register_traits(self, trait: FmhaFwdApiTrait) -> None:
         # TODO: do we need to check duplication?
         if trait.dtype not in self.pool.keys():
             self.pool[trait.dtype] = dict()
@@ -292,117 +341,152 @@ class FmhaFwdApiPool:
 
     @property
     def api(self) -> str:
-        per_dtypes=str()
+        per_dtypes = str()
         for i, dtype in enumerate(self.pool.keys()):
-            per_hdim_case=str()
+            per_hdim_case = str()
             for j, hdim in enumerate(self.pool[dtype].keys()):
-                traits=self.pool[dtype][hdim]
-                inners=str()
+                traits = self.pool[dtype][hdim]
+                inners = str()
                 for k, trait in enumerate(traits):
-                    if_k = 'if' if k == 0 else 'else if'
-                    inners = inners + FMHA_FWD_API_INNER_DISPATCH.format(F_if=if_k, F_mode=MODE_MAP[trait.mode], F_vlayout=LAYOUT_MAP[trait.vlayout],
-                                   F_pipeline_enum=PIPELINE_ENUM_MAP[trait.pipeline_tag], F_logits=BOOL_MAP[trait.logits], F_mask=get_mask_map(self.mask_impl)[trait.mask],
-                                   F_mask_check=get_mask_check_map(self.mask_impl)[trait.mask], F_bias_check=BIAS_CHECK_MAP[trait.bias], F_bias=BIAS_MAP[trait.bias],
-                                   F_lse=BOOL_MAP[trait.lse], F_pagedkv=BOOL_MAP[trait.pagedkv], F_skip=BOOL_MAP[trait.skip],
-                                   F_squant=BOOL_MAP[trait.squant], F_scheck=trait.scheck, F_skcheck=trait.skcheck, F_dcheck=trait.dcheck, F_dvcheck=trait.dvcheck,
-                                   F_spad=BOOL_MAP[trait.spad], F_skpad=BOOL_MAP[trait.skpad], F_dpad=BOOL_MAP[trait.dpad], F_dvpad=BOOL_MAP[trait.dvpad],
-                                   F_bm0=trait.bm0, F_bn0=trait.bn0, F_bk0=trait.bk0, F_bn1=trait.bn1, F_bk1=trait.bk1, F_bk0max=trait.bk0max,
-                                   F_hdim=hdim, F_dtype=FWD_DTYPE_MAP[dtype])
-                if_j = 'if' if j == 0 else 'else if'
-                per_hdim_case = per_hdim_case + FMHA_FWD_API_PER_HDIM_CASE.format(F_if=if_j, F_hdim=hdim, F_hdim_v=trait.bn1, F_inner_dispatch=inners)
-            if_i = 'if' if i == 0 else 'else if'
-            per_dtypes = per_dtypes + FMHA_FWD_API_PER_DTYPE.format(F_if=if_i, F_dtype=dtype, F_hdim_case=per_hdim_case)
+                    if_k = "if" if k == 0 else "else if"
+                    inners = inners + FMHA_FWD_API_INNER_DISPATCH.format(
+                        F_if=if_k,
+                        F_mode=MODE_MAP[trait.mode],
+                        F_vlayout=LAYOUT_MAP[trait.vlayout],
+                        F_pipeline_enum=PIPELINE_ENUM_MAP[trait.pipeline_tag],
+                        F_logits=BOOL_MAP[trait.logits],
+                        F_mask=get_mask_map(self.mask_impl)[trait.mask],
+                        F_mask_check=get_mask_check_map(self.mask_impl)[trait.mask],
+                        F_bias_check=BIAS_CHECK_MAP[trait.bias],
+                        F_bias=BIAS_MAP[trait.bias],
+                        F_lse=BOOL_MAP[trait.lse],
+                        F_pagedkv=BOOL_MAP[trait.pagedkv],
+                        F_skip=BOOL_MAP[trait.skip],
+                        F_squant=BOOL_MAP[trait.squant],
+                        F_scheck=trait.scheck,
+                        F_skcheck=trait.skcheck,
+                        F_dcheck=trait.dcheck,
+                        F_dvcheck=trait.dvcheck,
+                        F_spad=BOOL_MAP[trait.spad],
+                        F_skpad=BOOL_MAP[trait.skpad],
+                        F_dpad=BOOL_MAP[trait.dpad],
+                        F_dvpad=BOOL_MAP[trait.dvpad],
+                        F_bm0=trait.bm0,
+                        F_bn0=trait.bn0,
+                        F_bk0=trait.bk0,
+                        F_bn1=trait.bn1,
+                        F_bk1=trait.bk1,
+                        F_bk0max=trait.bk0max,
+                        F_hdim=hdim,
+                        F_dtype=FWD_DTYPE_MAP[dtype],
+                    )
+                if_j = "if" if j == 0 else "else if"
+                per_hdim_case = per_hdim_case + FMHA_FWD_API_PER_HDIM_CASE.format(
+                    F_if=if_j, F_hdim=hdim, F_hdim_v=trait.bn1, F_inner_dispatch=inners
+                )
+            if_i = "if" if i == 0 else "else if"
+            per_dtypes = per_dtypes + FMHA_FWD_API_PER_DTYPE.format(
+                F_if=if_i, F_dtype=dtype, F_hdim_case=per_hdim_case
+            )
         if not per_dtypes:
             # empty string we add some ignore to suppress warning in api
-            per_dtypes += '    (void)t ; (void)s ; (void)a;'
-        return FMHA_FWD_KERNEL_HEADER + FMHA_FWD_API.format(F_dispatch = per_dtypes)
+            per_dtypes += "    (void)t ; (void)s ; (void)a;"
+        return FMHA_FWD_KERNEL_HEADER + FMHA_FWD_API.format(F_dispatch=per_dtypes)
+
 
 @dataclass
 class FmhaFwdTileSize:
-    F_bm0       : int  # tile size along q seqlen (block size)
-    F_bn0       : int  # tile size along k seqlen
-    F_bk0       : int  # tile size along qk gemm unroll
-    F_bn1       : int  # tile size along v head_dim
-    F_bk1       : int  # tile size along kv gemm unroll
-    F_bk0max    : int  # total length of K0, used for pipeline that need load Q at once (or repeately load Q as a whole tile)
-    F_rm0       : int  # number of warps for gemm0 along q seqlen
-    F_rn0       : int  # number of warps for gemm0 along k seqlen
-    F_rk0       : int  # number of warps for gemm0 along head dim q (not used)
-    F_rm1       : int  # number of warps for gemm1 along q seqlen
-    F_rn1       : int  # number of warps for gemm1 along head dim v
-    F_rk1       : int  # number of warps for gemm1 along k seqlen (not used)
-    F_wm0       : int  # gemm0 warp size along m
-    F_wn0       : int  # gemm0 warp size along n
-    F_wk0       : int  # gemm0 warp size along k
-    F_wm1       : int  # gemm1 warp size along m
-    F_wn1       : int  # gemm1 warp size along n
-    F_wk1       : int  # gemm1 warp size along k
-    F_occupancy : int  # occupancy, -1 will let pipeline decide the occupancy, other value will overwrite occupancy
+    F_bm0: int  # tile size along q seqlen (block size)
+    F_bn0: int  # tile size along k seqlen
+    F_bk0: int  # tile size along qk gemm unroll
+    F_bn1: int  # tile size along v head_dim
+    F_bk1: int  # tile size along kv gemm unroll
+    F_bk0max: int  # total length of K0, used for pipeline that need load Q at once (or repeately load Q as a whole tile)
+    F_rm0: int  # number of warps for gemm0 along q seqlen
+    F_rn0: int  # number of warps for gemm0 along k seqlen
+    F_rk0: int  # number of warps for gemm0 along head dim q (not used)
+    F_rm1: int  # number of warps for gemm1 along q seqlen
+    F_rn1: int  # number of warps for gemm1 along head dim v
+    F_rk1: int  # number of warps for gemm1 along k seqlen (not used)
+    F_wm0: int  # gemm0 warp size along m
+    F_wn0: int  # gemm0 warp size along n
+    F_wk0: int  # gemm0 warp size along k
+    F_wm1: int  # gemm1 warp size along m
+    F_wn1: int  # gemm1 warp size along n
+    F_wk1: int  # gemm1 warp size along k
+    F_occupancy: int  # occupancy, -1 will let pipeline decide the occupancy, other value will overwrite occupancy
+
     @property
     def name(self) -> str:
-        return f"b{self.F_bm0}x{self.F_bn0}x{self.F_bk0}x{self.F_bn1}x{self.F_bk1}x{self.F_bk0max}" +\
-        f"_r{self.F_rm0}x{self.F_rn0}x{self.F_rk0}_r{self.F_rm1}x{self.F_rn1}x{self.F_rk1}" +\
-        f"_w{self.F_wm0}x{self.F_wn0}x{self.F_wk0}_w{self.F_wm1}x{self.F_wn1}x{self.F_wk1}" +\
-        ("" if self.F_occupancy == -1 else f"_o{self.F_occupancy}")
+        return (
+            f"b{self.F_bm0}x{self.F_bn0}x{self.F_bk0}x{self.F_bn1}x{self.F_bk1}x{self.F_bk0max}"
+            + f"_r{self.F_rm0}x{self.F_rn0}x{self.F_rk0}_r{self.F_rm1}x{self.F_rn1}x{self.F_rk1}"
+            + f"_w{self.F_wm0}x{self.F_wn0}x{self.F_wk0}_w{self.F_wm1}x{self.F_wn1}x{self.F_wk1}"
+            + ("" if self.F_occupancy == -1 else f"_o{self.F_occupancy}")
+        )
+
 
 @dataclass
 class FmhaFwdKernel:
-    F_idx           : int  # this is not a tunable, but a counter to differentiate symbol
-    F_hdim          : int  # hdim
-    F_dtype         : str  # data type
-    F_mode          : str  # value from MODE_MAP
-    F_tile          : FmhaFwdTileSize
-    F_pipeline      : FmhaFwdPipeline
-    mask_impl       : str
+    F_idx: int  # this is not a tunable, but a counter to differentiate symbol
+    F_hdim: int  # hdim
+    F_dtype: str  # data type
+    F_mode: str  # value from MODE_MAP
+    F_tile: FmhaFwdTileSize
+    F_pipeline: FmhaFwdPipeline
+    mask_impl: str
 
     @property
     def template(self) -> str:
-        kernel_body = str()
-        return FMHA_FWD_KERNEL_HEADER + \
-            FMHA_FWD_KERNEL_BODY.format(
-                F_idx           = self.F_idx,
-                F_hdim          = self.F_hdim,
-                F_dtype         = FWD_DTYPE_MAP[self.F_dtype],
-                F_bm0           = self.F_tile.F_bm0,
-                F_bn0           = self.F_tile.F_bn0,
-                F_bk0           = self.F_tile.F_bk0,
-                F_bn1           = self.F_tile.F_bn1,
-                F_bk1           = self.F_tile.F_bk1,
-                F_bk0max        = self.F_tile.F_bk0max,
-                F_rm0           = self.F_tile.F_rm0,
-                F_rn0           = self.F_tile.F_rn0,
-                F_rk0           = self.F_tile.F_rk0,
-                F_rm1           = self.F_tile.F_rm1,
-                F_rn1           = self.F_tile.F_rn1,
-                F_rk1           = self.F_tile.F_rk1,
-                F_wm0           = self.F_tile.F_wm0,
-                F_wn0           = self.F_tile.F_wn0,
-                F_wk0           = self.F_tile.F_wk0,
-                F_wm1           = self.F_tile.F_wm1,
-                F_wn1           = self.F_tile.F_wn1,
-                F_wk1           = self.F_tile.F_wk1,
-                F_vlayout       = LAYOUT_MAP[self.F_pipeline.F_vlayout],
-                F_spad          = BOOL_MAP[self.F_pipeline.F_spad],
-                F_skpad         = BOOL_MAP[self.F_pipeline.F_skpad],
-                F_dpad          = BOOL_MAP[self.F_pipeline.F_dpad],
-                F_dvpad         = BOOL_MAP[self.F_pipeline.F_dvpad],
-                F_logits        = BOOL_MAP[self.F_pipeline.F_logits],
-                F_bias          = BIAS_MAP[self.F_pipeline.F_bias],
-                F_lse           = BOOL_MAP[self.F_pipeline.F_lse],
-                F_pagedkv       = BOOL_MAP[self.F_pipeline.F_pagedkv],
-                F_squant        = BOOL_MAP[self.F_pipeline.F_squant],
-                F_skip          = BOOL_MAP[self.F_pipeline.F_skip],
-                F_occupancy     = self.F_tile.F_occupancy,
-                F_pipeline_enum = PIPELINE_ENUM_MAP[self.F_pipeline.tag],
-                F_mask          = get_mask_map(self.mask_impl)[self.F_pipeline.F_mask],
-                F_mode          = MODE_MAP[self.F_mode],
-                F_pipeline      = FMHA_FWD_PAGEDKV_PIPELINE_MAP[self.F_pipeline.tag])
+        return FMHA_FWD_KERNEL_HEADER + FMHA_FWD_KERNEL_BODY.format(
+            F_idx=self.F_idx,
+            F_hdim=self.F_hdim,
+            F_dtype=FWD_DTYPE_MAP[self.F_dtype],
+            F_bm0=self.F_tile.F_bm0,
+            F_bn0=self.F_tile.F_bn0,
+            F_bk0=self.F_tile.F_bk0,
+            F_bn1=self.F_tile.F_bn1,
+            F_bk1=self.F_tile.F_bk1,
+            F_bk0max=self.F_tile.F_bk0max,
+            F_rm0=self.F_tile.F_rm0,
+            F_rn0=self.F_tile.F_rn0,
+            F_rk0=self.F_tile.F_rk0,
+            F_rm1=self.F_tile.F_rm1,
+            F_rn1=self.F_tile.F_rn1,
+            F_rk1=self.F_tile.F_rk1,
+            F_wm0=self.F_tile.F_wm0,
+            F_wn0=self.F_tile.F_wn0,
+            F_wk0=self.F_tile.F_wk0,
+            F_wm1=self.F_tile.F_wm1,
+            F_wn1=self.F_tile.F_wn1,
+            F_wk1=self.F_tile.F_wk1,
+            F_vlayout=LAYOUT_MAP[self.F_pipeline.F_vlayout],
+            F_spad=BOOL_MAP[self.F_pipeline.F_spad],
+            F_skpad=BOOL_MAP[self.F_pipeline.F_skpad],
+            F_dpad=BOOL_MAP[self.F_pipeline.F_dpad],
+            F_dvpad=BOOL_MAP[self.F_pipeline.F_dvpad],
+            F_logits=BOOL_MAP[self.F_pipeline.F_logits],
+            F_bias=BIAS_MAP[self.F_pipeline.F_bias],
+            F_lse=BOOL_MAP[self.F_pipeline.F_lse],
+            F_pagedkv=BOOL_MAP[self.F_pipeline.F_pagedkv],
+            F_squant=BOOL_MAP[self.F_pipeline.F_squant],
+            F_skip=BOOL_MAP[self.F_pipeline.F_skip],
+            F_occupancy=self.F_tile.F_occupancy,
+            F_pipeline_enum=PIPELINE_ENUM_MAP[self.F_pipeline.tag],
+            F_mask=get_mask_map(self.mask_impl)[self.F_pipeline.F_mask],
+            F_mode=MODE_MAP[self.F_mode],
+            F_pipeline=FMHA_FWD_PAGEDKV_PIPELINE_MAP[self.F_pipeline.tag],
+        )
 
     @property
     def name(self) -> str:
         # TODO: we don't encode idx here
-        return f"fmha_fwd_pagedkv_d{self.F_hdim}_{self.F_dtype}_{self.F_mode}_" + \
-                self.F_tile.name + '_' + self.F_pipeline.name
+        return (
+            f"fmha_fwd_pagedkv_d{self.F_hdim}_{self.F_dtype}_{self.F_mode}_"
+            + self.F_tile.name
+            + "_"
+            + self.F_pipeline.name
+        )
 
     @property
     def filename(self) -> str:
@@ -410,51 +494,64 @@ class FmhaFwdKernel:
 
     def api_trait(self) -> FmhaFwdApiTrait:
         return FmhaFwdApiTrait(
-                pipeline_tag=self.F_pipeline.tag,
-                hdim=str(self.F_hdim),
-                dtype=self.F_dtype,
-                mode=self.F_mode,
-                bm0=self.F_tile.F_bm0,
-                bn0=self.F_tile.F_bn0,
-                bk0=self.F_tile.F_bk0,
-                bn1=self.F_tile.F_bn1,
-                bk1=self.F_tile.F_bk1,
-                bk0max=self.F_tile.F_bk0max,
-                vlayout=self.F_pipeline.F_vlayout,
-                mask=self.F_pipeline.F_mask,
-                logits=self.F_pipeline.F_logits,
-                bias=self.F_pipeline.F_bias,
-                lse=self.F_pipeline.F_lse,
-                pagedkv=self.F_pipeline.F_pagedkv,
-                squant=self.F_pipeline.F_squant,
-                spad=self.F_pipeline.F_spad,
-                skpad=self.F_pipeline.F_skpad,
-                dpad=self.F_pipeline.F_dpad,
-                dvpad=self.F_pipeline.F_dvpad,
-                skip=self.F_pipeline.F_skip)
+            pipeline_tag=self.F_pipeline.tag,
+            hdim=str(self.F_hdim),
+            dtype=self.F_dtype,
+            mode=self.F_mode,
+            bm0=self.F_tile.F_bm0,
+            bn0=self.F_tile.F_bn0,
+            bk0=self.F_tile.F_bk0,
+            bn1=self.F_tile.F_bn1,
+            bk1=self.F_tile.F_bk1,
+            bk0max=self.F_tile.F_bk0max,
+            vlayout=self.F_pipeline.F_vlayout,
+            mask=self.F_pipeline.F_mask,
+            logits=self.F_pipeline.F_logits,
+            bias=self.F_pipeline.F_bias,
+            lse=self.F_pipeline.F_lse,
+            pagedkv=self.F_pipeline.F_pagedkv,
+            squant=self.F_pipeline.F_squant,
+            spad=self.F_pipeline.F_spad,
+            skpad=self.F_pipeline.F_skpad,
+            dpad=self.F_pipeline.F_dpad,
+            dvpad=self.F_pipeline.F_dvpad,
+            skip=self.F_pipeline.F_skip,
+        )
+
 
 # TODO: design a more practical way to do it
 # this is current supported tile size per hdim
-def get_fmha_fwd_tile_dict_from_dtype(dtype : str) -> Optional[dict]:
-    if dtype == 'fp16' or dtype == 'bf16':
+def get_fmha_fwd_tile_dict_from_dtype(dtype: str) -> Optional[dict]:
+    if dtype == "fp16" or dtype == "bf16":
         return {
             # '32'  : FmhaFwdTileSize(128, 64,  16, 32,  32,  32,   2, 1, 1,  2, 1, 1,  32, 32, 16,  32, 32, 16,  -1),
             # '64'  : FmhaFwdTileSize(128, 64,  32, 64,  32,  64,   4, 1, 1,  4, 1, 1,  32, 32, 16,  32, 32, 16,  -1),
-        ### '96'  : FmhaFwdTileSize(128, 128, 32, 128, 32,  96,   4, 1, 1,  4, 1, 1,  32, 32, 16,  32, 32, 16,  -1),
-            '128' : FmhaFwdTileSize(128, 128, 32, 128, 32,  128,  4, 1, 1,  4, 1, 1,  32, 32, 16,  32, 32, 16,  -1),
+            ### '96'  : FmhaFwdTileSize(128, 128, 32, 128, 32,  96,   4, 1, 1,  4, 1, 1,  32, 32, 16,  32, 32, 16,  -1),
+            "128": FmhaFwdTileSize(
+                128, 128, 32, 128, 32, 128, 4, 1, 1, 4, 1, 1, 32, 32, 16, 32, 32, 16, -1
+            ),
             # '192' : FmhaFwdTileSize(128, 128, 32, 128, 32,  192,  4, 1, 1,  4, 1, 1,  32, 32, 16,  32, 32, 16,  -1),
             # '256' : FmhaFwdTileSize(128, 128, 32, 256, 32,  256,  4, 1, 1,  4, 1, 1,  32, 32, 16,  32, 32, 16,  -1),
         }
-    elif dtype == 'fp8' or dtype == 'bf8':
+    elif dtype == "fp8" or dtype == "bf8":
         return {
-            '64'  : FmhaFwdTileSize(128, 64,  32, 64,  32,  64,   2, 1, 1,  2, 1, 1,  32, 32, 32,  32, 32, 32,  -1),
-            '128' : FmhaFwdTileSize(128, 128, 32, 128, 32,  128,  4, 1, 1,  4, 1, 1,  32, 32, 32,  32, 32, 32,  -1),
-            '256' : FmhaFwdTileSize(128, 128, 32, 256, 32,  256,  4, 1, 1,  4, 1, 1,  32, 32, 32,  32, 32, 32,  -1),
+            "64": FmhaFwdTileSize(
+                128, 64, 32, 64, 32, 64, 2, 1, 1, 2, 1, 1, 32, 32, 32, 32, 32, 32, -1
+            ),
+            "128": FmhaFwdTileSize(
+                128, 128, 32, 128, 32, 128, 4, 1, 1, 4, 1, 1, 32, 32, 32, 32, 32, 32, -1
+            ),
+            "256": FmhaFwdTileSize(
+                128, 128, 32, 256, 32, 256, 4, 1, 1, 4, 1, 1, 32, 32, 32, 32, 32, 32, -1
+            ),
         }
     else:
         return None
 
-def get_fwd_blobs(kernel_filter : Optional[str], receipt, optdim_list, mask_impl) -> Tuple[FmhaFwdApiPool, List[FmhaFwdKernel]]:
+
+def get_fwd_blobs(
+    kernel_filter: Optional[str], receipt, optdim_list, mask_impl
+) -> Tuple[FmhaFwdApiPool, List[FmhaFwdKernel]]:
     # TODO: we don't support tuning yet, so pick up one value for vlayout/pipeline/pad
     #       support this in future
     def get_pipelines(dtype, hdim) -> List[FmhaFwdPipeline]:
@@ -462,18 +559,90 @@ def get_fwd_blobs(kernel_filter : Optional[str], receipt, optdim_list, mask_impl
         # TODO: the order of List matters! the later in this list will be also be checked later
         # TODO: currently for qr_pagedkv pipeline, let 't' padding to appear later!!
         # TODO: how to design this more generic?
-        squant = 't' if dtype == 'fp8' else 'f'
+        squant = "t" if dtype == "fp8" else "f"
         pipelines = []
-        if dtype in ['fp16', 'bf16']:
-            for logits, mask, bias,  pagedkv, skip in itertools.product(["t", "f"], get_mask_map(mask_impl).keys(), BIAS_MAP.keys(),  ["t"], ["f"]):
-                pipelines.append(FmhaFwdPipeline('qr_pagedkv', 'row', 't', 'f', 'f', 'f', logits, bias, 'f',  pagedkv, squant, mask, skip))
-                pipelines.append(FmhaFwdPipeline('qr_pagedkv', 'row', 't', 't', 'f', 'f', logits, bias, 'f',  pagedkv, squant, mask, skip))
-        elif dtype in ['fp8', 'bf8']:
+        if dtype in ["fp16", "bf16"]:
+            for logits, mask, bias, pagedkv, skip in itertools.product(
+                ["t", "f"],
+                get_mask_map(mask_impl).keys(),
+                BIAS_MAP.keys(),
+                ["t"],
+                ["f"],
+            ):
+                pipelines.append(
+                    FmhaFwdPipeline(
+                        "qr_pagedkv",
+                        "row",
+                        "t",
+                        "f",
+                        "f",
+                        "f",
+                        logits,
+                        bias,
+                        "f",
+                        pagedkv,
+                        squant,
+                        mask,
+                        skip,
+                    )
+                )
+                pipelines.append(
+                    FmhaFwdPipeline(
+                        "qr_pagedkv",
+                        "row",
+                        "t",
+                        "t",
+                        "f",
+                        "f",
+                        logits,
+                        bias,
+                        "f",
+                        pagedkv,
+                        squant,
+                        mask,
+                        skip,
+                    )
+                )
+        elif dtype in ["fp8", "bf8"]:
             # no need lse/dropout kernels
-            for logits, mask, bias in itertools.product(["t", "f"], get_mask_map(mask_impl).keys(), BIAS_MAP.keys()):
-                pipelines.append(FmhaFwdPipeline('qr_pagedkv', 'row', 'f', 'f', 'f', 'f', logits, bias, 'f', 't', squant, mask, 'f'))
-                pipelines.append(FmhaFwdPipeline('qr_pagedkv', 'row', 't', 't', 'f', 'f', logits, bias, 'f', 't', squant, mask, 'f'))
-        elif dtype in ['fp8fp16', 'fp8bf16']:
+            for logits, mask, bias in itertools.product(
+                ["t", "f"], get_mask_map(mask_impl).keys(), BIAS_MAP.keys()
+            ):
+                pipelines.append(
+                    FmhaFwdPipeline(
+                        "qr_pagedkv",
+                        "row",
+                        "f",
+                        "f",
+                        "f",
+                        "f",
+                        logits,
+                        bias,
+                        "f",
+                        "t",
+                        squant,
+                        mask,
+                        "f",
+                    )
+                )
+                pipelines.append(
+                    FmhaFwdPipeline(
+                        "qr_pagedkv",
+                        "row",
+                        "t",
+                        "t",
+                        "f",
+                        "f",
+                        logits,
+                        bias,
+                        "f",
+                        "t",
+                        squant,
+                        mask,
+                        "f",
+                    )
+                )
+        elif dtype in ["fp8fp16", "fp8bf16"]:
             # TODO
             None
         else:
@@ -485,9 +654,9 @@ def get_fwd_blobs(kernel_filter : Optional[str], receipt, optdim_list, mask_impl
 
     for dtype in FWD_DTYPE_MAP.keys():
         d = get_fmha_fwd_tile_dict_from_dtype(dtype)
-        if d == None:
+        if d is None:
             continue
-        #for hdim_str, mode, mask, bias, lse in itertools.product(d.keys(), MODE_MAP.keys(), MASK_MAP.keys(), ["t", "f"], ["t", "f"]):
+        # for hdim_str, mode, mask, bias, lse in itertools.product(d.keys(), MODE_MAP.keys(), MASK_MAP.keys(), ["t", "f"], ["t", "f"]):
         for hdim_str, mode in itertools.product(d.keys(), MODE_MAP.keys()):
             tile = d[hdim_str]
             hdim = int(hdim_str)
@@ -495,24 +664,29 @@ def get_fwd_blobs(kernel_filter : Optional[str], receipt, optdim_list, mask_impl
                 # if pipeline.F_pagedkv == 'f':
                 #     continue
                 if mode == "group":
-                    if pipeline.F_spad != 't' or pipeline.F_skpad != 't':
+                    if pipeline.F_spad != "t" or pipeline.F_skpad != "t":
                         # in group mode, spad/skpad must be true, since we can't predict if seqlen of current batch need pad or not
                         continue
                 if hdim == 192 and tile.F_bn1 == 128:
                     # NOTE: this is used to speedup deepseek prefill case, we don't gen training
-                    if pipeline.F_bias != 'no' or pipeline.F_lse == 't' :
+                    if pipeline.F_bias != "no" or pipeline.F_lse == "t":
                         continue
                 # logits_soft_cap is only allowed if no bias
-                if not ((pipeline.F_logits == 't' and pipeline.F_bias == 'no') or pipeline.F_logits == 'f'):
+                if not (
+                    (pipeline.F_logits == "t" and pipeline.F_bias == "no")
+                    or pipeline.F_logits == "f"
+                ):
                     continue
-                k = FmhaFwdKernel(F_idx=0,
-                                  F_hdim=hdim,
-                                  F_dtype=dtype,
-                                  F_mode=mode,
-                                  F_tile=tile,
-                                  F_pipeline=pipeline,
-                                  mask_impl=mask_impl)
-                if kernel_filter != '':
+                k = FmhaFwdKernel(
+                    F_idx=0,
+                    F_hdim=hdim,
+                    F_dtype=dtype,
+                    F_mode=mode,
+                    F_tile=tile,
+                    F_pipeline=pipeline,
+                    mask_impl=mask_impl,
+                )
+                if kernel_filter != "":
                     if not fnmatch.fnmatch(k.name, kernel_filter):
                         continue
                 if optdim_list != [-1]:
@@ -520,49 +694,49 @@ def get_fwd_blobs(kernel_filter : Optional[str], receipt, optdim_list, mask_impl
                         continue
                 # 2 - Flash attention integration
                 if receipt in (2, 3):
-                    cond = dtype in ['fp16', 'bf16']
-                    cond &= pipeline.F_vlayout == 'row'
-                    cond &= pipeline.F_bias in ['no', 'alibi']
-                    cond &= pipeline.F_squant == 'f'
-                    cond &= pipeline.F_skip == 'f'
+                    cond = dtype in ["fp16", "bf16"]
+                    cond &= pipeline.F_vlayout == "row"
+                    cond &= pipeline.F_bias in ["no", "alibi"]
+                    cond &= pipeline.F_squant == "f"
+                    cond &= pipeline.F_skip == "f"
                     if not cond:
                         continue
                 # PyTorch integration
                 elif receipt == 4:
-                    cond = dtype in ['fp16', 'bf16']
-                    cond &= pipeline.F_vlayout == 'row'
-                    cond &= pipeline.F_bias in ['no', 'bias']
-                    cond &= pipeline.F_squant == 'f'
-                    cond &= pipeline.F_skip == 'f'
+                    cond = dtype in ["fp16", "bf16"]
+                    cond &= pipeline.F_vlayout == "row"
+                    cond &= pipeline.F_bias in ["no", "bias"]
+                    cond &= pipeline.F_squant == "f"
+                    cond &= pipeline.F_skip == "f"
                     if not cond:
                         continue
                 # Aiter(mha_fwd) integration
                 elif receipt == 100:
-                    cond = dtype in ['fp16', 'bf16']
-                    cond &= mode == 'batch'
-                    cond &= pipeline.F_vlayout == 'row'
-                    cond &= pipeline.F_squant == 'f'
+                    cond = dtype in ["fp16", "bf16"]
+                    cond &= mode == "batch"
+                    cond &= pipeline.F_vlayout == "row"
+                    cond &= pipeline.F_squant == "f"
                     if not cond:
                         continue
                 # Aiter(mha_varlen_fwd) integration
                 elif receipt == 200:
-                    cond = dtype in ['fp16', 'bf16']
-                    cond &= mode == 'group'
-                    cond &= pipeline.F_vlayout == 'row'
-                    cond &= pipeline.F_squant == 'f'
+                    cond = dtype in ["fp16", "bf16"]
+                    cond &= mode == "group"
+                    cond &= pipeline.F_vlayout == "row"
+                    cond &= pipeline.F_squant == "f"
                     if not cond:
                         continue
                 # aiter::mha_fwd C++ api integration
                 elif receipt == 600:
-                    cond = dtype in ['fp16', 'bf16']
-                    cond &= pipeline.F_vlayout == 'row'
-                    cond &= pipeline.F_squant == 'f'
+                    cond = dtype in ["fp16", "bf16"]
+                    cond &= pipeline.F_vlayout == "row"
+                    cond &= pipeline.F_squant == "f"
                     if not cond:
                         continue
 
                 # fp32 only
                 if receipt == 800 or receipt == 801:
-                    cond = dtype == 'fp32'
+                    cond = dtype == "fp32"
                     if not cond:
                         continue
 
@@ -571,20 +745,28 @@ def get_fwd_blobs(kernel_filter : Optional[str], receipt, optdim_list, mask_impl
 
     return (api_pool, gen)
 
+
 def write_single_fwd_kernel(kernel: FmhaFwdKernel, autogen_dir: Path) -> None:
     (autogen_dir / kernel.filename).write_text(kernel.template)
 
-def write_fwd_api(api_pool : FmhaFwdApiPool, autogen_dir: Path) -> None:
+
+def write_fwd_api(api_pool: FmhaFwdApiPool, autogen_dir: Path) -> None:
     (autogen_dir / FMHA_FWD_API_FILENAME).write_text(api_pool.api)
 
-def write_blobs(output_dir : Path, kernel_filter : str, receipt, optdim_list, mask_impl) -> None:
+
+def write_blobs(
+    output_dir: Path, kernel_filter: str, receipt, optdim_list, mask_impl
+) -> None:
     api_pool, kernels = get_fwd_blobs(kernel_filter, receipt, optdim_list, mask_impl)
     for kernel in kernels:
         write_single_fwd_kernel(kernel, output_dir)
     write_fwd_api(api_pool, output_dir)
 
-def list_blobs(file_path : Path, kernel_filter : str, receipt, optdim_list, mask_impl) -> None:
-    with file_path.open('a') as f:
+
+def list_blobs(
+    file_path: Path, kernel_filter: str, receipt, optdim_list, mask_impl
+) -> None:
+    with file_path.open("a") as f:
         _, kernels = get_fwd_blobs(kernel_filter, receipt, optdim_list, mask_impl)
         for kernel in kernels:
             f.write(str(file_path.parent / GEN_DIR / kernel.filename) + "\n")
diff --git a/example/ck_tile/01_fmha/generate.py b/example/ck_tile/01_fmha/generate.py
index 0317330511..fce37061f6 100644
--- a/example/ck_tile/01_fmha/generate.py
+++ b/example/ck_tile/01_fmha/generate.py
@@ -6,30 +6,45 @@ import argparse
 from enum import IntEnum
 from pathlib import Path
 import pkgutil
-import sys
 from typing import List, Optional
 
 import codegen.ops
-from codegen.cmake_config import *
+from codegen.cmake_config import GEN_DIR
 
 
 class HandlerId(IntEnum):
     LIST_BLOBS = 0
     WRITE_BLOBS = 1
 
+
 # inspect all modules under 'codegen.ops' and register API handlers
 ops = []
 for importer, module_name, _ in pkgutil.iter_modules(codegen.ops.__path__):
-    full_module_name = '%s.%s' % (codegen.ops.__name__, module_name)
+    full_module_name = "%s.%s" % (codegen.ops.__name__, module_name)
     ops.append(importer.find_spec(module_name).loader.load_module(module_name))
-unwanted_prefix = 'fmha_'
+unwanted_prefix = "fmha_"
 handlers = dict(
-    [(op.__name__[len(unwanted_prefix):] if op.__name__.startswith(unwanted_prefix) else op.__name__,
-        (op.list_blobs, op.write_blobs)) for op in ops]
+    [
+        (
+            op.__name__[len(unwanted_prefix) :]
+            if op.__name__.startswith(unwanted_prefix)
+            else op.__name__,
+            (op.list_blobs, op.write_blobs),
+        )
+        for op in ops
+    ]
 )
 assert 0 < len(handlers)
 
-def write_blobs(output_dir: Optional[str], api_list : List[str], filters_list : List[str], optdim_list : List[int], receipt, mask_impl) -> None:
+
+def write_blobs(
+    output_dir: Optional[str],
+    api_list: List[str],
+    filters_list: List[str],
+    optdim_list: List[int],
+    receipt,
+    mask_impl,
+) -> None:
     if output_dir is None:
         output_dir = Path(__file__).parent
     else:
@@ -41,8 +56,16 @@ def write_blobs(output_dir: Optional[str], api_list : List[str], filters_list :
         handler = handlers[api][HandlerId.WRITE_BLOBS]
         handler(output_dir, kernel_filter, receipt, optdim_list, mask_impl)
 
+
 # list all the files that will be generated
-def list_blobs(output_file : Optional[str], api_list : List[str], filters_list : List[str], optdim_list : List[int], receipt, mask_impl) -> None:
+def list_blobs(
+    output_file: Optional[str],
+    api_list: List[str],
+    filters_list: List[str],
+    optdim_list: List[int],
+    receipt,
+    mask_impl,
+) -> None:
     assert output_file is not None
     file_path = Path(output_file)
 
@@ -53,6 +76,7 @@ def list_blobs(output_file : Optional[str], api_list : List[str], filters_list :
         handler = handlers[api][HandlerId.LIST_BLOBS]
         handler(file_path, kernel_filter, receipt, optdim_list, mask_impl)
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         prog="generate",
@@ -60,32 +84,29 @@ if __name__ == "__main__":
     )
     parser.add_argument(
         "-d",
-        "--direction", # we keep 'direction' option for backward compatibility
+        "--direction",  # we keep 'direction' option for backward compatibility
         "-a",
         "--api",
-        default='fwd',
+        default="fwd",
         required=False,
-        help="supply API(s) to generate (default: fwd). separated by comma."
+        help="supply API(s) to generate (default: fwd). separated by comma.",
     )
     parser.add_argument(
         "-o",
         "--output_dir",
         required=False,
-        help="write all the blobs into a directory"
+        help="write all the blobs into a directory",
     )
     parser.add_argument(
-        "-l",
-        "--list_blobs",
-        required=False,
-        help="list all the kernels to a file"
+        "-l", "--list_blobs", required=False, help="list all the kernels to a file"
     )
     # TODO: if using filter, must apply same value to output_dir and list_blobs
     parser.add_argument(
         "-f",
         "--filter",
-        default='',
+        default="",
         required=False,
-        help="filter out kernels that need to generate, using fnmatch module"
+        help="filter out kernels that need to generate, using fnmatch module",
     )
 
     parser.add_argument(
@@ -93,7 +114,7 @@ if __name__ == "__main__":
         "--mask",
         default="simplified",
         required=False,
-        help="mask implementation, simplified/generic"
+        help="mask implementation, simplified/generic",
     )
 
     parser.add_argument(
@@ -101,32 +122,46 @@ if __name__ == "__main__":
         "--receipt",
         default=0,
         required=False,
-        help="codegen receipt. 0: generate only 8xhdim coverage\n"  + \
-             "  1: generate more instance to cover all hdim\n"  + \
-             "  2: Only generate instance for Flash attention integration\n"  + \
-             "  4: Only generate instance for PyTorch integration\n" + \
-             "  100-199: Only generate instance for Aiter(mha_fwd) integration\n" + \
-             "  200-299: Only generate instance for Aiter(mha_varlen_fwd) integration\n" + \
-             "  300-399: Only generate instance for Aiter(mha_bwd) integration\n" + \
-             "  400-499: Only generate instance for Aiter(mha_varlen_bwd) integration\n" + \
-             "  600-699: Only generate instance for aiter::mha_fwd && aiter::mha_fwd_splitkv && aiter::mha_bwd C++ api integration"
+        help="codegen receipt. 0: generate only 8xhdim coverage\n"
+        + "  1: generate more instance to cover all hdim\n"
+        + "  2: Only generate instance for Flash attention integration\n"
+        + "  4: Only generate instance for PyTorch integration\n"
+        + "  100-199: Only generate instance for Aiter(mha_fwd) integration\n"
+        + "  200-299: Only generate instance for Aiter(mha_varlen_fwd) integration\n"
+        + "  300-399: Only generate instance for Aiter(mha_bwd) integration\n"
+        + "  400-499: Only generate instance for Aiter(mha_varlen_bwd) integration\n"
+        + "  600-699: Only generate instance for aiter::mha_fwd && aiter::mha_fwd_splitkv && aiter::mha_bwd C++ api integration",
     )
 
     parser.add_argument(
         "--optdim",
-        default='-1',
+        default="-1",
         required=False,
-        help="only optimize the hdim in the list. separated by comma. -1 is the default choice" + \
-              "eg. --optdim=32,64,128,256"
+        help="only optimize the hdim in the list. separated by comma. -1 is the default choice"
+        + "eg. --optdim=32,64,128,256",
     )
 
     args = parser.parse_args()
-    api_list = args.direction.split(',')
-    filter_list = args.filter.split(',')
-    filter_list.extend([''] * (len(api_list) - len(filter_list)))
-    optdim_list = [int(hdim) for hdim in args.optdim.split(',')]
+    api_list = args.direction.split(",")
+    filter_list = args.filter.split(",")
+    filter_list.extend([""] * (len(api_list) - len(filter_list)))
+    optdim_list = [int(hdim) for hdim in args.optdim.split(",")]
 
     if args.list_blobs is not None:
-        list_blobs(args.list_blobs, api_list, filter_list, optdim_list, int(args.receipt), mask_impl=args.mask)
+        list_blobs(
+            args.list_blobs,
+            api_list,
+            filter_list,
+            optdim_list,
+            int(args.receipt),
+            mask_impl=args.mask,
+        )
     else:
-        write_blobs(args.output_dir, api_list, filter_list, optdim_list, int(args.receipt), mask_impl=args.mask)
+        write_blobs(
+            args.output_dir,
+            api_list,
+            filter_list,
+            optdim_list,
+            int(args.receipt),
+            mask_impl=args.mask,
+        )
diff --git a/example/ck_tile/02_layernorm2d/generate.py b/example/ck_tile/02_layernorm2d/generate.py
index 5f589db8d0..c90948db55 100644
--- a/example/ck_tile/02_layernorm2d/generate.py
+++ b/example/ck_tile/02_layernorm2d/generate.py
@@ -6,47 +6,50 @@ import argparse
 from enum import IntEnum
 from pathlib import Path
 import sys
-from typing import List, Optional, Any
+from typing import List, Any
 import functools
 import itertools
 import copy
 from dataclasses import dataclass
 
-def get_if_str(idx, total, lase_else = True):
+
+def get_if_str(idx, total, lase_else=True):
     if idx == 0:
-        return 'if'
+        return "if"
     elif idx < total - 1:
-        return 'else if'
+        return "else if"
     else:
         if lase_else:
-            return 'else'
+            return "else"
         else:
-            return 'else if'
+            return "else if"
 
-XBIAS_ENUM_STR_MAP = [
-    'no',
-    'xbias']      # pre-norm add bias
+
+XBIAS_ENUM_STR_MAP = ["no", "xbias"]  # pre-norm add bias
 
 FUSED_ADD_ENUM_STR_MAP = [
-    'no',
-    'pras',      # pre-norm
-    'pra' ]      # post-norm
+    "no",
+    "pras",  # pre-norm
+    "pra",
+]  # post-norm
 
-FUSED_FUSED_SWEEP_STR_MAP = [
-    'no',
-    'dquant' ]
+FUSED_FUSED_SWEEP_STR_MAP = ["no", "dquant"]
+
+DATA_TYPE_MAP = {
+    "fp32": "float",
+    "fp16": "ck_tile::fp16_t",
+    "bf16": "ck_tile::bf16_t",
+    "int8": "ck_tile::int8_t",
+    "fp8": "ck_tile::fp8_t",
+}
 
-DATA_TYPE_MAP = {'fp32' : 'float',
-                 'fp16' : 'ck_tile::fp16_t',
-                 'bf16' : 'ck_tile::bf16_t',
-                 'int8' : 'ck_tile::int8_t',
-                 'fp8'  : 'ck_tile::fp8_t'}
 
 def BOOL_MAP(b_) -> str:
     if b_:
-        return 'true'
+        return "true"
     else:
-        return 'false'
+        return "false"
+
 
 class layernorm_fwd_codegen:
     API_TRAITS_DEFINE = """
@@ -268,15 +271,15 @@ float layernorm2d_fwd(layernorm2d_fwd_traits t,
 
 """
 
-    API_PER_DTYPE="""    {F_if}(t.prec_i == \"{F_i_type}\" && t.prec_o == \"{F_o_type}\"){{
+    API_PER_DTYPE = """    {F_if}(t.prec_i == \"{F_i_type}\" && t.prec_o == \"{F_o_type}\"){{
 {F_per_n_case}
     }}
 """
-    API_PER_N_CASE="""        {F_if} {F_N_COND} {{
+    API_PER_N_CASE = """        {F_if} {F_N_COND} {{
 {F_inner_dispatch}
         }}
 """
-    API_INNER_CASE="""            {F_if} {F_VEC_COND}
+    API_INNER_CASE = """            {F_if} {F_VEC_COND}
                 r={F_instance_func}(s, a);
 """
 
@@ -313,138 +316,141 @@ float layernorm2d_fwd(layernorm2d_fwd_traits t,
 
     @dataclass
     class k_traits:
-        F_kPadN : bool
-        F_kSaveMeanInvStd : bool
-        F_kTwoPass : bool
-        F_kXbias : Any #: layernorm_fwd_codegen.k_bias_enum
-        F_kFusedAdd : Any #: layernorm_fwd_codegen.k_fuesd_add_enum
-        F_kFusedQuant : Any  #: layernorm_fwd_codegen.k_fused_sweep_enum
+        F_kPadN: bool
+        F_kSaveMeanInvStd: bool
+        F_kTwoPass: bool
+        F_kXbias: Any  #: layernorm_fwd_codegen.k_bias_enum
+        F_kFusedAdd: Any  #: layernorm_fwd_codegen.k_fuesd_add_enum
+        F_kFusedQuant: Any  #: layernorm_fwd_codegen.k_fused_sweep_enum
 
     @dataclass
     class k_shape:
-        F_BlockTile    : List[int]
-        F_WarpPerBlock : List[int]
-        F_WarpTile     : List[int]
-        F_Vector_      : List[int]
+        F_BlockTile: List[int]
+        F_WarpPerBlock: List[int]
+        F_WarpTile: List[int]
+        F_Vector_: List[int]
+
         @property
         def F_BlockSize(self) -> int:
-            return functools.reduce(lambda a, b: a*b, self.F_WarpTile)
+            return functools.reduce(lambda a, b: a * b, self.F_WarpTile)
 
     @dataclass
     class k_problem:
-        F_XDataType       : str
-        F_XBiasDataType   : str
-        F_GammaDataType   : str
-        F_BetaDataType    : str
-        F_ComputeDataType : str
-        F_YDataType       : str
-        F_MeanDataType    : str
-        F_InvStdDataType  : str
-        F_BlockShape      : str
-        F_Traits          : Any #k_traits
+        F_XDataType: str
+        F_XBiasDataType: str
+        F_GammaDataType: str
+        F_BetaDataType: str
+        F_ComputeDataType: str
+        F_YDataType: str
+        F_MeanDataType: str
+        F_InvStdDataType: str
+        F_BlockShape: str
+        F_Traits: Any  # k_traits
 
     @dataclass
     class k_pipeline_one_pass:
-        F_Problem         : Any #k_problem
-    
+        F_Problem: Any  # k_problem
+
     @dataclass
     class k_pipeline_two_pass:
-        F_Problem         : Any #k_problem
+        F_Problem: Any  # k_problem
 
     @dataclass
     class default_2d_epilogue_problem:
-        F_AccDataType : str
-        F_ODataType : str
-        F_kPadM : bool
-        F_kPadN : bool
+        F_AccDataType: str
+        F_ODataType: str
+        F_kPadM: bool
+        F_kPadN: bool
 
     @dataclass
     class default_2d_epilogue:
-        F_problem : Any
+        F_problem: Any
 
     @dataclass
     class k_kernel:
-        F_pipeline : Any
-        F_epilogue : Any
+        F_pipeline: Any
+        F_epilogue: Any
 
     @dataclass
     class h_traits:
-        F_XDataType : str
-        F_YDataType : str
-        F_SmoothScaleDataType : str
-        F_YScaleDataType : str
-        F_Repeat_M : int
-        F_Repeat_N : int
-        F_ThreadPerBlock_M : int
-        F_ThreadPerBlock_N : int
-        F_Vector_N : int
-        F_kPadN : bool
-        F_kSaveMeanInvStd_ : bool
-        F_kFastFDiv_ : bool
-        F_kWelford_ : bool
-        F_kTwoPass_ : bool
-        F_kXbias_ : int
-        F_kFusedAdd : int
-        F_kFusedQuant : int
+        F_XDataType: str
+        F_YDataType: str
+        F_SmoothScaleDataType: str
+        F_YScaleDataType: str
+        F_Repeat_M: int
+        F_Repeat_N: int
+        F_ThreadPerBlock_M: int
+        F_ThreadPerBlock_N: int
+        F_Vector_N: int
+        F_kPadN: bool
+        F_kSaveMeanInvStd_: bool
+        F_kFastFDiv_: bool
+        F_kWelford_: bool
+        F_kTwoPass_: bool
+        F_kXbias_: int
+        F_kFusedAdd: int
+        F_kFusedQuant: int
 
         @property
-        def trait_name(self) ->str:
-            t_ = f'{DATA_TYPE_MAP[self.F_XDataType]}, {DATA_TYPE_MAP[self.F_YDataType]}, {DATA_TYPE_MAP[self.F_SmoothScaleDataType]}, {DATA_TYPE_MAP[self.F_YScaleDataType]}, {self.F_Repeat_M:2}, {self.F_Repeat_N:2}, {self.F_ThreadPerBlock_M:2}, {self.F_ThreadPerBlock_N:4}'
-            t_ += f', {self.F_Vector_N:2}, {BOOL_MAP(self.F_kPadN):5}, {BOOL_MAP(self.F_kSaveMeanInvStd_):5}, {BOOL_MAP(self.F_kFastFDiv_):5}, {BOOL_MAP(self.F_kWelford_):5}'
-            t_ += f', {BOOL_MAP(self.F_kTwoPass_):5}, {self.F_kXbias:4}, {self.F_kFusedAdd:4}, {self.F_kFusedQuant:4}'
+        def trait_name(self) -> str:
+            t_ = f"{DATA_TYPE_MAP[self.F_XDataType]}, {DATA_TYPE_MAP[self.F_YDataType]}, {DATA_TYPE_MAP[self.F_SmoothScaleDataType]}, {DATA_TYPE_MAP[self.F_YScaleDataType]}, {self.F_Repeat_M:2}, {self.F_Repeat_N:2}, {self.F_ThreadPerBlock_M:2}, {self.F_ThreadPerBlock_N:4}"
+            t_ += f", {self.F_Vector_N:2}, {BOOL_MAP(self.F_kPadN):5}, {BOOL_MAP(self.F_kSaveMeanInvStd_):5}, {BOOL_MAP(self.F_kFastFDiv_):5}, {BOOL_MAP(self.F_kWelford_):5}"
+            t_ += f", {BOOL_MAP(self.F_kTwoPass_):5}, {self.F_kXbias:4}, {self.F_kFusedAdd:4}, {self.F_kFusedQuant:4}"
             return t_
 
         # string when calling this kernel
         @property
         def call_name(self) -> str:
-            return f'layernorm2d_fwd_<traits_<{self.trait_name}>>'
+            return f"layernorm2d_fwd_<traits_<{self.trait_name}>>"
 
         # string when define this kernel
         @property
         def def_name(self) -> str:
-            return f'template float layernorm2d_fwd_<traits_<{self.trait_name}>>(const S&, A);'
+            return f"template float layernorm2d_fwd_<traits_<{self.trait_name}>>(const S&, A);"
 
     # this class hold kernel under same source file
     @dataclass
     class h_instance:
-        F_DataTypePair : str
-        F_N : str
-        F_xbias : int
-        F_add : int
-        F_sweep : int
-        instance_list : List[Any] # List[h_traits]
+        F_DataTypePair: str
+        F_N: str
+        F_xbias: int
+        F_add: int
+        F_sweep: int
+        instance_list: List[Any]  # List[h_traits]
 
         @property
         def name(self) -> str:
-            prec_i, prec_o = self.F_DataTypePair.split(',')
-            dtype_str = f'{prec_i}' if prec_i == prec_o else f'{prec_i}_{prec_o}'
-            nnn = f'layernorm2d_fwd_{dtype_str}_n{self.F_N}'
+            prec_i, prec_o = self.F_DataTypePair.split(",")
+            dtype_str = f"{prec_i}" if prec_i == prec_o else f"{prec_i}_{prec_o}"
+            nnn = f"layernorm2d_fwd_{dtype_str}_n{self.F_N}"
             if self.F_xbias != 0:
-                nnn = nnn + '_' + XBIAS_ENUM_STR_MAP[self.F_xbias] 
+                nnn = nnn + "_" + XBIAS_ENUM_STR_MAP[self.F_xbias]
             if self.F_add != 0:
-                nnn = nnn + '_' + FUSED_ADD_ENUM_STR_MAP[self.F_add]
+                nnn = nnn + "_" + FUSED_ADD_ENUM_STR_MAP[self.F_add]
             if self.F_sweep != 0:
-                nnn = nnn + '_' + FUSED_FUSED_SWEEP_STR_MAP[self.F_sweep]
+                nnn = nnn + "_" + FUSED_FUSED_SWEEP_STR_MAP[self.F_sweep]
             return nnn
 
         @property
-        def instance_name(self) ->str:
+        def instance_name(self) -> str:
             return self.name
 
         @property
-        def content(self) ->str:
-            instance_defs = ''
+        def content(self) -> str:
+            instance_defs = ""
             for ins in self.instance_list:
-                instance_defs += ins.def_name + '\n'
-            return layernorm_fwd_codegen.INSTANCE_BASE.format(F_instance_def=instance_defs)
+                instance_defs += ins.def_name + "\n"
+            return layernorm_fwd_codegen.INSTANCE_BASE.format(
+                F_instance_def=instance_defs
+            )
 
     @property
     def name_api(self) -> str:
-        return 'layernorm2d_fwd_api'
+        return "layernorm2d_fwd_api"
 
     @property
     def name_common_header(self) -> str:
-        return 'layernorm2d_fwd_api_common'
+        return "layernorm2d_fwd_api_common"
 
     def content_api(self, args) -> str:
         # 1 sort based on dtype
@@ -457,40 +463,64 @@ float layernorm2d_fwd(layernorm2d_fwd_traits t,
                 t_dtype_dict[blob.F_DataTypePair][blob.F_N] = []
             t_dtype_dict[blob.F_DataTypePair][blob.F_N].append(blob)
 
-        d_str = ''
+        d_str = ""
         for i_d, dtype_ in enumerate(t_dtype_dict):
             blob_per_t = t_dtype_dict[dtype_]
-            n_str = ''
+            n_str = ""
             for i_n, n_ in enumerate(blob_per_t):
                 blob_per_n = blob_per_t[n_]
                 inner_str = ""
                 for i_b, b_ in enumerate(blob_per_n):
                     # generate single kernel instance file
-                    #vec_str = ""
+                    # vec_str = ""
                     for i_ins, ins in enumerate(b_.instance_list):
                         idx_in_n = i_b * len(b_.instance_list) + i_ins
                         len_in_n = len(blob_per_n) * len(b_.instance_list)
                         # _if = 'if' if i_ins == 0 else 'else if'
                         if ins.F_kFusedQuant == 0:
-                            _sweep_cond = 't.fused_quant == {f_fused_sweep}'.format(f_fused_sweep = ins.F_kFusedQuant)
+                            _sweep_cond = "t.fused_quant == {f_fused_sweep}".format(
+                                f_fused_sweep=ins.F_kFusedQuant
+                            )
                         elif ins.F_kFusedQuant == 1:
-                            _sweep_cond = 't.fused_quant == {f_fused_sweep} && (t.prec_sm == \"{f_sx_type}\" && t.prec_sy == \"{f_sy_type}\")'.format(
-                                f_fused_sweep = ins.F_kFusedQuant, f_sx_type=ins.F_SmoothScaleDataType, f_sy_type=ins.F_YScaleDataType)
+                            _sweep_cond = 't.fused_quant == {f_fused_sweep} && (t.prec_sm == "{f_sx_type}" && t.prec_sy == "{f_sy_type}")'.format(
+                                f_fused_sweep=ins.F_kFusedQuant,
+                                f_sx_type=ins.F_SmoothScaleDataType,
+                                f_sy_type=ins.F_YScaleDataType,
+                            )
                         elif ins.F_kFusedQuant == 2:
-                            _sweep_cond = 't.fused_quant == {f_fused_sweep} && (t.prec_sy == \"{f_sy_type}\")'.format(
-                                f_fused_sweep = ins.F_kFusedQuant, f_sy_type=ins.F_YScaleDataType)
-                        _cond = '((a.n % {f_vec_n} == 0) && (t.xbias == {f_xbias}) && (t.fused_add == {f_fused_add}) && ({f_sweep_cond}))'.format(
-                                        f_vec_n = ins.F_Vector_N, f_xbias = ins.F_kXbias, f_fused_add = ins.F_kFusedAdd,
-                                        f_sweep_cond = _sweep_cond)
-                        inner_str += self.API_INNER_CASE.format(F_if = get_if_str(idx_in_n, len_in_n, False),
-                                            F_VEC_COND = _cond, F_instance_func=ins.call_name)
-                    #inner_str = inner_str + vec_str
-                n_cnd = f'(a.n <= {n_})' if isinstance(n_, int) else ''
-                n_str += self.API_PER_N_CASE.format(F_if = get_if_str(i_n, len(blob_per_t), not isinstance(n_, int)), F_N_COND=n_cnd, F_inner_dispatch=inner_str)
-            prec_i, prec_o = dtype_.split(',')
-            d_str += self.API_PER_DTYPE.format(F_if = get_if_str(i_d, len(t_dtype_dict), False), F_i_type=prec_i, F_o_type=prec_o, F_per_n_case=n_str)
+                            _sweep_cond = 't.fused_quant == {f_fused_sweep} && (t.prec_sy == "{f_sy_type}")'.format(
+                                f_fused_sweep=ins.F_kFusedQuant,
+                                f_sy_type=ins.F_YScaleDataType,
+                            )
+                        _cond = "((a.n % {f_vec_n} == 0) && (t.xbias == {f_xbias}) && (t.fused_add == {f_fused_add}) && ({f_sweep_cond}))".format(
+                            f_vec_n=ins.F_Vector_N,
+                            f_xbias=ins.F_kXbias,
+                            f_fused_add=ins.F_kFusedAdd,
+                            f_sweep_cond=_sweep_cond,
+                        )
+                        inner_str += self.API_INNER_CASE.format(
+                            F_if=get_if_str(idx_in_n, len_in_n, False),
+                            F_VEC_COND=_cond,
+                            F_instance_func=ins.call_name,
+                        )
+                    # inner_str = inner_str + vec_str
+                n_cnd = f"(a.n <= {n_})" if isinstance(n_, int) else ""
+                n_str += self.API_PER_N_CASE.format(
+                    F_if=get_if_str(i_n, len(blob_per_t), not isinstance(n_, int)),
+                    F_N_COND=n_cnd,
+                    F_inner_dispatch=inner_str,
+                )
+            prec_i, prec_o = dtype_.split(",")
+            d_str += self.API_PER_DTYPE.format(
+                F_if=get_if_str(i_d, len(t_dtype_dict), False),
+                F_i_type=prec_i,
+                F_o_type=prec_o,
+                F_per_n_case=n_str,
+            )
 
-        api_base = self.API_BASE.format(F_traits_define=self.API_TRAITS_DEFINE, F_dispatch=d_str)
+        api_base = self.API_BASE.format(
+            F_traits_define=self.API_TRAITS_DEFINE, F_dispatch=d_str
+        )
         return api_base
 
     @property
@@ -501,83 +531,982 @@ float layernorm2d_fwd(layernorm2d_fwd_traits t,
         h_traits = layernorm_fwd_codegen.h_traits
         h_instance = layernorm_fwd_codegen.h_instance
 
-        dynamic_quant_out_dtype = ['int8', 'fp8']
+        dynamic_quant_out_dtype = ["int8", "fp8"]
         # some predefined support range
         # (prec_i,prec_o) for simplicity this string will be used as key for dict
-        scale_list = [('fp32,fp32')]
-        dtype_list = [('fp16,fp16'), ('bf16,bf16'),
-                        ('fp16,int8'), ('bf16,int8'),
-                        ('fp16,fp8'), ('bf16,fp8')] # NOTE: only fused-dynamic-quant use int8 or fp8 out
-        types_8bit = ('int8', 'fp8')
-        types_16bit = ('int16', 'fp16', 'bf16')
-        #fused_add_list = [0, 1, 2]
-        #fused_sweep_list = [0, 1, 2] # NOTE: only single pass can use fused dynamic quant
+        scale_list = [("fp32,fp32")]
+        dtype_list = [
+            ("fp16,fp16"),
+            ("bf16,bf16"),
+            ("fp16,int8"),
+            ("bf16,int8"),
+            ("fp16,fp8"),
+            ("bf16,fp8"),
+        ]  # NOTE: only fused-dynamic-quant use int8 or fp8 out
+        types_8bit = ("int8", "fp8")
+        types_16bit = ("int16", "fp16", "bf16")
+        # fused_add_list = [0, 1, 2]
+        # fused_sweep_list = [0, 1, 2] # NOTE: only single pass can use fused dynamic quant
         xbias_list = [0, 1]
         fused_add_list = [0, 1]
-        fused_sweep_list = [0, 1] # NOTE: only single pass can use fused dynamic quant
+        fused_sweep_list = [0, 1]  # NOTE: only single pass can use fused dynamic quant
         #                                                       rm  rn  tm   tn  vn  pd     mv     fdiv  welford   2p     xbias    add   sweep
-        h_trait_dict = {'64'  : [ h_traits('x', 'y', 'xs', 'ys', 1,  1,  8,  8,  8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  1,  4,  16, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  1,  4,  64, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '128' : [ h_traits('x', 'y', 'xs', 'ys', 1,  1,  4,  16, 8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  1,  4,  64, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  2,  4,  64, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '256' : [ h_traits('x', 'y', 'xs', 'ys', 1,  1,  4,  64, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  2,  4,  64, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  4,  64, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '512' : [ h_traits('x', 'y', 'xs', 'ys', 1,  1,  4,  64, 8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  2,  4,  64, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  4,  64, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  8,  4,  64, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '768' : [ h_traits('x', 'y', 'xs', 'ys', 1,  3,  4,  64, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  6,  4,  64, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1, 12,  4,  64, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '1024' :[ h_traits('x', 'y', 'xs', 'ys', 1,  1,  2, 128, 8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  2,  2, 128, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  2, 128, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  1, 256, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '1536' :[ h_traits('x', 'y', 'xs', 'ys', 1,  3,  4,  64, 8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  3,  2, 128, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  3,  1, 256, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  6,  1, 256, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '2048' :[ h_traits('x', 'y', 'xs', 'ys', 1,  1,  1, 256, 8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  2,  1, 256, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  1, 256, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  8,  1, 256, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '3072' :[ h_traits('x', 'y', 'xs', 'ys', 1,  3,  1, 128, 8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  3,  1, 256, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  6,  1, 256, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  3,  1,1024, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '4096' :[ h_traits('x', 'y', 'xs', 'ys', 1,  2,  1, 256, 8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  1, 256, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  2,  1,1024, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  1,1024, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '6144' :[ h_traits('x', 'y', 'xs', 'ys', 1,  3,  1, 256, 8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  3,  1, 512, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  3,  1,1024, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  6,  1,1024, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '8192' :[ h_traits('x', 'y', 'xs', 'ys', 1,  4,  1, 256, 8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  1, 512, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  1,1024, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  8,  1,1024, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        'big'  :[ h_traits('x', 'y', 'xs', 'ys', 1,  1,  1,1024, 8,  True,  False, True, True,    True,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  1, 256, 4,  True,  False, True, True,    True,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1, 12,  1, 256, 2,  True,  False, True, True,    True,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  1,1024, 1,  True,  False, True, True,    True,   0,    0,    0)]}
+        h_trait_dict = {
+            "64": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    8,
+                    8,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    4,
+                    16,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    4,
+                    64,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "128": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    4,
+                    16,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    4,
+                    64,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    2,
+                    4,
+                    64,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "256": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    4,
+                    64,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    2,
+                    4,
+                    64,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    4,
+                    64,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "512": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    4,
+                    64,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    2,
+                    4,
+                    64,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    4,
+                    64,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    8,
+                    4,
+                    64,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "768": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    4,
+                    64,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    6,
+                    4,
+                    64,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    12,
+                    4,
+                    64,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "1024": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    2,
+                    128,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    2,
+                    2,
+                    128,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    2,
+                    128,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    1,
+                    256,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "1536": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    4,
+                    64,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    2,
+                    128,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    1,
+                    256,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    6,
+                    1,
+                    256,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "2048": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    1,
+                    256,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    2,
+                    1,
+                    256,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    1,
+                    256,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    8,
+                    1,
+                    256,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "3072": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    1,
+                    128,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    1,
+                    256,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    6,
+                    1,
+                    256,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    1,
+                    1024,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "4096": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    2,
+                    1,
+                    256,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    1,
+                    256,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    2,
+                    1,
+                    1024,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    1,
+                    1024,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "6144": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    1,
+                    256,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    1,
+                    512,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    1,
+                    1024,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    6,
+                    1,
+                    1024,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "8192": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    1,
+                    256,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    1,
+                    512,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    1,
+                    1024,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    8,
+                    1,
+                    1024,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "big": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    1,
+                    1024,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    True,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    1,
+                    256,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    True,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    12,
+                    1,
+                    256,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    True,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    1,
+                    1024,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    True,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+        }
         total_blob = list()
         for hs_key in h_trait_dict:
             hs = h_trait_dict[hs_key]
             current_n = hs[0].F_Repeat_N * hs[0].F_ThreadPerBlock_N * hs[0].F_Vector_N
-            for dtype, scale_type, xbias, fused_add, fused_quant in itertools.product(dtype_list, scale_list, xbias_list, fused_add_list, fused_sweep_list):
-                prec_i, prec_o = dtype.split(',')
-                scale_sm, scale_y = scale_type.split(',')
+            for dtype, scale_type, xbias, fused_add, fused_quant in itertools.product(
+                dtype_list, scale_list, xbias_list, fused_add_list, fused_sweep_list
+            ):
+                prec_i, prec_o = dtype.split(",")
+                scale_sm, scale_y = scale_type.split(",")
                 if prec_o in dynamic_quant_out_dtype and fused_quant != 1:
-                    continue # skip non dynamic quant case
-                if fused_quant == 1 and hs_key == 'big':
+                    continue  # skip non dynamic quant case
+                if fused_quant == 1 and hs_key == "big":
                     continue
                 current_hs = list()
                 for chs_ in hs:
-                    h_ = copy.copy(chs_) # copy the base instance out
+                    h_ = copy.copy(chs_)  # copy the base instance out
                     h_.F_XDataType = prec_i
                     h_.F_YDataType = prec_o
                     h_.F_SmoothScaleDataType = scale_sm
@@ -587,29 +1516,33 @@ float layernorm2d_fwd(layernorm2d_fwd_traits t,
                     h_.F_kFusedQuant = fused_quant
                     # disable welford update for 8bit and 16 bit smallN
                     if not h_.F_kTwoPass_:
-                        #disable 16 bit when set args disable_16b_welford
+                        # disable 16 bit when set args disable_16b_welford
                         if args.disable_16b_welford and prec_i in types_16bit:
                             h_.F_kWelford_ = False
-                        #disable 8bit by default
+                        # disable 8bit by default
                         elif prec_i in types_8bit or prec_o in types_8bit:
                             h_.F_kWelford_ = False
-                        #disable 16bit small N
-                        elif prec_i in types_16bit and hs_key == '64':
+                        # disable 16bit small N
+                        elif prec_i in types_16bit and hs_key == "64":
                             h_.F_kWelford_ = False
-                    current_hs.append(h_) # + "\n"
-                #f.write(str(f.parent / GEN_DIR / (blobs.api_common_header_
-                current_n_str = 'big' if hs_key == 'big' else current_n
-                total_blob.append(h_instance(dtype, current_n_str, xbias, fused_add, fused_quant, current_hs))
+                    current_hs.append(h_)  # + "\n"
+                # f.write(str(f.parent / GEN_DIR / (blobs.api_common_header_
+                current_n_str = "big" if hs_key == "big" else current_n
+                total_blob.append(
+                    h_instance(
+                        dtype, current_n_str, xbias, fused_add, fused_quant, current_hs
+                    )
+                )
         return total_blob
 
     def list_blobs(self, args) -> None:
         w_p = Path(self.working_path)
-        list_p = w_p / 'layernorm2d_fwd_blobs.txt'
+        list_p = w_p / "layernorm2d_fwd_blobs.txt"
         blobs = self.get_blobs(args)
-        with list_p.open('w') as list_f:
+        with list_p.open("w") as list_f:
             # api related file
-            list_f.write(str(w_p / (self.name_api + ".cpp"))  + "\n")
-            list_f.write(str(w_p / (self.name_common_header + ".hpp"))  + "\n")
+            list_f.write(str(w_p / (self.name_api + ".cpp")) + "\n")
+            list_f.write(str(w_p / (self.name_common_header + ".hpp")) + "\n")
             # kernel instance file
             for b in blobs:
                 list_f.write(str(w_p / (b.name + ".cpp")) + "\n")
@@ -618,24 +1551,28 @@ float layernorm2d_fwd(layernorm2d_fwd_traits t,
         w_p = Path(self.working_path)
         w_str = self.content_api(args)
         (w_p / (self.name_api + ".cpp")).write_text(w_str)
-        (w_p / (self.name_common_header + ".hpp")).write_text(self.content_common_header)
+        (w_p / (self.name_common_header + ".hpp")).write_text(
+            self.content_common_header
+        )
         blobs = self.get_blobs(args)
         for b in blobs:
             (w_p / (b.name + ".cpp")).write_text(b.content)
 
+
 def list_blobs(args):
-    api_list = args.api.split(',')
+    api_list = args.api.split(",")
     for api in api_list:
-        if api == 'fwd':
+        if api == "fwd":
             layernorm_fwd_codegen(args.working_path, args.filter).list_blobs(args)
 
 
 def gen_blobs(args):
-    api_list = args.api.split(',')
+    api_list = args.api.split(",")
     for api in api_list:
-        if api == 'fwd':
+        if api == "fwd":
             layernorm_fwd_codegen(args.working_path, args.filter).gen_blobs(args)
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         prog="generate",
@@ -644,9 +1581,9 @@ if __name__ == "__main__":
     parser.add_argument(
         "-a",
         "--api",
-        default='fwd[all]',
+        default="fwd[all]",
         required=False,
-        help="supply API(s) to generate (default: fwd). separated by comma."
+        help="supply API(s) to generate (default: fwd). separated by comma.",
     )
 
     # the directory for list_blobs/gen_blobs to write files into
@@ -655,7 +1592,7 @@ if __name__ == "__main__":
         "--working_path",
         default="./",
         required=False,
-        help="the path where all the blobs are going to be generated"
+        help="the path where all the blobs are going to be generated",
     )
 
     # this script have 2 modes
@@ -667,15 +1604,15 @@ if __name__ == "__main__":
     parser.add_argument(
         "-l",
         "--list_blobs",
-        action='store_true',
-        help="list all the kernels to a file, "
+        action="store_true",
+        help="list all the kernels to a file, ",
     )
 
     parser.add_argument(
         "-g",
         "--gen_blobs",
-        action='store_true',
-        help="generate all kernels into different tile"
+        action="store_true",
+        help="generate all kernels into different tile",
     )
 
     # TODO: if using filter, must apply same value to output_dir and list_blobs
@@ -683,7 +1620,7 @@ if __name__ == "__main__":
         "-f",
         "--filter",
         required=False,
-        help="filter out kernels that need to generate, using fnmatch module"
+        help="filter out kernels that need to generate, using fnmatch module",
     )
 
     parser.add_argument(
@@ -691,29 +1628,27 @@ if __name__ == "__main__":
         "--traits",
         default="all",
         required=False,
-        help="enable/disable some feature. default generate all"
+        help="enable/disable some feature. default generate all",
     )
 
     parser.add_argument(
-        "-r",
-        "--receipt",
-        default=0,
-        required=False,
-        help="codegen receipt."
+        "-r", "--receipt", default=0, required=False, help="codegen receipt."
     )
 
     parser.add_argument(
         "--disable_16b_welford",
         default=False,
         required=False,
-        help="enable/disable welford for 16bit datatype n > 64"
+        help="enable/disable welford for 16bit datatype n > 64",
     )
 
     args = parser.parse_args()
 
     # print(f'{args.list_blobs}-{args.gen_blobs}')
-    if (args.gen_blobs and args.list_blobs) or ((not args.gen_blobs) and (not args.list_blobs)):
-        print('gen_blobs/list_blobs must specify only one option')
+    if (args.gen_blobs and args.list_blobs) or (
+        (not args.gen_blobs) and (not args.list_blobs)
+    ):
+        print("gen_blobs/list_blobs must specify only one option")
         sys.exit()
 
     p = Path(args.working_path)
diff --git a/example/ck_tile/10_rmsnorm2d/generate.py b/example/ck_tile/10_rmsnorm2d/generate.py
index 75d7abd0ad..88e58aba5f 100644
--- a/example/ck_tile/10_rmsnorm2d/generate.py
+++ b/example/ck_tile/10_rmsnorm2d/generate.py
@@ -6,45 +6,51 @@ import argparse
 from enum import IntEnum
 from pathlib import Path
 import sys
-from typing import List, Optional, Any
+from typing import List, Any
 import functools
 import itertools
 import copy
 from dataclasses import dataclass
 
 
-def get_if_str(idx, total, lase_else = True):
+def get_if_str(idx, total, lase_else=True):
     if idx == 0:
-        return 'if'
+        return "if"
     elif idx < total - 1:
-        return 'else if'
+        return "else if"
     else:
         if lase_else:
-            return 'else'
+            return "else"
         else:
-            return 'else if'
+            return "else if"
+
 
 FUSED_ADD_ENUM_STR_MAP = [
-    'no',
-    'pras',      # pre-norm
-    'pra' ]      # post-norm
+    "no",
+    "pras",  # pre-norm
+    "pra",
+]  # post-norm
 
 FUSED_FUSED_SWEEP_STR_MAP = [
-    'no',
-    'sdquant',  # smooth dynamic quant
-    'dquant' ]  # dynamic quant (without sm_scale)
+    "no",
+    "sdquant",  # smooth dynamic quant
+    "dquant",
+]  # dynamic quant (without sm_scale)
+
+DATA_TYPE_MAP = {
+    "fp32": "float",
+    "fp16": "ck_tile::fp16_t",
+    "bf16": "ck_tile::bf16_t",
+    "int8": "ck_tile::int8_t",
+    "fp8": "ck_tile::fp8_t",
+}
 
-DATA_TYPE_MAP = {'fp32' : 'float',
-                 'fp16' : 'ck_tile::fp16_t',
-                 'bf16' : 'ck_tile::bf16_t',
-                 'int8' : 'ck_tile::int8_t',
-                 'fp8'  : 'ck_tile::fp8_t'}
 
 def BOOL_MAP(b_) -> str:
     if b_:
-        return 'true'
+        return "true"
     else:
-        return 'false'
+        return "false"
 
 
 class rmsnorm_fwd_codegen:
@@ -326,139 +332,142 @@ float rmsnorm2d_fwd(rmsnorm2d_fwd_traits t,
 
     @dataclass
     class k_traits:
-        F_kPadN : bool
-        F_kSaveMeanInvStd : bool
-        F_kTwoPass : bool
-        F_kFusedAdd : Any
-        F_kFusedQuant : Any
+        F_kPadN: bool
+        F_kSaveMeanInvStd: bool
+        F_kTwoPass: bool
+        F_kFusedAdd: Any
+        F_kFusedQuant: Any
 
     @dataclass
     class k_shape:
-        F_BlockTile    : List[int]
-        F_WarpPerBlock : List[int]
-        F_WarpTile     : List[int]
-        F_Vector_      : List[int]
+        F_BlockTile: List[int]
+        F_WarpPerBlock: List[int]
+        F_WarpTile: List[int]
+        F_Vector_: List[int]
+
         @property
         def F_BlockSize(self) -> int:
-            return functools.reduce(lambda a, b: a*b, self.F_WarpTile)
+            return functools.reduce(lambda a, b: a * b, self.F_WarpTile)
 
     @dataclass
     class k_problem:
-        F_XDataType       : str
-        F_GammaDataType   : str
-        F_ComputeDataType : str
-        F_YDataType       : str
-        F_InvRmsDataType  : str
-        F_BlockShape      : str
-        F_Traits          : Any #k_traits
+        F_XDataType: str
+        F_GammaDataType: str
+        F_ComputeDataType: str
+        F_YDataType: str
+        F_InvRmsDataType: str
+        F_BlockShape: str
+        F_Traits: Any  # k_traits
 
     @dataclass
     class k_pipeline_one_pass:
-        F_Problem         : Any #k_problem
+        F_Problem: Any  # k_problem
 
     @dataclass
     class k_pipeline_two_pass:
-        F_Problem         : Any #k_problem
+        F_Problem: Any  # k_problem
 
     @dataclass
     class default_2d_epilogue_problem:
-        F_AccDataType : str
-        F_ODataType : str
-        F_kPadM : bool
-        F_kPadN : bool
+        F_AccDataType: str
+        F_ODataType: str
+        F_kPadM: bool
+        F_kPadN: bool
 
     @dataclass
     class default_2d_epilogue:
-        F_problem : Any
+        F_problem: Any
 
     @dataclass
     class k_kernel:
-        F_pipeline : Any
-        F_epilogue : Any
+        F_pipeline: Any
+        F_epilogue: Any
 
     @dataclass
     class h_traits:
-        F_XDataType : str
-        F_YDataType : str
-        F_SmoothScaleDataType : str
-        F_YScaleDataType : str
-        F_UnquantYDataType : str
-        F_Repeat_M : int
-        F_Repeat_N : int
-        F_ThreadPerBlock_M : int
-        F_ThreadPerBlock_N : int
-        F_Vector_N : int
-        F_kPadN : bool
-        F_kSaveInvRms : bool
+        F_XDataType: str
+        F_YDataType: str
+        F_SmoothScaleDataType: str
+        F_YScaleDataType: str
+        F_UnquantYDataType: str
+        F_Repeat_M: int
+        F_Repeat_N: int
+        F_ThreadPerBlock_M: int
+        F_ThreadPerBlock_N: int
+        F_Vector_N: int
+        F_kPadN: bool
+        F_kSaveInvRms: bool
         F_kSaveUnquant: bool
-        F_kTwoPass : bool
-        F_kFusedAdd : int
-        F_kFusedQuant : int
-        F_use_model_sensitive_rmsnorm : int
+        F_kTwoPass: bool
+        F_kFusedAdd: int
+        F_kFusedQuant: int
+        F_use_model_sensitive_rmsnorm: int
 
         @property
-        def trait_name(self) ->str:
-            t_ = f'{DATA_TYPE_MAP[self.F_XDataType]}, {DATA_TYPE_MAP[self.F_YDataType]}, {DATA_TYPE_MAP[self.F_SmoothScaleDataType]}, {DATA_TYPE_MAP[self.F_YScaleDataType]}, {DATA_TYPE_MAP[self.F_UnquantYDataType]}, {self.F_Repeat_M:2}, {self.F_Repeat_N:2}, {self.F_ThreadPerBlock_M:2}, {self.F_ThreadPerBlock_N:4}'
-            t_ += f', {self.F_Vector_N:2}, {BOOL_MAP(self.F_kPadN):5}, {BOOL_MAP(self.F_kSaveInvRms):5}, {BOOL_MAP(self.F_kSaveUnquant):5}'
-            t_ += f', {BOOL_MAP(self.F_kTwoPass):5}, {self.F_kFusedAdd:4}, {self.F_kFusedQuant:4}, {self.F_use_model_sensitive_rmsnorm:4}'
+        def trait_name(self) -> str:
+            t_ = f"{DATA_TYPE_MAP[self.F_XDataType]}, {DATA_TYPE_MAP[self.F_YDataType]}, {DATA_TYPE_MAP[self.F_SmoothScaleDataType]}, {DATA_TYPE_MAP[self.F_YScaleDataType]}, {DATA_TYPE_MAP[self.F_UnquantYDataType]}, {self.F_Repeat_M:2}, {self.F_Repeat_N:2}, {self.F_ThreadPerBlock_M:2}, {self.F_ThreadPerBlock_N:4}"
+            t_ += f", {self.F_Vector_N:2}, {BOOL_MAP(self.F_kPadN):5}, {BOOL_MAP(self.F_kSaveInvRms):5}, {BOOL_MAP(self.F_kSaveUnquant):5}"
+            t_ += f", {BOOL_MAP(self.F_kTwoPass):5}, {self.F_kFusedAdd:4}, {self.F_kFusedQuant:4}, {self.F_use_model_sensitive_rmsnorm:4}"
             return t_
 
         # string when calling this kernel
         @property
         def call_name(self) -> str:
-            return f'rmsnorm2d_fwd_<traits_<{self.trait_name}>>'
+            return f"rmsnorm2d_fwd_<traits_<{self.trait_name}>>"
 
         # string when define this kernel
         @property
         def def_name(self) -> str:
-            return f'template float rmsnorm2d_fwd_<traits_<{self.trait_name}>>(const S&, A);'
+            return f"template float rmsnorm2d_fwd_<traits_<{self.trait_name}>>(const S&, A);"
 
     # this class hold kernel under same source file
     @dataclass
     class h_instance:
-        F_DataTypePair : str
-        F_N : str
-        F_add : int
-        F_sweep : int
-        F_saveunquant : bool
-        F_use_model_sensitive_rmsnorm : int
-        instance_list : List[Any] # List[h_traits]
+        F_DataTypePair: str
+        F_N: str
+        F_add: int
+        F_sweep: int
+        F_saveunquant: bool
+        F_use_model_sensitive_rmsnorm: int
+        instance_list: List[Any]  # List[h_traits]
 
         @property
         def name(self) -> str:
-            prec_i, prec_o = self.F_DataTypePair.split(',')
-            dtype_str = f'{prec_i}' if prec_i == prec_o else f'{prec_i}_{prec_o}'
-            nnn = f'rmsnorm2d_fwd_{dtype_str}_n{self.F_N}'
+            prec_i, prec_o = self.F_DataTypePair.split(",")
+            dtype_str = f"{prec_i}" if prec_i == prec_o else f"{prec_i}_{prec_o}"
+            nnn = f"rmsnorm2d_fwd_{dtype_str}_n{self.F_N}"
             if self.F_add != 0:
-                nnn = nnn + '_' + FUSED_ADD_ENUM_STR_MAP[self.F_add]
+                nnn = nnn + "_" + FUSED_ADD_ENUM_STR_MAP[self.F_add]
             if self.F_sweep != 0:
-                nnn = nnn + '_' + FUSED_FUSED_SWEEP_STR_MAP[self.F_sweep]
+                nnn = nnn + "_" + FUSED_FUSED_SWEEP_STR_MAP[self.F_sweep]
             if self.F_saveunquant:
-                nnn = nnn + '_saveunquant'
+                nnn = nnn + "_saveunquant"
             if self.F_use_model_sensitive_rmsnorm == 0:
-                nnn = nnn + '_nsm'
+                nnn = nnn + "_nsm"
             elif self.F_use_model_sensitive_rmsnorm == 1:
-                nnn = nnn + '_t5ml'
+                nnn = nnn + "_t5ml"
             return nnn
 
         @property
-        def instance_name(self) ->str:
+        def instance_name(self) -> str:
             return self.name
 
         @property
-        def content(self) ->str:
-            instance_defs = ''
+        def content(self) -> str:
+            instance_defs = ""
             for ins in self.instance_list:
-                instance_defs += ins.def_name + '\n'
-            return rmsnorm_fwd_codegen.INSTANCE_BASE.format(F_instance_def=instance_defs)
+                instance_defs += ins.def_name + "\n"
+            return rmsnorm_fwd_codegen.INSTANCE_BASE.format(
+                F_instance_def=instance_defs
+            )
 
     @property
     def name_api(self) -> str:
-        return 'rmsnorm2d_fwd_api'
+        return "rmsnorm2d_fwd_api"
 
     @property
     def name_common_header(self) -> str:
-        return 'rmsnorm2d_fwd_api_common'
+        return "rmsnorm2d_fwd_api_common"
 
     @property
     def content_api(self) -> str:
@@ -472,40 +481,66 @@ float rmsnorm2d_fwd(rmsnorm2d_fwd_traits t,
                 t_dtype_dict[blob.F_DataTypePair][blob.F_N] = []
             t_dtype_dict[blob.F_DataTypePair][blob.F_N].append(blob)
 
-        d_str = ''
+        d_str = ""
         for i_d, dtype_ in enumerate(t_dtype_dict):
             blob_per_t = t_dtype_dict[dtype_]
-            n_str = ''
+            n_str = ""
             for i_n, n_ in enumerate(blob_per_t):
                 blob_per_n = blob_per_t[n_]
                 inner_str = ""
                 for i_b, b_ in enumerate(blob_per_n):
                     # generate single kernel instance file
-                    #vec_str = ""
+                    # vec_str = ""
                     for i_ins, ins in enumerate(b_.instance_list):
                         idx_in_n = i_b * len(b_.instance_list) + i_ins
                         len_in_n = len(blob_per_n) * len(b_.instance_list)
                         # _if = 'if' if i_ins == 0 else 'else if'
                         if ins.F_kFusedQuant == 0:
-                            _sweep_cond = 't.fused_quant == {f_fused_sweep}'.format(f_fused_sweep = ins.F_kFusedQuant)
+                            _sweep_cond = "t.fused_quant == {f_fused_sweep}".format(
+                                f_fused_sweep=ins.F_kFusedQuant
+                            )
                         elif ins.F_kFusedQuant == 1:
-                            _sweep_cond = 't.fused_quant == {f_fused_sweep} && (t.prec_sm == \"{f_sx_type}\" && t.prec_sy == \"{f_sy_type}\" && t.save_unquant == {f_suq})'.format(
-                                f_fused_sweep = ins.F_kFusedQuant, f_sx_type=ins.F_SmoothScaleDataType, f_sy_type=ins.F_YScaleDataType, f_suq=BOOL_MAP(ins.F_kSaveUnquant))
+                            _sweep_cond = 't.fused_quant == {f_fused_sweep} && (t.prec_sm == "{f_sx_type}" && t.prec_sy == "{f_sy_type}" && t.save_unquant == {f_suq})'.format(
+                                f_fused_sweep=ins.F_kFusedQuant,
+                                f_sx_type=ins.F_SmoothScaleDataType,
+                                f_sy_type=ins.F_YScaleDataType,
+                                f_suq=BOOL_MAP(ins.F_kSaveUnquant),
+                            )
                         elif ins.F_kFusedQuant == 2:
-                            _sweep_cond = 't.fused_quant == {f_fused_sweep} && (t.prec_sy == \"{f_sy_type}\" && t.save_unquant == {f_suq})'.format(
-                                f_fused_sweep = ins.F_kFusedQuant, f_sy_type=ins.F_YScaleDataType, f_suq=BOOL_MAP(ins.F_kSaveUnquant))
-                        _cond = '((a.n % {f_vec_n} == 0) && (t.fused_add == {f_fused_add}) && ({f_sweep_cond}) && (t.use_model_sensitive_rmsnorm == {f_use_model_sensitive_rmsnorm}) )'.format(
-                                        f_vec_n = ins.F_Vector_N, f_fused_add = ins.F_kFusedAdd,
-                                        f_sweep_cond = _sweep_cond, f_use_model_sensitive_rmsnorm = ins.F_use_model_sensitive_rmsnorm)
-                        inner_str += self.API_INNER_CASE.format(F_if = get_if_str(idx_in_n, len_in_n, False),
-                                            F_VEC_COND = _cond, F_instance_func=ins.call_name)
-                    #inner_str = inner_str + vec_str
-                n_cnd = f'(a.n <= {n_})' if (i_n < len(blob_per_t) - 1) else ''
-                n_str += self.API_PER_N_CASE.format(F_if = get_if_str(i_n, len(blob_per_t)), F_N_COND=n_cnd, F_inner_dispatch=inner_str)
-            prec_i, prec_o = dtype_.split(',')
-            d_str += self.API_PER_DTYPE.format(F_if = get_if_str(i_d, len(t_dtype_dict), False), F_i_type=prec_i, F_o_type=prec_o, F_per_n_case=n_str)
+                            _sweep_cond = 't.fused_quant == {f_fused_sweep} && (t.prec_sy == "{f_sy_type}" && t.save_unquant == {f_suq})'.format(
+                                f_fused_sweep=ins.F_kFusedQuant,
+                                f_sy_type=ins.F_YScaleDataType,
+                                f_suq=BOOL_MAP(ins.F_kSaveUnquant),
+                            )
+                        _cond = "((a.n % {f_vec_n} == 0) && (t.fused_add == {f_fused_add}) && ({f_sweep_cond}) && (t.use_model_sensitive_rmsnorm == {f_use_model_sensitive_rmsnorm}) )".format(
+                            f_vec_n=ins.F_Vector_N,
+                            f_fused_add=ins.F_kFusedAdd,
+                            f_sweep_cond=_sweep_cond,
+                            f_use_model_sensitive_rmsnorm=ins.F_use_model_sensitive_rmsnorm,
+                        )
+                        inner_str += self.API_INNER_CASE.format(
+                            F_if=get_if_str(idx_in_n, len_in_n, False),
+                            F_VEC_COND=_cond,
+                            F_instance_func=ins.call_name,
+                        )
+                    # inner_str = inner_str + vec_str
+                n_cnd = f"(a.n <= {n_})" if (i_n < len(blob_per_t) - 1) else ""
+                n_str += self.API_PER_N_CASE.format(
+                    F_if=get_if_str(i_n, len(blob_per_t)),
+                    F_N_COND=n_cnd,
+                    F_inner_dispatch=inner_str,
+                )
+            prec_i, prec_o = dtype_.split(",")
+            d_str += self.API_PER_DTYPE.format(
+                F_if=get_if_str(i_d, len(t_dtype_dict), False),
+                F_i_type=prec_i,
+                F_o_type=prec_o,
+                F_per_n_case=n_str,
+            )
 
-        api_base = self.API_BASE.format(F_traits_define=self.API_TRAITS_DEFINE, F_dispatch=d_str)
+        api_base = self.API_BASE.format(
+            F_traits_define=self.API_TRAITS_DEFINE, F_dispatch=d_str
+        )
         return api_base
 
     @property
@@ -516,150 +551,2081 @@ float rmsnorm2d_fwd(rmsnorm2d_fwd_traits t,
         h_traits = rmsnorm_fwd_codegen.h_traits
         h_instance = rmsnorm_fwd_codegen.h_instance
 
-        dynamic_quant_out_dtype = ['int8', 'fp8']
+        dynamic_quant_out_dtype = ["int8", "fp8"]
         # some predefined support range
         # (prec_i,prec_o) for simplicity this string will be used as key for dict
-        scale_list = [('fp32,fp32')]
-        dtype_list = [('fp16,fp16'), ('bf16,bf16'),
-                        ('fp16,int8'), ('bf16,int8'),
-                        ('fp16,fp8'), ('bf16,fp8')] # NOTE: only fused-dynamic-quant use int8 out
-        #fused_add_list = [0, 1, 2]
-        #fused_sweep_list = [0, 1, 2] # NOTE: only single pass can use fused (smooth) dynamic quant
+        scale_list = [("fp32,fp32")]
+        dtype_list = [
+            ("fp16,fp16"),
+            ("bf16,bf16"),
+            ("fp16,int8"),
+            ("bf16,int8"),
+            ("fp16,fp8"),
+            ("bf16,fp8"),
+        ]  # NOTE: only fused-dynamic-quant use int8 out
+        # fused_add_list = [0, 1, 2]
+        # fused_sweep_list = [0, 1, 2] # NOTE: only single pass can use fused (smooth) dynamic quant
         fused_add_list = [0, 1]
-        fused_sweep_list = [0, 1, 2] # NOTE: only single pass can use fused (smooth) dynamic quant
+        fused_sweep_list = [
+            0,
+            1,
+            2,
+        ]  # NOTE: only single pass can use fused (smooth) dynamic quant
         bool_list = [False, True]
 
         h_trait_dicts = {
             0: {
                 #                                              rm  rn  tm   tn  vn  pd     mv     unquant  2p     add    sweep  srm
-                '64'   :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  8,  8,  8,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  4,  16, 4,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  4,  64, 1,  True,  False, False,   False,   0,    0,    0)],
-                '128'  :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  4,  16, 8,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  4,  64, 2,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  4,  64, 1,  True,  False, False,   False,   0,    0,    0)],
-                '256'  :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  4,  64, 4,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  4,  64, 2,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  4,  64, 1,  True,  False, False,   False,   0,    0,    0)],
-                '512'  :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  4,  64, 8,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  4,  64, 4,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  4,  64, 2,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  8,  4,  64, 1,  True,  False, False,   False,   0,    0,    0)],
-                '640'  :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  5,  4,  64, 2,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  5,  4, 128, 1,  True,  False, False,   False,   0,    0,    0)],
-                '768'  :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  4,  64, 4,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  6,  4,  64, 2,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1, 12,  4,  64, 1,  True,  False, False,   False,   0,    0,    0)],
-                '1024' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  2,  64, 8,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  2,  64, 4,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  8,  2,  64, 2,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1, 256, 1,  True,  False, False,   False,   0,    0,    0)],
-                '1536' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  4,  64, 8,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  2, 128, 4,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1, 256, 2,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  6,  1, 256, 1,  True,  False, False,   False,   0,    0,    0)],
-                '2048' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  1, 256, 8,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  1, 256, 4,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1, 256, 2,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  8,  1, 256, 1,  True,  False, False,   False,   0,    0,    0)],
-                '3072' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1, 128, 8,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1, 256, 4,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  6,  1, 256, 2,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1,1024, 1,  True,  False, False,   False,   0,    0,    0)],
-                '4096' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  1, 256, 8,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1, 256, 4,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  1,1024, 2,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1,1024, 1,  True,  False, False,   False,   0,    0,    0)],
-                '6144' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1, 256, 8,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1, 512, 4,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1,1024, 2,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  6,  1,1024, 1,  True,  False, False,   False,   0,    0,    0)],
-                '8192' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1, 256, 8,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1, 512, 4,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1,1024, 2,  True,  False, False,   False,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  8,  1,1024, 1,  True,  False, False,   False,   0,    0,    0)],
-                'big'  :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  1,1024, 8,  True,  False, False,    True,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1, 256, 4,  True,  False, False,    True,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1, 12,  1, 256, 2,  True,  False, False,    True,   0,    0,    0),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1,1024, 1,  True,  False, False,    True,   0,    0,    0)]
+                "64": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        8,
+                        8,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        4,
+                        16,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        4,
+                        64,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                ],
+                "128": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        4,
+                        16,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        4,
+                        64,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        2,
+                        4,
+                        64,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                ],
+                "256": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        4,
+                        64,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        2,
+                        4,
+                        64,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        4,
+                        64,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                ],
+                "512": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        4,
+                        64,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        2,
+                        4,
+                        64,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        4,
+                        64,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        8,
+                        4,
+                        64,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                ],
+                "640": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        5,
+                        4,
+                        64,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        5,
+                        4,
+                        128,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                ],
+                "768": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        3,
+                        4,
+                        64,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        6,
+                        4,
+                        64,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        12,
+                        4,
+                        64,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                ],
+                "1024": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        2,
+                        2,
+                        64,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        2,
+                        64,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        8,
+                        2,
+                        64,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        1,
+                        256,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                ],
+                "1536": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        3,
+                        4,
+                        64,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        3,
+                        2,
+                        128,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        3,
+                        1,
+                        256,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        6,
+                        1,
+                        256,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                ],
+                "2048": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        1,
+                        256,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        2,
+                        1,
+                        256,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        1,
+                        256,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        8,
+                        1,
+                        256,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                ],
+                "3072": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        3,
+                        1,
+                        128,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        3,
+                        1,
+                        256,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        6,
+                        1,
+                        256,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        3,
+                        1,
+                        1024,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                ],
+                "4096": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        2,
+                        1,
+                        256,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        1,
+                        256,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        2,
+                        1,
+                        1024,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        1,
+                        1024,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                ],
+                "6144": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        3,
+                        1,
+                        256,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        3,
+                        1,
+                        512,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        3,
+                        1,
+                        1024,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        6,
+                        1,
+                        1024,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                ],
+                "8192": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        1,
+                        256,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        1,
+                        512,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        1,
+                        1024,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        8,
+                        1,
+                        1024,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        0,
+                    ),
+                ],
+                "big": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        1,
+                        1024,
+                        8,
+                        True,
+                        False,
+                        False,
+                        True,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        1,
+                        256,
+                        4,
+                        True,
+                        False,
+                        False,
+                        True,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        12,
+                        1,
+                        256,
+                        2,
+                        True,
+                        False,
+                        False,
+                        True,
+                        0,
+                        0,
+                        0,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        1,
+                        1024,
+                        1,
+                        True,
+                        False,
+                        False,
+                        True,
+                        0,
+                        0,
+                        0,
+                    ),
+                ],
             },
             1: {
                 #                                               rm  rn  tm   tn  vn  pd     mv     unquant  2p     add    sweep srm
-                '64'   :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  8,  8,  8,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  4,  16, 4,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  4,  64, 1,  True,  False, False,   False,   0,    0,    1)],
-                '128'  :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  4,  16, 8,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  4,  64, 2,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  4,  64, 1,  True,  False, False,   False,   0,    0,    1)],
-                '256'  :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  8,  32, 8,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  4,  64, 4,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  4,  64, 2,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  4,  64, 1,  True,  False, False,   False,   0,    0,    1)],
-                '512'  :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  4,  64, 8,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  4,  64, 4,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  4,  64, 2,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  8,  4,  64, 1,  True,  False, False,   False,   0,    0,    1)],
-                '640'  :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  2, 128, 8,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  5,  4,  64, 2,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  5,  4, 128, 1,  True,  False, False,   False,   0,    0,    1)],
-                '768'  :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  2, 128, 8,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  4,  64, 4,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  6,  4,  64, 2,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1, 12,  4,  64, 1,  True,  False, False,   False,   0,    0,    1)],
-                '1024' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  2, 128, 8,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  2,  64, 4,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  8,  2,  64, 2,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1, 256, 1,  True,  False, False,   False,   0,    0,    1)],
-                '1536' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  1, 256, 8,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  2, 128, 4,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1, 256, 2,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  6,  1, 256, 1,  True,  False, False,   False,   0,    0,    1)],
-                '2048' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  1, 256, 8,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  1, 256, 4,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1, 256, 2,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  8,  1, 256, 1,  True,  False, False,   False,   0,    0,    1)],
-                '3072' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  1, 256, 8,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1, 256, 4,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  6,  1, 256, 2,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1,1024, 1,  True,  False, False,   False,   0,    0,    1)],
-                '4096' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  1, 256, 8,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1, 256, 4,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  1,1024, 2,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1,1024, 1,  True,  False, False,   False,   0,    0,    1)],
-                '6144' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1, 256, 8,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1, 512, 4,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1,1024, 2,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  6,  1,1024, 1,  True,  False, False,   False,   0,    0,    1)],
-                '8192' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1, 256, 8,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1, 512, 4,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1,1024, 2,  True,  False, False,   False,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  8,  1,1024, 1,  True,  False, False,   False,   0,    0,    1)],
-                'big'  :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  1,1024, 8,  True,  False, False,    True,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1, 256, 4,  True,  False, False,    True,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1, 12,  1, 256, 2,  True,  False, False,    True,   0,    0,    1),
-                          h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1,1024, 1,  True,  False, False,    True,   0,    0,    1)]
-            }
+                "64": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        8,
+                        8,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        4,
+                        16,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        4,
+                        64,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                ],
+                "128": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        4,
+                        16,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        4,
+                        64,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        2,
+                        4,
+                        64,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                ],
+                "256": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        8,
+                        32,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        4,
+                        64,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        2,
+                        4,
+                        64,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        4,
+                        64,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                ],
+                "512": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        4,
+                        64,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        2,
+                        4,
+                        64,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        4,
+                        64,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        8,
+                        4,
+                        64,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                ],
+                "640": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        2,
+                        128,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        5,
+                        4,
+                        64,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        5,
+                        4,
+                        128,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                ],
+                "768": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        2,
+                        128,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        3,
+                        4,
+                        64,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        6,
+                        4,
+                        64,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        12,
+                        4,
+                        64,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                ],
+                "1024": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        2,
+                        128,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        2,
+                        64,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        8,
+                        2,
+                        64,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        1,
+                        256,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                ],
+                "1536": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        1,
+                        256,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        3,
+                        2,
+                        128,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        3,
+                        1,
+                        256,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        6,
+                        1,
+                        256,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                ],
+                "2048": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        1,
+                        256,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        2,
+                        1,
+                        256,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        1,
+                        256,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        8,
+                        1,
+                        256,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                ],
+                "3072": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        2,
+                        1,
+                        256,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        3,
+                        1,
+                        256,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        6,
+                        1,
+                        256,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        3,
+                        1,
+                        1024,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                ],
+                "4096": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        2,
+                        1,
+                        256,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        1,
+                        256,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        2,
+                        1,
+                        1024,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        1,
+                        1024,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                ],
+                "6144": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        3,
+                        1,
+                        256,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        3,
+                        1,
+                        512,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        3,
+                        1,
+                        1024,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        6,
+                        1,
+                        1024,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                ],
+                "8192": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        1,
+                        256,
+                        8,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        1,
+                        512,
+                        4,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        1,
+                        1024,
+                        2,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        8,
+                        1,
+                        1024,
+                        1,
+                        True,
+                        False,
+                        False,
+                        False,
+                        0,
+                        0,
+                        1,
+                    ),
+                ],
+                "big": [
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        1,
+                        1,
+                        1024,
+                        8,
+                        True,
+                        False,
+                        False,
+                        True,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        1,
+                        256,
+                        4,
+                        True,
+                        False,
+                        False,
+                        True,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        12,
+                        1,
+                        256,
+                        2,
+                        True,
+                        False,
+                        False,
+                        True,
+                        0,
+                        0,
+                        1,
+                    ),
+                    h_traits(
+                        "x",
+                        "y",
+                        "xs",
+                        "ys",
+                        "uqy",
+                        1,
+                        4,
+                        1,
+                        1024,
+                        1,
+                        True,
+                        False,
+                        False,
+                        True,
+                        0,
+                        0,
+                        1,
+                    ),
+                ],
+            },
         }
 
         total_blob = list()
 
-        for model_sensitive_flag in [0, 1]: # 0: default; 1: model sensitive
+        for model_sensitive_flag in [0, 1]:  # 0: default; 1: model sensitive
             current_trait_dict = h_trait_dicts[model_sensitive_flag]
             for hs_key in current_trait_dict:
                 hs = current_trait_dict[hs_key]
                 current_n = hs_key
-                for dtype, scale_type, fused_add, fused_quant, save_unquant in itertools.product(dtype_list, scale_list, fused_add_list, fused_sweep_list, bool_list):
-                    prec_i, prec_o = dtype.split(',')
-                    scale_sm, scale_y = scale_type.split(',')
-                    if prec_o in dynamic_quant_out_dtype and fused_quant != 1 and fused_quant != 2:
-                        continue # skip non dynamic quant case
-                    if (fused_quant == 1 or fused_quant == 2) and hs_key == 'big':
+                for (
+                    dtype,
+                    scale_type,
+                    fused_add,
+                    fused_quant,
+                    save_unquant,
+                ) in itertools.product(
+                    dtype_list, scale_list, fused_add_list, fused_sweep_list, bool_list
+                ):
+                    prec_i, prec_o = dtype.split(",")
+                    scale_sm, scale_y = scale_type.split(",")
+                    if (
+                        prec_o in dynamic_quant_out_dtype
+                        and fused_quant != 1
+                        and fused_quant != 2
+                    ):
+                        continue  # skip non dynamic quant case
+                    if (fused_quant == 1 or fused_quant == 2) and hs_key == "big":
                         continue
-                    if (fused_quant == 0 and save_unquant == True):
-                        continue # save_unquant should always be false when there is no quant enabled
+                    if fused_quant == 0 and save_unquant:
+                        continue  # save_unquant should always be false when there is no quant enabled
                     current_hs = list()
                     for chs_ in hs:
-                        h_ = copy.copy(chs_) # copy the base instance out
+                        h_ = copy.copy(chs_)  # copy the base instance out
                         h_.F_XDataType = prec_i
                         h_.F_YDataType = prec_o
                         h_.F_SmoothScaleDataType = scale_sm
@@ -668,20 +2634,30 @@ float rmsnorm2d_fwd(rmsnorm2d_fwd_traits t,
                         h_.F_kFusedAdd = fused_add
                         h_.F_kFusedQuant = fused_quant
                         h_.F_kSaveUnquant = save_unquant
-                        current_hs.append(h_) # + "\n"
-                    #f.write(str(f.parent / GEN_DIR / (blobs.api_common_header_
-                    current_n_str = 'big' if hs_key == 'big' else current_n
-                    total_blob.append(h_instance(dtype, current_n_str, fused_add, fused_quant, save_unquant, h_.F_use_model_sensitive_rmsnorm, current_hs))
+                        current_hs.append(h_)  # + "\n"
+                    # f.write(str(f.parent / GEN_DIR / (blobs.api_common_header_
+                    current_n_str = "big" if hs_key == "big" else current_n
+                    total_blob.append(
+                        h_instance(
+                            dtype,
+                            current_n_str,
+                            fused_add,
+                            fused_quant,
+                            save_unquant,
+                            h_.F_use_model_sensitive_rmsnorm,
+                            current_hs,
+                        )
+                    )
         return total_blob
 
     def list_blobs(self) -> None:
         w_p = Path(self.working_path)
-        list_p = w_p / 'rmsnorm2d_fwd_blobs.txt'
+        list_p = w_p / "rmsnorm2d_fwd_blobs.txt"
         blobs = self.get_blobs()
-        with list_p.open('w') as list_f:
+        with list_p.open("w") as list_f:
             # api related file
-            list_f.write(str(w_p / (self.name_api + ".cpp"))  + "\n")
-            list_f.write(str(w_p / (self.name_common_header + ".hpp"))  + "\n")
+            list_f.write(str(w_p / (self.name_api + ".cpp")) + "\n")
+            list_f.write(str(w_p / (self.name_common_header + ".hpp")) + "\n")
             # kernel instance file
             for b in blobs:
                 list_f.write(str(w_p / (b.name + ".cpp")) + "\n")
@@ -689,23 +2665,25 @@ float rmsnorm2d_fwd(rmsnorm2d_fwd_traits t,
     def gen_blobs(self) -> None:
         w_p = Path(self.working_path)
         (w_p / (self.name_api + ".cpp")).write_text(self.content_api)
-        (w_p / (self.name_common_header + ".hpp")).write_text(self.content_common_header)
+        (w_p / (self.name_common_header + ".hpp")).write_text(
+            self.content_common_header
+        )
         blobs = self.get_blobs()
         for b in blobs:
             (w_p / (b.name + ".cpp")).write_text(b.content)
 
 
 def list_blobs(args):
-    api_list = args.api.split(',')
+    api_list = args.api.split(",")
     for api in api_list:
-        if api == 'fwd':
+        if api == "fwd":
             rmsnorm_fwd_codegen(args.working_path, args.filter).list_blobs()
 
 
 def gen_blobs(args):
-    api_list = args.api.split(',')
+    api_list = args.api.split(",")
     for api in api_list:
-        if api == 'fwd':
+        if api == "fwd":
             rmsnorm_fwd_codegen(args.working_path, args.filter).gen_blobs()
 
 
@@ -717,9 +2695,9 @@ if __name__ == "__main__":
     parser.add_argument(
         "-a",
         "--api",
-        default='fwd[all]',
+        default="fwd[all]",
         required=False,
-        help="supply API(s) to generate (default: fwd). separated by comma."
+        help="supply API(s) to generate (default: fwd). separated by comma.",
     )
 
     # the directory for list_blobs/gen_blobs to write files into
@@ -728,7 +2706,7 @@ if __name__ == "__main__":
         "--working_path",
         default="./",
         required=False,
-        help="the path where all the blobs are going to be generated"
+        help="the path where all the blobs are going to be generated",
     )
 
     # this script have 2 modes
@@ -740,15 +2718,15 @@ if __name__ == "__main__":
     parser.add_argument(
         "-l",
         "--list_blobs",
-        action='store_true',
-        help="list all the kernels to a file, "
+        action="store_true",
+        help="list all the kernels to a file, ",
     )
 
     parser.add_argument(
         "-g",
         "--gen_blobs",
-        action='store_true',
-        help="generate all kernels into different tile"
+        action="store_true",
+        help="generate all kernels into different tile",
     )
 
     # TODO: if using filter, must apply same value to output_dir and list_blobs
@@ -756,7 +2734,7 @@ if __name__ == "__main__":
         "-f",
         "--filter",
         required=False,
-        help="filter out kernels that need to generate, using fnmatch module"
+        help="filter out kernels that need to generate, using fnmatch module",
     )
 
     parser.add_argument(
@@ -764,22 +2742,20 @@ if __name__ == "__main__":
         "--traits",
         default="all",
         required=False,
-        help="enable/disable some feature. default generate all"
+        help="enable/disable some feature. default generate all",
     )
 
     parser.add_argument(
-        "-r",
-        "--receipt",
-        default=0,
-        required=False,
-        help="codegen receipt."
+        "-r", "--receipt", default=0, required=False, help="codegen receipt."
     )
 
     args = parser.parse_args()
 
     # print(f'{args.list_blobs}-{args.gen_blobs}')
-    if (args.gen_blobs and args.list_blobs) or ((not args.gen_blobs) and (not args.list_blobs)):
-        print('gen_blobs/list_blobs must specify only one option')
+    if (args.gen_blobs and args.list_blobs) or (
+        (not args.gen_blobs) and (not args.list_blobs)
+    ):
+        print("gen_blobs/list_blobs must specify only one option")
         sys.exit()
 
     p = Path(args.working_path)
diff --git a/example/ck_tile/36_pooling/pool3d.cpp b/example/ck_tile/36_pooling/pool3d.cpp
index bdfa1d99b3..bb76efbc03 100644
--- a/example/ck_tile/36_pooling/pool3d.cpp
+++ b/example/ck_tile/36_pooling/pool3d.cpp
@@ -2,7 +2,7 @@
 // Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
 
 #include "ck_tile/host.hpp"
-#include "ck_tile/ops/pool.hpp"
+#include "ck_tile/ops/pooling.hpp"
 #include "ck_tile/host/reference/reference_pool.hpp"
 #include <cstring>
 
diff --git a/example/ck_tile/remod.py b/example/ck_tile/remod.py
index b64fac7b06..b2ac7c52bf 100644
--- a/example/ck_tile/remod.py
+++ b/example/ck_tile/remod.py
@@ -1,21 +1,19 @@
 import pathlib
 from pathlib import Path
 import subprocess
-import os
-import copy
 
 all_files = []
 for p in sorted(Path("./").rglob("*")):
-    if p.suffix in ['.hpp', '.cpp']:
+    if p.suffix in [".hpp", ".cpp"]:
         all_files.append(pathlib.PurePath(p))
-            
+
 
 # formatting
 for x in all_files:
-    subprocess.Popen(f'dos2unix {str(x)}', shell=True)
-    cmd = f'clang-format-18 -style=file -i {str(x)}'
-    #for xp in x.parents:
-    #print(get_file_base(x))
+    subprocess.Popen(f"dos2unix -n {str(x)}", shell=True)
+    cmd = f"clang-format-18 -style=file -i {str(x)}"
+    # for xp in x.parents:
+    # print(get_file_base(x))
     subprocess.Popen(cmd, shell=True)
 
-#print(all_files)
+# print(all_files)
diff --git a/include/ck_tile/host.hpp b/include/ck_tile/host.hpp
index d815b1db40..b46bdd272d 100644
--- a/include/ck_tile/host.hpp
+++ b/include/ck_tile/host.hpp
@@ -18,6 +18,7 @@
 #include "ck_tile/host/kernel_launch.hpp"
 #include "ck_tile/host/permute_pk_int4.hpp"
 #include "ck_tile/host/ranges.hpp"
+#include "ck_tile/host/reference/reference_batched_contraction.hpp"
 #include "ck_tile/host/reference/reference_batched_dropout.hpp"
 #include "ck_tile/host/reference/reference_batched_dropout_randval.hpp"
 #include "ck_tile/host/reference/reference_batched_elementwise.hpp"
@@ -36,6 +37,7 @@
 #include "ck_tile/host/reference/reference_layernorm2d_fwd.hpp"
 #include "ck_tile/host/reference/reference_moe_sorting.hpp"
 #include "ck_tile/host/reference/reference_permute.hpp"
+#include "ck_tile/host/reference/reference_pool.hpp"
 #include "ck_tile/host/reference/reference_reduce.hpp"
 #include "ck_tile/host/reference/reference_rmsnorm2d_fwd.hpp"
 #include "ck_tile/host/reference/reference_rowwise_quantization2d.hpp"
diff --git a/include/ck_tile/host/reference/reference_batched_contraction.hpp b/include/ck_tile/host/reference/reference_batched_contraction.hpp
index 1ce071969c..a86accc778 100644
--- a/include/ck_tile/host/reference/reference_batched_contraction.hpp
+++ b/include/ck_tile/host/reference/reference_batched_contraction.hpp
@@ -4,6 +4,8 @@
 #pragma once
 
 #include <cstdlib>
+#include <functional>
+#include <numeric>
 #include <thread>
 
 #include "ck_tile/core.hpp"
@@ -155,6 +157,10 @@ void calculate_reference_multi_dimensional(
     b_idx.reserve(B_dims.size());
     e_idx.reserve(E_dims.size());
 
+    auto calculate_total_elements = [](const std::vector<ck_tile::index_t>& dims) {
+        return std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<ck_tile::index_t>());
+    };
+
     for(ck_tile::index_t g_flat = 0; g_flat < calculate_total_elements(G_dims); ++g_flat)
     {
         ck_tile::index_t temp = g_flat;
diff --git a/include/ck_tile/host/reference/reference_pool.hpp b/include/ck_tile/host/reference/reference_pool.hpp
index 1b3e45bce8..4fdb5fed78 100644
--- a/include/ck_tile/host/reference/reference_pool.hpp
+++ b/include/ck_tile/host/reference/reference_pool.hpp
@@ -5,6 +5,7 @@
 
 #include "ck_tile/core.hpp"
 #include "ck_tile/host/host_tensor.hpp"
+#include "ck_tile/ops/pooling/kernel/pool_kernel.hpp"
 #include <thread>
 
 namespace ck_tile {
diff --git a/include/ck_tile/ops/batched_contraction.hpp b/include/ck_tile/ops/batched_contraction.hpp
index 9162f421d1..2232ec1261 100644
--- a/include/ck_tile/ops/batched_contraction.hpp
+++ b/include/ck_tile/ops/batched_contraction.hpp
@@ -5,5 +5,9 @@
 
 #include "ck_tile/ops/batched_contraction/kernel/batched_contraction_kernel.hpp"
 #include "ck_tile/ops/batched_contraction/pipeline/batched_contraction_problem.hpp"
+#include "ck_tile/ops/batched_contraction/utils/tensor_descriptor_utils.hpp"
+#include "ck_tile/ops/common/generic_2d_block_shape.hpp"
+#include "ck_tile/ops/common/load_interleaved_pk_type.hpp"
+#include "ck_tile/ops/common/streamk_common.hpp"
 #include "ck_tile/ops/common/tensor_layout.hpp"
 #include "ck_tile/ops/common/utils.hpp"
diff --git a/include/ck_tile/ops/gemm_quant.hpp b/include/ck_tile/ops/gemm_quant.hpp
index 61cb96c8f4..3273131875 100644
--- a/include/ck_tile/ops/gemm_quant.hpp
+++ b/include/ck_tile/ops/gemm_quant.hpp
@@ -9,9 +9,9 @@
 #include "ck_tile/ops/gemm_quant/kernel/gemm_quant_kernel.hpp"
 #include "ck_tile/ops/gemm_quant/kernel/grouped_gemm_quant_kernel.hpp"
 #include "ck_tile/ops/gemm_quant/pipeline/gemm_aquant_pipeline_ag_bg_cr_base.hpp"
+#include "ck_tile/ops/gemm_quant/pipeline/gemm_aquant_pipeline_ag_bg_cr_mem.hpp"
 #include "ck_tile/ops/gemm_quant/pipeline/gemm_aquant_pipeline_ag_bg_cr_policy.hpp"
 #include "ck_tile/ops/gemm_quant/pipeline/gemm_aquant_pipeline_ag_bg_cr_v3.hpp"
-#include "ck_tile/ops/gemm_quant/pipeline/gemm_aquant_pipeline_ag_bg_cr_mem.hpp"
 #include "ck_tile/ops/gemm_quant/pipeline/gemm_bquant_pipeline_ag_bg_cr_base.hpp"
 #include "ck_tile/ops/gemm_quant/pipeline/gemm_bquant_pipeline_ag_bg_cr_policy.hpp"
 #include "ck_tile/ops/gemm_quant/pipeline/gemm_bquant_pipeline_ag_bg_cr_v3.hpp"
diff --git a/include/ck_tile/ops/gemm_quant/pipeline/gemm_aquant_pipeline_ag_bg_cr_mem.hpp b/include/ck_tile/ops/gemm_quant/pipeline/gemm_aquant_pipeline_ag_bg_cr_mem.hpp
index 1ba9b2a903..4b59c8cbf0 100644
--- a/include/ck_tile/ops/gemm_quant/pipeline/gemm_aquant_pipeline_ag_bg_cr_mem.hpp
+++ b/include/ck_tile/ops/gemm_quant/pipeline/gemm_aquant_pipeline_ag_bg_cr_mem.hpp
@@ -10,6 +10,7 @@
 #include "ck_tile/core/numeric/math.hpp"
 #include "ck_tile/ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_scheduler.hpp"
 #include "ck_tile/ops/gemm_quant/pipeline/gemm_aquant_pipeline_ag_bg_cr_base.hpp"
+#include "ck_tile/ops/gemm_quant/pipeline/gemm_aquant_pipeline_ag_bg_cr_policy.hpp"
 #include "ck_tile/host/concat.hpp"
 
 namespace ck_tile {
diff --git a/include/ck_tile/ops/pool.hpp b/include/ck_tile/ops/pooling.hpp
similarity index 58%
rename from include/ck_tile/ops/pool.hpp
rename to include/ck_tile/ops/pooling.hpp
index 350ef17dcb..084b498203 100644
--- a/include/ck_tile/ops/pool.hpp
+++ b/include/ck_tile/ops/pooling.hpp
@@ -1,11 +1,14 @@
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
 
 #pragma once
 
 #include "ck_tile/ops/pooling/kernel/pool_kernel.hpp"
+#include "ck_tile/ops/pooling/pipeline/pool_default_policy.hpp"
 #include "ck_tile/ops/pooling/pipeline/pool_problem.hpp"
 #include "ck_tile/ops/pooling/pipeline/pool_shape.hpp"
 #include "ck_tile/ops/common/generic_2d_block_shape.hpp"
+#include "ck_tile/ops/common/load_interleaved_pk_type.hpp"
+#include "ck_tile/ops/common/streamk_common.hpp"
 #include "ck_tile/ops/common/tensor_layout.hpp"
 #include "ck_tile/ops/common/utils.hpp"
diff --git a/include/ck_tile/remod.py b/include/ck_tile/remod.py
index 1584f706e9..bd940036bd 100644
--- a/include/ck_tile/remod.py
+++ b/include/ck_tile/remod.py
@@ -5,39 +5,43 @@ import subprocess
 import os
 import copy
 
-NS = 'ck_tile'
-OPS = 'ops'
-REF = 'ref'
-OPS_COMMON = 'common' #common header will be duplicated into ops/* other module
+NS = "ck_tile"
+OPS = "ops"
+OPS_COMMON = "common"  # common header will be duplicated into ops/* other module
 
+IGNORED_DIRS = ["utility", "ref"]
 HEADER_COMMON = f"""// SPDX-License-Identifier: MIT
 // Copyright (c) 2018-{datetime.now().year}, Advanced Micro Devices, Inc. All rights reserved.\n
 """
 
+
 # aa/bb/cc/file.hpp -> (aa, bb, cc, file.hpp)
-def get_module(f, level = 0):
+def get_module(f, level=0):
     all_parts = f.parts
     return str(all_parts[level])
 
+
 all_files = []
 for p in sorted(Path("./").rglob("*")):
-    if p.suffix == '.hpp':
+    if p.suffix == ".hpp":
         all_files.append(pathlib.PurePath(p))
 
+
 class submodule_t:
     def __init__(self):
         self.m = dict()
+
     def push(self, f):
-        if len(f.parents) != 1: # ignore ./xxx.hpp
+        if len(f.parents) != 1:  # ignore ./xxx.hpp
             mod = get_module(f)
-            # ref is supposed to include one header on demand
-            if mod == REF:
+            # Should only be included by demand
+            if mod in IGNORED_DIRS:
                 return
             if mod == OPS:
                 if mod not in self.m.keys():
                     self.m[mod] = dict()
                 mod2 = get_module(f, 1)
-                if Path(mod2).suffix != '.hpp':
+                if Path(mod2).suffix != ".hpp":
                     # ignore ops/xxx.hpp
                     if mod2 not in self.m[mod].keys():
                         self.m[mod][mod2] = list()
@@ -52,14 +56,15 @@ class submodule_t:
             # print(hpath)
             if os.path.exists(str(hpath)):
                 os.remove(str(hpath))
-            with hpath.open('w') as f:
+            with hpath.open("w") as f:
                 f.write(HEADER_COMMON)
-                f.write('#pragma once\n')
-                f.write('\n')
+                f.write("#pragma once\n")
+                f.write("\n")
                 for individual_header in include_list:
-                    header_path = NS + '/' + str(individual_header)
-                    f.write(f'#include \"{header_path}\"\n')
+                    header_path = NS + "/" + str(individual_header)
+                    f.write(f'#include "{header_path}"\n')
                 # f.write('\n') # otherwise clang-format will complain
+
         # print(self.m)
         # restructure common
         for k, v in self.m.items():
@@ -73,21 +78,21 @@ class submodule_t:
         for k, v in self.m.items():
             if k == OPS:
                 for km, kv in v.items():
-                    gen_header(Path(k) / (f'{km}.hpp'), kv)
+                    gen_header(Path(k) / (f"{km}.hpp"), kv)
             else:
-                gen_header(Path(f'{k}.hpp'), v)
+                gen_header(Path(f"{k}.hpp"), v)
 
 
 submodule = submodule_t()
 # formatting
 for x in all_files:
-    subprocess.Popen(f'dos2unix {str(x)}', shell=True)
-    cmd = f'clang-format-18 -style=file -i {str(x)}'
-    #for xp in x.parents:
-    #print(get_file_base(x))
+    subprocess.Popen(f"dos2unix -n {str(x)}", shell=True)
+    cmd = f"clang-format-18 -style=file -i {str(x)}"
+    # for xp in x.parents:
+    # print(get_file_base(x))
     subprocess.Popen(cmd, shell=True)
     submodule.push(x)
 
 submodule.gen()
 
-#print(all_files)
+# print(all_files)
diff --git a/include/rapidjson/allocators.h b/include/rapidjson/allocators.h
index 275417bd8b..45be6609e1 100644
--- a/include/rapidjson/allocators.h
+++ b/include/rapidjson/allocators.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_ALLOCATORS_H_
@@ -32,10 +32,10 @@ RAPIDJSON_NAMESPACE_BEGIN
 
 /*! \class rapidjson::Allocator
     \brief Concept for allocating, resizing and freeing memory block.
-    
+
     Note that Malloc() and Realloc() are non-static but Free() is static.
-    
-    So if an allocator need to support Free(), it needs to put its pointer in 
+
+    So if an allocator need to support Free(), it needs to put its pointer in
     the header of memory block.
 
 \code
@@ -49,7 +49,8 @@ concept Allocator {
 
     // Resize a memory block.
     // \param originalPtr The pointer to current memory block. Null pointer is permitted.
-    // \param originalSize The current size in bytes. (Design issue: since some allocator may not book-keep this, explicitly pass to it can save memory.)
+    // \param originalSize The current size in bytes. (Design issue: since some allocator may not
+book-keep this, explicitly pass to it can save memory.)
     // \param newSize the new size in bytes.
     void* Realloc(void* originalPtr, size_t originalSize, size_t newSize);
 
@@ -60,7 +61,6 @@ concept Allocator {
 \endcode
 */
 
-
 /*! \def RAPIDJSON_ALLOCATOR_DEFAULT_CHUNK_CAPACITY
     \ingroup RAPIDJSON_CONFIG
     \brief User-defined kDefaultChunkCapacity definition.
@@ -72,7 +72,6 @@ concept Allocator {
 #define RAPIDJSON_ALLOCATOR_DEFAULT_CHUNK_CAPACITY (64 * 1024)
 #endif
 
-
 ///////////////////////////////////////////////////////////////////////////////
 // CrtAllocator
 
@@ -80,38 +79,38 @@ concept Allocator {
 /*! This class is just wrapper for standard C library memory routines.
     \note implements Allocator concept
 */
-class CrtAllocator {
-public:
+class CrtAllocator
+{
+    public:
     static const bool kNeedFree = true;
-    void* Malloc(size_t size) { 
-        if (size) //  behavior of malloc(0) is implementation defined.
+    void* Malloc(size_t size)
+    {
+        if(size) //  behavior of malloc(0) is implementation defined.
             return RAPIDJSON_MALLOC(size);
         else
             return NULL; // standardize to returning NULL.
     }
-    void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) {
+    void* Realloc(void* originalPtr, size_t originalSize, size_t newSize)
+    {
         (void)originalSize;
-        if (newSize == 0) {
+        if(newSize == 0)
+        {
             RAPIDJSON_FREE(originalPtr);
             return NULL;
         }
         return RAPIDJSON_REALLOC(originalPtr, newSize);
     }
-    static void Free(void *ptr) RAPIDJSON_NOEXCEPT { RAPIDJSON_FREE(ptr); }
+    static void Free(void* ptr) RAPIDJSON_NOEXCEPT { RAPIDJSON_FREE(ptr); }
 
-    bool operator==(const CrtAllocator&) const RAPIDJSON_NOEXCEPT {
-        return true;
-    }
-    bool operator!=(const CrtAllocator&) const RAPIDJSON_NOEXCEPT {
-        return false;
-    }
+    bool operator==(const CrtAllocator&) const RAPIDJSON_NOEXCEPT { return true; }
+    bool operator!=(const CrtAllocator&) const RAPIDJSON_NOEXCEPT { return false; }
 };
 
 ///////////////////////////////////////////////////////////////////////////////
 // MemoryPoolAllocator
 
 //! Default memory allocator used by the parser and DOM.
-/*! This allocator allocate memory blocks from pre-allocated memory chunks. 
+/*! This allocator allocate memory blocks from pre-allocated memory chunks.
 
     It does not free memory blocks. And Realloc() only allocate new memory.
 
@@ -127,69 +126,82 @@ public:
     \note implements Allocator concept
 */
 template <typename BaseAllocator = CrtAllocator>
-class MemoryPoolAllocator {
+class MemoryPoolAllocator
+{
     //! Chunk header for perpending to each chunk.
     /*! Chunks are stored as a singly linked list.
-    */
-    struct ChunkHeader {
-        size_t capacity;    //!< Capacity of the chunk in bytes (excluding the header itself).
-        size_t size;        //!< Current size of allocated memory in bytes.
-        ChunkHeader *next;  //!< Next chunk in the linked list.
+     */
+    struct ChunkHeader
+    {
+        size_t capacity;   //!< Capacity of the chunk in bytes (excluding the header itself).
+        size_t size;       //!< Current size of allocated memory in bytes.
+        ChunkHeader* next; //!< Next chunk in the linked list.
     };
 
-    struct SharedData {
-        ChunkHeader *chunkHead;  //!< Head of the chunk linked-list. Only the head chunk serves allocation.
+    struct SharedData
+    {
+        ChunkHeader*
+            chunkHead; //!< Head of the chunk linked-list. Only the head chunk serves allocation.
         BaseAllocator* ownBaseAllocator; //!< base allocator created by this object.
         size_t refcount;
         bool ownBuffer;
     };
 
-    static const size_t SIZEOF_SHARED_DATA = RAPIDJSON_ALIGN(sizeof(SharedData));
+    static const size_t SIZEOF_SHARED_DATA  = RAPIDJSON_ALIGN(sizeof(SharedData));
     static const size_t SIZEOF_CHUNK_HEADER = RAPIDJSON_ALIGN(sizeof(ChunkHeader));
 
-    static inline ChunkHeader *GetChunkHead(SharedData *shared)
+    static inline ChunkHeader* GetChunkHead(SharedData* shared)
     {
-        return reinterpret_cast<ChunkHeader*>(reinterpret_cast<uint8_t*>(shared) + SIZEOF_SHARED_DATA);
+        return reinterpret_cast<ChunkHeader*>(reinterpret_cast<uint8_t*>(shared) +
+                                              SIZEOF_SHARED_DATA);
     }
-    static inline uint8_t *GetChunkBuffer(SharedData *shared)
+    static inline uint8_t* GetChunkBuffer(SharedData* shared)
     {
         return reinterpret_cast<uint8_t*>(shared->chunkHead) + SIZEOF_CHUNK_HEADER;
     }
 
-    static const size_t kDefaultChunkCapacity = RAPIDJSON_ALLOCATOR_DEFAULT_CHUNK_CAPACITY; //!< Default chunk capacity.
+    static const size_t kDefaultChunkCapacity =
+        RAPIDJSON_ALLOCATOR_DEFAULT_CHUNK_CAPACITY; //!< Default chunk capacity.
 
-public:
-    static const bool kNeedFree = false;    //!< Tell users that no need to call Free() with this allocator. (concept Allocator)
-    static const bool kRefCounted = true;   //!< Tell users that this allocator is reference counted on copy
+    public:
+    static const bool kNeedFree =
+        false; //!< Tell users that no need to call Free() with this allocator. (concept Allocator)
+    static const bool kRefCounted =
+        true; //!< Tell users that this allocator is reference counted on copy
 
     //! Constructor with chunkSize.
     /*! \param chunkSize The size of memory chunk. The default is kDefaultChunkSize.
         \param baseAllocator The allocator for allocating memory chunks.
     */
-    explicit
-    MemoryPoolAllocator(size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) : 
-        chunk_capacity_(chunkSize),
-        baseAllocator_(baseAllocator ? baseAllocator : RAPIDJSON_NEW(BaseAllocator)()),
-        shared_(static_cast<SharedData*>(baseAllocator_ ? baseAllocator_->Malloc(SIZEOF_SHARED_DATA + SIZEOF_CHUNK_HEADER) : 0))
+    explicit MemoryPoolAllocator(size_t chunkSize             = kDefaultChunkCapacity,
+                                 BaseAllocator* baseAllocator = 0)
+        : chunk_capacity_(chunkSize),
+          baseAllocator_(baseAllocator ? baseAllocator : RAPIDJSON_NEW(BaseAllocator)()),
+          shared_(static_cast<SharedData*>(
+              baseAllocator_ ? baseAllocator_->Malloc(SIZEOF_SHARED_DATA + SIZEOF_CHUNK_HEADER)
+                             : 0))
     {
         RAPIDJSON_ASSERT(baseAllocator_ != 0);
         RAPIDJSON_ASSERT(shared_ != 0);
-        if (baseAllocator) {
+        if(baseAllocator)
+        {
             shared_->ownBaseAllocator = 0;
         }
-        else {
+        else
+        {
             shared_->ownBaseAllocator = baseAllocator_;
         }
-        shared_->chunkHead = GetChunkHead(shared_);
+        shared_->chunkHead           = GetChunkHead(shared_);
         shared_->chunkHead->capacity = 0;
-        shared_->chunkHead->size = 0;
-        shared_->chunkHead->next = 0;
-        shared_->ownBuffer = true;
-        shared_->refcount = 1;
+        shared_->chunkHead->size     = 0;
+        shared_->chunkHead->next     = 0;
+        shared_->ownBuffer           = true;
+        shared_->refcount            = 1;
     }
 
     //! Constructor with user-supplied buffer.
-    /*! The user buffer will be used firstly. When it is full, memory pool allocates new chunk with chunk size.
+    /*! The user buffer will be used firstly. When it is full, memory pool allocates new chunk with
+       chunk size.
 
         The user buffer will not be deallocated when this allocator is destructed.
 
@@ -198,25 +210,28 @@ public:
         \param chunkSize The size of memory chunk. The default is kDefaultChunkSize.
         \param baseAllocator The allocator for allocating memory chunks.
     */
-    MemoryPoolAllocator(void *buffer, size_t size, size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) :
-        chunk_capacity_(chunkSize),
-        baseAllocator_(baseAllocator),
-        shared_(static_cast<SharedData*>(AlignBuffer(buffer, size)))
+    MemoryPoolAllocator(void* buffer,
+                        size_t size,
+                        size_t chunkSize             = kDefaultChunkCapacity,
+                        BaseAllocator* baseAllocator = 0)
+        : chunk_capacity_(chunkSize),
+          baseAllocator_(baseAllocator),
+          shared_(static_cast<SharedData*>(AlignBuffer(buffer, size)))
     {
         RAPIDJSON_ASSERT(size >= SIZEOF_SHARED_DATA + SIZEOF_CHUNK_HEADER);
-        shared_->chunkHead = GetChunkHead(shared_);
+        shared_->chunkHead           = GetChunkHead(shared_);
         shared_->chunkHead->capacity = size - SIZEOF_SHARED_DATA - SIZEOF_CHUNK_HEADER;
-        shared_->chunkHead->size = 0;
-        shared_->chunkHead->next = 0;
-        shared_->ownBaseAllocator = 0;
-        shared_->ownBuffer = false;
-        shared_->refcount = 1;
+        shared_->chunkHead->size     = 0;
+        shared_->chunkHead->next     = 0;
+        shared_->ownBaseAllocator    = 0;
+        shared_->ownBuffer           = false;
+        shared_->refcount            = 1;
     }
 
-    MemoryPoolAllocator(const MemoryPoolAllocator& rhs) RAPIDJSON_NOEXCEPT :
-        chunk_capacity_(rhs.chunk_capacity_),
-        baseAllocator_(rhs.baseAllocator_),
-        shared_(rhs.shared_)
+    MemoryPoolAllocator(const MemoryPoolAllocator& rhs) RAPIDJSON_NOEXCEPT
+        : chunk_capacity_(rhs.chunk_capacity_),
+          baseAllocator_(rhs.baseAllocator_),
+          shared_(rhs.shared_)
     {
         RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
         ++shared_->refcount;
@@ -226,17 +241,17 @@ public:
         RAPIDJSON_NOEXCEPT_ASSERT(rhs.shared_->refcount > 0);
         ++rhs.shared_->refcount;
         this->~MemoryPoolAllocator();
-        baseAllocator_ = rhs.baseAllocator_;
+        baseAllocator_  = rhs.baseAllocator_;
         chunk_capacity_ = rhs.chunk_capacity_;
-        shared_ = rhs.shared_;
+        shared_         = rhs.shared_;
         return *this;
     }
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
-    MemoryPoolAllocator(MemoryPoolAllocator&& rhs) RAPIDJSON_NOEXCEPT :
-        chunk_capacity_(rhs.chunk_capacity_),
-        baseAllocator_(rhs.baseAllocator_),
-        shared_(rhs.shared_)
+    MemoryPoolAllocator(MemoryPoolAllocator&& rhs) RAPIDJSON_NOEXCEPT
+        : chunk_capacity_(rhs.chunk_capacity_),
+          baseAllocator_(rhs.baseAllocator_),
+          shared_(rhs.shared_)
     {
         RAPIDJSON_NOEXCEPT_ASSERT(rhs.shared_->refcount > 0);
         rhs.shared_ = 0;
@@ -245,40 +260,47 @@ public:
     {
         RAPIDJSON_NOEXCEPT_ASSERT(rhs.shared_->refcount > 0);
         this->~MemoryPoolAllocator();
-        baseAllocator_ = rhs.baseAllocator_;
+        baseAllocator_  = rhs.baseAllocator_;
         chunk_capacity_ = rhs.chunk_capacity_;
-        shared_ = rhs.shared_;
-        rhs.shared_ = 0;
+        shared_         = rhs.shared_;
+        rhs.shared_     = 0;
         return *this;
     }
 #endif
 
     //! Destructor.
     /*! This deallocates all memory chunks, excluding the user-supplied buffer.
-    */
-    ~MemoryPoolAllocator() RAPIDJSON_NOEXCEPT {
-        if (!shared_) {
+     */
+    ~MemoryPoolAllocator() RAPIDJSON_NOEXCEPT
+    {
+        if(!shared_)
+        {
             // do nothing if moved
             return;
         }
-        if (shared_->refcount > 1) {
+        if(shared_->refcount > 1)
+        {
             --shared_->refcount;
             return;
         }
         Clear();
-        BaseAllocator *a = shared_->ownBaseAllocator;
-        if (shared_->ownBuffer) {
+        BaseAllocator* a = shared_->ownBaseAllocator;
+        if(shared_->ownBuffer)
+        {
             baseAllocator_->Free(shared_);
         }
         RAPIDJSON_DELETE(a);
     }
 
     //! Deallocates all memory chunks, excluding the first/user one.
-    void Clear() RAPIDJSON_NOEXCEPT {
+    void Clear() RAPIDJSON_NOEXCEPT
+    {
         RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
-        for (;;) {
+        for(;;)
+        {
             ChunkHeader* c = shared_->chunkHead;
-            if (!c->next) {
+            if(!c->next)
+            {
                 break;
             }
             shared_->chunkHead = c->next;
@@ -289,78 +311,86 @@ public:
 
     //! Computes the total capacity of allocated memory chunks.
     /*! \return total capacity in bytes.
-    */
-    size_t Capacity() const RAPIDJSON_NOEXCEPT {
+     */
+    size_t Capacity() const RAPIDJSON_NOEXCEPT
+    {
         RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
         size_t capacity = 0;
-        for (ChunkHeader* c = shared_->chunkHead; c != 0; c = c->next)
+        for(ChunkHeader* c = shared_->chunkHead; c != 0; c = c->next)
             capacity += c->capacity;
         return capacity;
     }
 
     //! Computes the memory blocks allocated.
     /*! \return total used bytes.
-    */
-    size_t Size() const RAPIDJSON_NOEXCEPT {
+     */
+    size_t Size() const RAPIDJSON_NOEXCEPT
+    {
         RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
         size_t size = 0;
-        for (ChunkHeader* c = shared_->chunkHead; c != 0; c = c->next)
+        for(ChunkHeader* c = shared_->chunkHead; c != 0; c = c->next)
             size += c->size;
         return size;
     }
 
     //! Whether the allocator is shared.
     /*! \return true or false.
-    */
-    bool Shared() const RAPIDJSON_NOEXCEPT {
+     */
+    bool Shared() const RAPIDJSON_NOEXCEPT
+    {
         RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
         return shared_->refcount > 1;
     }
 
     //! Allocates a memory block. (concept Allocator)
-    void* Malloc(size_t size) {
+    void* Malloc(size_t size)
+    {
         RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
-        if (!size)
+        if(!size)
             return NULL;
 
         size = RAPIDJSON_ALIGN(size);
-        if (RAPIDJSON_UNLIKELY(shared_->chunkHead->size + size > shared_->chunkHead->capacity))
-            if (!AddChunk(chunk_capacity_ > size ? chunk_capacity_ : size))
+        if(RAPIDJSON_UNLIKELY(shared_->chunkHead->size + size > shared_->chunkHead->capacity))
+            if(!AddChunk(chunk_capacity_ > size ? chunk_capacity_ : size))
                 return NULL;
 
-        void *buffer = GetChunkBuffer(shared_) + shared_->chunkHead->size;
+        void* buffer = GetChunkBuffer(shared_) + shared_->chunkHead->size;
         shared_->chunkHead->size += size;
         return buffer;
     }
 
     //! Resizes a memory block (concept Allocator)
-    void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) {
-        if (originalPtr == 0)
+    void* Realloc(void* originalPtr, size_t originalSize, size_t newSize)
+    {
+        if(originalPtr == 0)
             return Malloc(newSize);
 
         RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
-        if (newSize == 0)
+        if(newSize == 0)
             return NULL;
 
         originalSize = RAPIDJSON_ALIGN(originalSize);
-        newSize = RAPIDJSON_ALIGN(newSize);
+        newSize      = RAPIDJSON_ALIGN(newSize);
 
         // Do not shrink if new size is smaller than original
-        if (originalSize >= newSize)
+        if(originalSize >= newSize)
             return originalPtr;
 
         // Simply expand it if it is the last allocation and there is sufficient space
-        if (originalPtr == GetChunkBuffer(shared_) + shared_->chunkHead->size - originalSize) {
+        if(originalPtr == GetChunkBuffer(shared_) + shared_->chunkHead->size - originalSize)
+        {
             size_t increment = static_cast<size_t>(newSize - originalSize);
-            if (shared_->chunkHead->size + increment <= shared_->chunkHead->capacity) {
+            if(shared_->chunkHead->size + increment <= shared_->chunkHead->capacity)
+            {
                 shared_->chunkHead->size += increment;
                 return originalPtr;
             }
         }
 
         // Realloc process: allocate and copy memory, do not free original buffer.
-        if (void* newBuffer = Malloc(newSize)) {
-            if (originalSize)
+        if(void* newBuffer = Malloc(newSize))
+        {
+            if(originalSize)
                 std::memcpy(newBuffer, originalPtr, originalSize);
             return newBuffer;
         }
@@ -369,31 +399,36 @@ public:
     }
 
     //! Frees a memory block (concept Allocator)
-    static void Free(void *ptr) RAPIDJSON_NOEXCEPT { (void)ptr; } // Do nothing
+    static void Free(void* ptr) RAPIDJSON_NOEXCEPT { (void)ptr; } // Do nothing
 
     //! Compare (equality) with another MemoryPoolAllocator
-    bool operator==(const MemoryPoolAllocator& rhs) const RAPIDJSON_NOEXCEPT {
+    bool operator==(const MemoryPoolAllocator& rhs) const RAPIDJSON_NOEXCEPT
+    {
         RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
         RAPIDJSON_NOEXCEPT_ASSERT(rhs.shared_->refcount > 0);
         return shared_ == rhs.shared_;
     }
     //! Compare (inequality) with another MemoryPoolAllocator
-    bool operator!=(const MemoryPoolAllocator& rhs) const RAPIDJSON_NOEXCEPT {
+    bool operator!=(const MemoryPoolAllocator& rhs) const RAPIDJSON_NOEXCEPT
+    {
         return !operator==(rhs);
     }
 
-private:
+    private:
     //! Creates a new chunk.
     /*! \param capacity Capacity of the chunk in bytes.
         \return true if success.
     */
-    bool AddChunk(size_t capacity) {
-        if (!baseAllocator_)
+    bool AddChunk(size_t capacity)
+    {
+        if(!baseAllocator_)
             shared_->ownBaseAllocator = baseAllocator_ = RAPIDJSON_NEW(BaseAllocator)();
-        if (ChunkHeader* chunk = static_cast<ChunkHeader*>(baseAllocator_->Malloc(SIZEOF_CHUNK_HEADER + capacity))) {
-            chunk->capacity = capacity;
-            chunk->size = 0;
-            chunk->next = shared_->chunkHead;
+        if(ChunkHeader* chunk =
+               static_cast<ChunkHeader*>(baseAllocator_->Malloc(SIZEOF_CHUNK_HEADER + capacity)))
+        {
+            chunk->capacity    = capacity;
+            chunk->size        = 0;
+            chunk->next        = shared_->chunkHead;
             shared_->chunkHead = chunk;
             return true;
         }
@@ -401,12 +436,13 @@ private:
             return false;
     }
 
-    static inline void* AlignBuffer(void* buf, size_t &size)
+    static inline void* AlignBuffer(void* buf, size_t& size)
     {
         RAPIDJSON_NOEXCEPT_ASSERT(buf != 0);
         const uintptr_t mask = sizeof(void*) - 1;
         const uintptr_t ubuf = reinterpret_cast<uintptr_t>(buf);
-        if (RAPIDJSON_UNLIKELY(ubuf & mask)) {
+        if(RAPIDJSON_UNLIKELY(ubuf & mask))
+        {
             const uintptr_t abuf = (ubuf + mask) & ~mask;
             RAPIDJSON_ASSERT(size >= abuf - ubuf);
             buf = reinterpret_cast<void*>(abuf);
@@ -415,37 +451,38 @@ private:
         return buf;
     }
 
-    size_t chunk_capacity_;     //!< The minimum capacity of chunk when they are allocated.
-    BaseAllocator* baseAllocator_;  //!< base allocator for allocating memory chunks.
-    SharedData *shared_;        //!< The shared data of the allocator
+    size_t chunk_capacity_;        //!< The minimum capacity of chunk when they are allocated.
+    BaseAllocator* baseAllocator_; //!< base allocator for allocating memory chunks.
+    SharedData* shared_;           //!< The shared data of the allocator
 };
 
 namespace internal {
-    template<typename, typename = void>
-    struct IsRefCounted :
-        public FalseType
-    { };
-    template<typename T>
-    struct IsRefCounted<T, typename internal::EnableIfCond<T::kRefCounted>::Type> :
-        public TrueType
-    { };
-}
+template <typename, typename = void>
+struct IsRefCounted : public FalseType
+{
+};
+template <typename T>
+struct IsRefCounted<T, typename internal::EnableIfCond<T::kRefCounted>::Type> : public TrueType
+{
+};
+} // namespace internal
 
-template<typename T, typename A>
+template <typename T, typename A>
 inline T* Realloc(A& a, T* old_p, size_t old_n, size_t new_n)
 {
-    RAPIDJSON_NOEXCEPT_ASSERT(old_n <= (std::numeric_limits<size_t>::max)() / sizeof(T) && new_n <= (std::numeric_limits<size_t>::max)() / sizeof(T));
+    RAPIDJSON_NOEXCEPT_ASSERT(old_n <= (std::numeric_limits<size_t>::max)() / sizeof(T) &&
+                              new_n <= (std::numeric_limits<size_t>::max)() / sizeof(T));
     return static_cast<T*>(a.Realloc(old_p, old_n * sizeof(T), new_n * sizeof(T)));
 }
 
-template<typename T, typename A>
-inline T *Malloc(A& a, size_t n = 1)
+template <typename T, typename A>
+inline T* Malloc(A& a, size_t n = 1)
 {
     return Realloc<T, A>(a, NULL, 0, n);
 }
 
-template<typename T, typename A>
-inline void Free(A& a, T *p, size_t n = 1)
+template <typename T, typename A>
+inline void Free(A& a, T* p, size_t n = 1)
 {
     static_cast<void>(Realloc<T, A>(a, p, n, 0));
 }
@@ -456,8 +493,7 @@ RAPIDJSON_DIAG_OFF(effc++) // std::allocator can safely be inherited
 #endif
 
 template <typename T, typename BaseAllocator = CrtAllocator>
-class StdAllocator :
-    public std::allocator<T>
+class StdAllocator : public std::allocator<T>
 {
     typedef std::allocator<T> allocator_type;
 #if RAPIDJSON_HAS_CXX11
@@ -466,113 +502,90 @@ class StdAllocator :
     typedef allocator_type traits_type;
 #endif
 
-public:
+    public:
     typedef BaseAllocator BaseAllocatorType;
 
-    StdAllocator() RAPIDJSON_NOEXCEPT :
-        allocator_type(),
-        baseAllocator_()
-    { }
+    StdAllocator() RAPIDJSON_NOEXCEPT : allocator_type(), baseAllocator_() {}
 
-    StdAllocator(const StdAllocator& rhs) RAPIDJSON_NOEXCEPT :
-        allocator_type(rhs),
-        baseAllocator_(rhs.baseAllocator_)
-    { }
+    StdAllocator(const StdAllocator& rhs) RAPIDJSON_NOEXCEPT : allocator_type(rhs),
+                                                               baseAllocator_(rhs.baseAllocator_)
+    {
+    }
 
-    template<typename U>
-    StdAllocator(const StdAllocator<U, BaseAllocator>& rhs) RAPIDJSON_NOEXCEPT :
-        allocator_type(rhs),
-        baseAllocator_(rhs.baseAllocator_)
-    { }
+    template <typename U>
+    StdAllocator(const StdAllocator<U, BaseAllocator>& rhs) RAPIDJSON_NOEXCEPT
+        : allocator_type(rhs),
+          baseAllocator_(rhs.baseAllocator_)
+    {
+    }
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
-    StdAllocator(StdAllocator&& rhs) RAPIDJSON_NOEXCEPT :
-        allocator_type(std::move(rhs)),
-        baseAllocator_(std::move(rhs.baseAllocator_))
-    { }
+    StdAllocator(StdAllocator&& rhs) RAPIDJSON_NOEXCEPT
+        : allocator_type(std::move(rhs)),
+          baseAllocator_(std::move(rhs.baseAllocator_))
+    {
+    }
 #endif
 #if RAPIDJSON_HAS_CXX11
     using propagate_on_container_move_assignment = std::true_type;
-    using propagate_on_container_swap = std::true_type;
+    using propagate_on_container_swap            = std::true_type;
 #endif
 
     /* implicit */
-    StdAllocator(const BaseAllocator& baseAllocator) RAPIDJSON_NOEXCEPT :
-        allocator_type(),
-        baseAllocator_(baseAllocator)
-    { }
+    StdAllocator(const BaseAllocator& baseAllocator) RAPIDJSON_NOEXCEPT
+        : allocator_type(),
+          baseAllocator_(baseAllocator)
+    {
+    }
 
-    ~StdAllocator() RAPIDJSON_NOEXCEPT
-    { }
+    ~StdAllocator() RAPIDJSON_NOEXCEPT {}
 
-    template<typename U>
-    struct rebind {
+    template <typename U>
+    struct rebind
+    {
         typedef StdAllocator<U, BaseAllocator> other;
     };
 
-    typedef typename traits_type::size_type         size_type;
-    typedef typename traits_type::difference_type   difference_type;
+    typedef typename traits_type::size_type size_type;
+    typedef typename traits_type::difference_type difference_type;
 
-    typedef typename traits_type::value_type        value_type;
-    typedef typename traits_type::pointer           pointer;
-    typedef typename traits_type::const_pointer     const_pointer;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::pointer pointer;
+    typedef typename traits_type::const_pointer const_pointer;
 
 #if RAPIDJSON_HAS_CXX11
 
-    typedef typename std::add_lvalue_reference<value_type>::type &reference;
-    typedef typename std::add_lvalue_reference<typename std::add_const<value_type>::type>::type &const_reference;
+    typedef typename std::add_lvalue_reference<value_type>::type& reference;
+    typedef typename std::add_lvalue_reference<typename std::add_const<value_type>::type>::type&
+        const_reference;
 
-    pointer address(reference r) const RAPIDJSON_NOEXCEPT
-    {
-        return std::addressof(r);
-    }
-    const_pointer address(const_reference r) const RAPIDJSON_NOEXCEPT
-    {
-        return std::addressof(r);
-    }
+    pointer address(reference r) const RAPIDJSON_NOEXCEPT { return std::addressof(r); }
+    const_pointer address(const_reference r) const RAPIDJSON_NOEXCEPT { return std::addressof(r); }
 
-    size_type max_size() const RAPIDJSON_NOEXCEPT
-    {
-        return traits_type::max_size(*this);
-    }
+    size_type max_size() const RAPIDJSON_NOEXCEPT { return traits_type::max_size(*this); }
 
-    template <typename ...Args>
+    template <typename... Args>
     void construct(pointer p, Args&&... args)
     {
         traits_type::construct(*this, p, std::forward<Args>(args)...);
     }
-    void destroy(pointer p)
-    {
-        traits_type::destroy(*this, p);
-    }
+    void destroy(pointer p) { traits_type::destroy(*this, p); }
 
 #else // !RAPIDJSON_HAS_CXX11
 
-    typedef typename allocator_type::reference       reference;
+    typedef typename allocator_type::reference reference;
     typedef typename allocator_type::const_reference const_reference;
 
-    pointer address(reference r) const RAPIDJSON_NOEXCEPT
-    {
-        return allocator_type::address(r);
-    }
+    pointer address(reference r) const RAPIDJSON_NOEXCEPT { return allocator_type::address(r); }
     const_pointer address(const_reference r) const RAPIDJSON_NOEXCEPT
     {
         return allocator_type::address(r);
     }
 
-    size_type max_size() const RAPIDJSON_NOEXCEPT
-    {
-        return allocator_type::max_size();
-    }
+    size_type max_size() const RAPIDJSON_NOEXCEPT { return allocator_type::max_size(); }
 
-    void construct(pointer p, const_reference r)
-    {
-        allocator_type::construct(p, r);
-    }
-    void destroy(pointer p)
-    {
-        allocator_type::destroy(p);
-    }
+    void construct(pointer p, const_reference r) { allocator_type::construct(p, r); }
+    void destroy(pointer p) { allocator_type::destroy(p); }
 
 #endif // !RAPIDJSON_HAS_CXX11
 
@@ -587,47 +600,35 @@ public:
         RAPIDJSON_NAMESPACE::Free<U>(baseAllocator_, p, n);
     }
 
-    pointer allocate(size_type n = 1, const void* = 0)
-    {
-        return allocate<value_type>(n);
-    }
-    void deallocate(pointer p, size_type n = 1)
-    {
-        deallocate<value_type>(p, n);
-    }
+    pointer allocate(size_type n = 1, const void* = 0) { return allocate<value_type>(n); }
+    void deallocate(pointer p, size_type n = 1) { deallocate<value_type>(p, n); }
 
 #if RAPIDJSON_HAS_CXX11
     using is_always_equal = std::is_empty<BaseAllocator>;
 #endif
 
-    template<typename U>
+    template <typename U>
     bool operator==(const StdAllocator<U, BaseAllocator>& rhs) const RAPIDJSON_NOEXCEPT
     {
         return baseAllocator_ == rhs.baseAllocator_;
     }
-    template<typename U>
+    template <typename U>
     bool operator!=(const StdAllocator<U, BaseAllocator>& rhs) const RAPIDJSON_NOEXCEPT
     {
         return !operator==(rhs);
     }
 
     //! rapidjson Allocator concept
-    static const bool kNeedFree = BaseAllocator::kNeedFree;
+    static const bool kNeedFree   = BaseAllocator::kNeedFree;
     static const bool kRefCounted = internal::IsRefCounted<BaseAllocator>::Value;
-    void* Malloc(size_t size)
-    {
-        return baseAllocator_.Malloc(size);
-    }
+    void* Malloc(size_t size) { return baseAllocator_.Malloc(size); }
     void* Realloc(void* originalPtr, size_t originalSize, size_t newSize)
     {
         return baseAllocator_.Realloc(originalPtr, originalSize, newSize);
     }
-    static void Free(void *ptr) RAPIDJSON_NOEXCEPT
-    {
-        BaseAllocator::Free(ptr);
-    }
+    static void Free(void* ptr) RAPIDJSON_NOEXCEPT { BaseAllocator::Free(ptr); }
 
-private:
+    private:
     template <typename, typename>
     friend class StdAllocator; // access to StdAllocator<!T>.*
 
@@ -636,47 +637,45 @@ private:
 
 #if !RAPIDJSON_HAS_CXX17 // std::allocator<void> deprecated in C++17
 template <typename BaseAllocator>
-class StdAllocator<void, BaseAllocator> :
-    public std::allocator<void>
+class StdAllocator<void, BaseAllocator> : public std::allocator<void>
 {
     typedef std::allocator<void> allocator_type;
 
-public:
+    public:
     typedef BaseAllocator BaseAllocatorType;
 
-    StdAllocator() RAPIDJSON_NOEXCEPT :
-        allocator_type(),
-        baseAllocator_()
-    { }
+    StdAllocator() RAPIDJSON_NOEXCEPT : allocator_type(), baseAllocator_() {}
 
-    StdAllocator(const StdAllocator& rhs) RAPIDJSON_NOEXCEPT :
-        allocator_type(rhs),
-        baseAllocator_(rhs.baseAllocator_)
-    { }
+    StdAllocator(const StdAllocator& rhs) RAPIDJSON_NOEXCEPT : allocator_type(rhs),
+                                                               baseAllocator_(rhs.baseAllocator_)
+    {
+    }
 
-    template<typename U>
-    StdAllocator(const StdAllocator<U, BaseAllocator>& rhs) RAPIDJSON_NOEXCEPT :
-        allocator_type(rhs),
-        baseAllocator_(rhs.baseAllocator_)
-    { }
+    template <typename U>
+    StdAllocator(const StdAllocator<U, BaseAllocator>& rhs) RAPIDJSON_NOEXCEPT
+        : allocator_type(rhs),
+          baseAllocator_(rhs.baseAllocator_)
+    {
+    }
 
     /* implicit */
-    StdAllocator(const BaseAllocator& baseAllocator) RAPIDJSON_NOEXCEPT :
-        allocator_type(),
-        baseAllocator_(baseAllocator)
-    { }
+    StdAllocator(const BaseAllocator& baseAllocator) RAPIDJSON_NOEXCEPT
+        : allocator_type(),
+          baseAllocator_(baseAllocator)
+    {
+    }
 
-    ~StdAllocator() RAPIDJSON_NOEXCEPT
-    { }
+    ~StdAllocator() RAPIDJSON_NOEXCEPT {}
 
-    template<typename U>
-    struct rebind {
+    template <typename U>
+    struct rebind
+    {
         typedef StdAllocator<U, BaseAllocator> other;
     };
 
     typedef typename allocator_type::value_type value_type;
 
-private:
+    private:
     template <typename, typename>
     friend class StdAllocator; // access to StdAllocator<!T>.*
 
diff --git a/include/rapidjson/cursorstreamwrapper.h b/include/rapidjson/cursorstreamwrapper.h
index fd6513db14..3cdb901be6 100644
--- a/include/rapidjson/cursorstreamwrapper.h
+++ b/include/rapidjson/cursorstreamwrapper.h
@@ -24,33 +24,39 @@ RAPIDJSON_DIAG_OFF(effc++)
 
 #if defined(_MSC_VER) && _MSC_VER <= 1800
 RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(4702)  // unreachable code
-RAPIDJSON_DIAG_OFF(4512)  // assignment operator could not be generated
+RAPIDJSON_DIAG_OFF(4702) // unreachable code
+RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
 #endif
 
 RAPIDJSON_NAMESPACE_BEGIN
 
-
 //! Cursor stream wrapper for counting line and column number if error exists.
 /*!
     \tparam InputStream     Any stream that implements Stream Concept
 */
-template <typename InputStream, typename Encoding = UTF8<> >
-class CursorStreamWrapper : public GenericStreamWrapper<InputStream, Encoding> {
-public:
+template <typename InputStream, typename Encoding = UTF8<>>
+class CursorStreamWrapper : public GenericStreamWrapper<InputStream, Encoding>
+{
+    public:
     typedef typename Encoding::Ch Ch;
 
-    CursorStreamWrapper(InputStream& is):
-        GenericStreamWrapper<InputStream, Encoding>(is), line_(1), col_(0) {}
+    CursorStreamWrapper(InputStream& is)
+        : GenericStreamWrapper<InputStream, Encoding>(is), line_(1), col_(0)
+    {
+    }
 
     // counting line and column number
-    Ch Take() {
+    Ch Take()
+    {
         Ch ch = this->is_.Take();
-        if(ch == '\n') {
-            line_ ++;
+        if(ch == '\n')
+        {
+            line_++;
             col_ = 0;
-        } else {
-            col_ ++;
+        }
+        else
+        {
+            col_++;
         }
         return ch;
     }
@@ -60,9 +66,9 @@ public:
     //! Get the error column number, if error exists.
     size_t GetColumn() const { return col_; }
 
-private:
-    size_t line_;   //!< Current Line
-    size_t col_;    //!< Current Column
+    private:
+    size_t line_; //!< Current Line
+    size_t col_;  //!< Current Column
 };
 
 #if defined(_MSC_VER) && _MSC_VER <= 1800
diff --git a/include/rapidjson/document.h b/include/rapidjson/document.h
index 4b2d723224..0b12550a00 100644
--- a/include/rapidjson/document.h
+++ b/include/rapidjson/document.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_DOCUMENT_H_
@@ -22,7 +22,7 @@
 #include "internal/strfunc.h"
 #include "memorystream.h"
 #include "encodedstream.h"
-#include <new>      // placement new
+#include <new> // placement new
 #include <limits>
 #ifdef __cpp_lib_three_way_comparison
 #include <compare>
@@ -31,8 +31,8 @@
 RAPIDJSON_DIAG_PUSH
 #ifdef __clang__
 RAPIDJSON_DIAG_OFF(padded)
-RAPIDJSON_DIAG_OFF(switch-enum)
-RAPIDJSON_DIAG_OFF(c++98-compat)
+RAPIDJSON_DIAG_OFF(switch - enum)
+RAPIDJSON_DIAG_OFF(c++ 98 - compat)
 #elif defined(_MSC_VER)
 RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
 RAPIDJSON_DIAG_OFF(4244) // conversion from kXxxFlags to 'uint16_t', possible loss of data
@@ -75,7 +75,8 @@ class GenericDocument;
     User can define this to use CrtAllocator or MemoryPoolAllocator.
 */
 #ifndef RAPIDJSON_DEFAULT_ALLOCATOR
-#define RAPIDJSON_DEFAULT_ALLOCATOR ::RAPIDJSON_NAMESPACE::MemoryPoolAllocator<::RAPIDJSON_NAMESPACE::CrtAllocator>
+#define RAPIDJSON_DEFAULT_ALLOCATOR \
+    ::RAPIDJSON_NAMESPACE::MemoryPoolAllocator<::RAPIDJSON_NAMESPACE::CrtAllocator>
 #endif
 
 /*! \def RAPIDJSON_DEFAULT_STACK_ALLOCATOR
@@ -113,47 +114,52 @@ class GenericDocument;
 //! Name-value pair in a JSON object value.
 /*!
     This class was internal to GenericValue. It used to be a inner struct.
-    But a compiler (IBM XL C/C++ for AIX) have reported to have problem with that so it moved as a namespace scope struct.
-    https://code.google.com/p/rapidjson/issues/detail?id=64
+    But a compiler (IBM XL C/C++ for AIX) have reported to have problem with that so it moved as a
+   namespace scope struct. https://code.google.com/p/rapidjson/issues/detail?id=64
 */
-template <typename Encoding, typename Allocator> 
-class GenericMember {
-public:
-    GenericValue<Encoding, Allocator> name;     //!< name of member (must be a string)
-    GenericValue<Encoding, Allocator> value;    //!< value of member.
+template <typename Encoding, typename Allocator>
+class GenericMember
+{
+    public:
+    GenericValue<Encoding, Allocator> name;  //!< name of member (must be a string)
+    GenericValue<Encoding, Allocator> value; //!< value of member.
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
     //! Move constructor in C++11
-    GenericMember(GenericMember&& rhs) RAPIDJSON_NOEXCEPT
-        : name(std::move(rhs.name)),
-          value(std::move(rhs.value))
+    GenericMember(GenericMember&& rhs) RAPIDJSON_NOEXCEPT : name(std::move(rhs.name)),
+                                                            value(std::move(rhs.value))
     {
     }
 
     //! Move assignment in C++11
-    GenericMember& operator=(GenericMember&& rhs) RAPIDJSON_NOEXCEPT {
+    GenericMember& operator=(GenericMember&& rhs) RAPIDJSON_NOEXCEPT
+    {
         return *this = static_cast<GenericMember&>(rhs);
     }
 #endif
 
     //! Assignment with move semantics.
-    /*! \param rhs Source of the assignment. Its name and value will become a null value after assignment.
-    */
-    GenericMember& operator=(GenericMember& rhs) RAPIDJSON_NOEXCEPT {
-        if (RAPIDJSON_LIKELY(this != &rhs)) {
-            name = rhs.name;
+    /*! \param rhs Source of the assignment. Its name and value will become a null value after
+     * assignment.
+     */
+    GenericMember& operator=(GenericMember& rhs) RAPIDJSON_NOEXCEPT
+    {
+        if(RAPIDJSON_LIKELY(this != &rhs))
+        {
+            name  = rhs.name;
             value = rhs.value;
         }
         return *this;
     }
 
     // swap() for std::sort() and other potential use in STL.
-    friend inline void swap(GenericMember& a, GenericMember& b) RAPIDJSON_NOEXCEPT {
+    friend inline void swap(GenericMember& a, GenericMember& b) RAPIDJSON_NOEXCEPT
+    {
         a.name.Swap(b.name);
         a.value.Swap(b.value);
     }
 
-private:
+    private:
     //! Copy constructor is not permitted.
     GenericMember(const GenericMember& rhs);
 };
@@ -166,8 +172,9 @@ private:
 //! (Constant) member iterator for a JSON object value
 /*!
     \tparam Const Is this a constant iterator?
-    \tparam Encoding    Encoding of the value. (Even non-string values need to have the same encoding in a document)
-    \tparam Allocator   Allocator type for allocating memory of object, array and string.
+    \tparam Encoding    Encoding of the value. (Even non-string values need to have the same
+   encoding in a document) \tparam Allocator   Allocator type for allocating memory of object, array
+   and string.
 
     This class implements a Random Access Iterator for GenericMember elements
     of a GenericValue, see ISO/IEC 14882:2003(E) C++ standard, 24.1 [lib.iterator.requirements].
@@ -183,35 +190,37 @@ private:
     \see GenericMember, GenericValue::MemberIterator, GenericValue::ConstMemberIterator
  */
 template <bool Const, typename Encoding, typename Allocator>
-class GenericMemberIterator {
+class GenericMemberIterator
+{
 
-    friend class GenericValue<Encoding,Allocator>;
-    template <bool, typename, typename> friend class GenericMemberIterator;
+    friend class GenericValue<Encoding, Allocator>;
+    template <bool, typename, typename>
+    friend class GenericMemberIterator;
 
-    typedef GenericMember<Encoding,Allocator> PlainType;
-    typedef typename internal::MaybeAddConst<Const,PlainType>::Type ValueType;
+    typedef GenericMember<Encoding, Allocator> PlainType;
+    typedef typename internal::MaybeAddConst<Const, PlainType>::Type ValueType;
 
-public:
+    public:
     //! Iterator type itself
     typedef GenericMemberIterator Iterator;
     //! Constant iterator type
-    typedef GenericMemberIterator<true,Encoding,Allocator>  ConstIterator;
+    typedef GenericMemberIterator<true, Encoding, Allocator> ConstIterator;
     //! Non-constant iterator type
-    typedef GenericMemberIterator<false,Encoding,Allocator> NonConstIterator;
+    typedef GenericMemberIterator<false, Encoding, Allocator> NonConstIterator;
 
     /** \name std::iterator_traits support */
     //@{
-    typedef ValueType      value_type;
-    typedef ValueType *    pointer;
-    typedef ValueType &    reference;
+    typedef ValueType value_type;
+    typedef ValueType* pointer;
+    typedef ValueType& reference;
     typedef std::ptrdiff_t difference_type;
     typedef std::random_access_iterator_tag iterator_category;
     //@}
 
     //! Pointer to (const) GenericMember
-    typedef pointer         Pointer;
+    typedef pointer Pointer;
     //! Reference to (const) GenericMember
-    typedef reference       Reference;
+    typedef reference Reference;
     //! Signed integer type (e.g. \c ptrdiff_t)
     typedef difference_type DifferenceType;
 
@@ -237,51 +246,110 @@ public:
             constructor effectively defines a regular copy-constructor.
             Otherwise, the copy constructor is implicitly defined.
     */
-    GenericMemberIterator(const NonConstIterator & it) : ptr_(it.ptr_) {}
-    Iterator& operator=(const NonConstIterator & it) { ptr_ = it.ptr_; return *this; }
+    GenericMemberIterator(const NonConstIterator& it) : ptr_(it.ptr_) {}
+    Iterator& operator=(const NonConstIterator& it)
+    {
+        ptr_ = it.ptr_;
+        return *this;
+    }
 
     //! @name stepping
     //@{
-    Iterator& operator++(){ ++ptr_; return *this; }
-    Iterator& operator--(){ --ptr_; return *this; }
-    Iterator  operator++(int){ Iterator old(*this); ++ptr_; return old; }
-    Iterator  operator--(int){ Iterator old(*this); --ptr_; return old; }
+    Iterator& operator++()
+    {
+        ++ptr_;
+        return *this;
+    }
+    Iterator& operator--()
+    {
+        --ptr_;
+        return *this;
+    }
+    Iterator operator++(int)
+    {
+        Iterator old(*this);
+        ++ptr_;
+        return old;
+    }
+    Iterator operator--(int)
+    {
+        Iterator old(*this);
+        --ptr_;
+        return old;
+    }
     //@}
 
     //! @name increment/decrement
     //@{
-    Iterator operator+(DifferenceType n) const { return Iterator(ptr_+n); }
-    Iterator operator-(DifferenceType n) const { return Iterator(ptr_-n); }
+    Iterator operator+(DifferenceType n) const { return Iterator(ptr_ + n); }
+    Iterator operator-(DifferenceType n) const { return Iterator(ptr_ - n); }
 
-    Iterator& operator+=(DifferenceType n) { ptr_+=n; return *this; }
-    Iterator& operator-=(DifferenceType n) { ptr_-=n; return *this; }
+    Iterator& operator+=(DifferenceType n)
+    {
+        ptr_ += n;
+        return *this;
+    }
+    Iterator& operator-=(DifferenceType n)
+    {
+        ptr_ -= n;
+        return *this;
+    }
     //@}
 
     //! @name relations
     //@{
-    template <bool Const_> bool operator==(const GenericMemberIterator<Const_, Encoding, Allocator>& that) const { return ptr_ == that.ptr_; }
-    template <bool Const_> bool operator!=(const GenericMemberIterator<Const_, Encoding, Allocator>& that) const { return ptr_ != that.ptr_; }
-    template <bool Const_> bool operator<=(const GenericMemberIterator<Const_, Encoding, Allocator>& that) const { return ptr_ <= that.ptr_; }
-    template <bool Const_> bool operator>=(const GenericMemberIterator<Const_, Encoding, Allocator>& that) const { return ptr_ >= that.ptr_; }
-    template <bool Const_> bool operator< (const GenericMemberIterator<Const_, Encoding, Allocator>& that) const { return ptr_ < that.ptr_; }
-    template <bool Const_> bool operator> (const GenericMemberIterator<Const_, Encoding, Allocator>& that) const { return ptr_ > that.ptr_; }
+    template <bool Const_>
+    bool operator==(const GenericMemberIterator<Const_, Encoding, Allocator>& that) const
+    {
+        return ptr_ == that.ptr_;
+    }
+    template <bool Const_>
+    bool operator!=(const GenericMemberIterator<Const_, Encoding, Allocator>& that) const
+    {
+        return ptr_ != that.ptr_;
+    }
+    template <bool Const_>
+    bool operator<=(const GenericMemberIterator<Const_, Encoding, Allocator>& that) const
+    {
+        return ptr_ <= that.ptr_;
+    }
+    template <bool Const_>
+    bool operator>=(const GenericMemberIterator<Const_, Encoding, Allocator>& that) const
+    {
+        return ptr_ >= that.ptr_;
+    }
+    template <bool Const_>
+    bool operator<(const GenericMemberIterator<Const_, Encoding, Allocator>& that) const
+    {
+        return ptr_ < that.ptr_;
+    }
+    template <bool Const_>
+    bool operator>(const GenericMemberIterator<Const_, Encoding, Allocator>& that) const
+    {
+        return ptr_ > that.ptr_;
+    }
 
 #ifdef __cpp_lib_three_way_comparison
-    template <bool Const_> std::strong_ordering operator<=>(const GenericMemberIterator<Const_, Encoding, Allocator>& that) const { return ptr_ <=> that.ptr_; }
+    template <bool Const_>
+    std::strong_ordering
+    operator<=>(const GenericMemberIterator<Const_, Encoding, Allocator>& that) const
+    {
+        return ptr_ <=> that.ptr_;
+    }
 #endif
     //@}
 
     //! @name dereference
     //@{
     Reference operator*() const { return *ptr_; }
-    Pointer   operator->() const { return ptr_; }
+    Pointer operator->() const { return ptr_; }
     Reference operator[](DifferenceType n) const { return ptr_[n]; }
     //@}
 
     //! Distance
-    DifferenceType operator-(ConstIterator that) const { return ptr_-that.ptr_; }
+    DifferenceType operator-(ConstIterator that) const { return ptr_ - that.ptr_; }
 
-private:
+    private:
     //! Internal constructor from plain pointer
     explicit GenericMemberIterator(Pointer p) : ptr_(p) {}
 
@@ -297,17 +365,19 @@ class GenericMemberIterator;
 
 //! non-const GenericMemberIterator
 template <typename Encoding, typename Allocator>
-class GenericMemberIterator<false,Encoding,Allocator> {
-public:
+class GenericMemberIterator<false, Encoding, Allocator>
+{
+    public:
     //! use plain pointer as iterator type
-    typedef GenericMember<Encoding,Allocator>* Iterator;
+    typedef GenericMember<Encoding, Allocator>* Iterator;
 };
 //! const GenericMemberIterator
 template <typename Encoding, typename Allocator>
-class GenericMemberIterator<true,Encoding,Allocator> {
-public:
+class GenericMemberIterator<true, Encoding, Allocator>
+{
+    public:
     //! use plain const pointer as iterator type
-    typedef const GenericMember<Encoding,Allocator>* Iterator;
+    typedef const GenericMember<Encoding, Allocator>* Iterator;
 };
 
 #endif // RAPIDJSON_NOMEMBERITERATORCLASS
@@ -342,8 +412,9 @@ public:
 
     \see StringRef, GenericValue::SetString
 */
-template<typename CharType>
-struct GenericStringRef {
+template <typename CharType>
+struct GenericStringRef
+{
     typedef CharType Ch; //!< character type of the string
 
     //! Create string reference from \c const character array
@@ -371,9 +442,10 @@ struct GenericStringRef {
             GenericValue instead.
      */
 #endif
-    template<SizeType N>
-    GenericStringRef(const CharType (&str)[N]) RAPIDJSON_NOEXCEPT
-        : s(str), length(N-1) {}
+    template <SizeType N>
+    GenericStringRef(const CharType (&str)[N]) RAPIDJSON_NOEXCEPT : s(str), length(N - 1)
+    {
+    }
 
     //! Explicitly create string reference from \c const character pointer
 #ifndef __clang__ // -Wdocumentation
@@ -396,31 +468,34 @@ struct GenericStringRef {
             GenericValue instead.
      */
 #endif
-    explicit GenericStringRef(const CharType* str)
-        : s(str), length(NotNullStrLen(str)) {}
+    explicit GenericStringRef(const CharType* str) : s(str), length(NotNullStrLen(str)) {}
 
     //! Create constant string reference from pointer and length
 #ifndef __clang__ // -Wdocumentation
-    /*! \param str constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
-        \param len length of the string, excluding the trailing NULL terminator
+    /*! \param str constant string, lifetime assumed to be longer than the use of the string in e.g.
+       a GenericValue \param len length of the string, excluding the trailing NULL terminator
 
         \post \ref s == str && \ref length == len
         \note Constant complexity.
      */
 #endif
     GenericStringRef(const CharType* str, SizeType len)
-        : s(RAPIDJSON_LIKELY(str) ? str : emptyString), length(len) { RAPIDJSON_ASSERT(str != 0 || len == 0u); }
+        : s(RAPIDJSON_LIKELY(str) ? str : emptyString), length(len)
+    {
+        RAPIDJSON_ASSERT(str != 0 || len == 0u);
+    }
 
     GenericStringRef(const GenericStringRef& rhs) : s(rhs.s), length(rhs.length) {}
 
     //! implicit conversion to plain CharType pointer
-    operator const Ch *() const { return s; }
+    operator const Ch*() const { return s; }
 
-    const Ch* const s; //!< plain CharType pointer
+    const Ch* const s;     //!< plain CharType pointer
     const SizeType length; //!< length of the string (excluding the trailing NULL terminator)
 
-private:
-    SizeType NotNullStrLen(const CharType* str) {
+    private:
+    SizeType NotNullStrLen(const CharType* str)
+    {
         RAPIDJSON_ASSERT(str != 0);
         return internal::StrLen(str);
     }
@@ -429,14 +504,14 @@ private:
     static const Ch emptyString[];
 
     //! Disallow construction from non-const array
-    template<SizeType N>
+    template <SizeType N>
     GenericStringRef(CharType (&str)[N]) /* = delete */;
     //! Copy assignment operator not permitted - immutable type
     GenericStringRef& operator=(const GenericStringRef& rhs) /* = delete */;
 };
 
-template<typename CharType>
-const CharType GenericStringRef<CharType>::emptyString[] = { CharType() };
+template <typename CharType>
+const CharType GenericStringRef<CharType>::emptyString[] = {CharType()};
 
 //! Mark a character pointer as constant string
 /*! Mark a plain character pointer as a "string literal".  This function
@@ -444,14 +519,16 @@ const CharType GenericStringRef<CharType>::emptyString[] = { CharType() };
     value in a JSON GenericValue object, if the string's lifetime is known
     to be valid long enough.
     \tparam CharType Character type of the string
-    \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
-    \return GenericStringRef string reference object
-    \relatesalso GenericStringRef
+    \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a
+   GenericValue \return GenericStringRef string reference object \relatesalso GenericStringRef
 
-    \see GenericValue::GenericValue(StringRefType), GenericValue::operator=(StringRefType), GenericValue::SetString(StringRefType), GenericValue::PushBack(StringRefType, Allocator&), GenericValue::AddMember
+    \see GenericValue::GenericValue(StringRefType), GenericValue::operator=(StringRefType),
+   GenericValue::SetString(StringRefType), GenericValue::PushBack(StringRefType, Allocator&),
+   GenericValue::AddMember
 */
-template<typename CharType>
-inline GenericStringRef<CharType> StringRef(const CharType* str) {
+template <typename CharType>
+inline GenericStringRef<CharType> StringRef(const CharType* str)
+{
     return GenericStringRef<CharType>(str);
 }
 
@@ -465,13 +542,13 @@ inline GenericStringRef<CharType> StringRef(const CharType* str) {
     supports string containing null characters.
 
     \tparam CharType character type of the string
-    \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
-    \param length The length of source string.
-    \return GenericStringRef string reference object
-    \relatesalso GenericStringRef
+    \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a
+   GenericValue \param length The length of source string. \return GenericStringRef string reference
+   object \relatesalso GenericStringRef
 */
-template<typename CharType>
-inline GenericStringRef<CharType> StringRef(const CharType* str, size_t length) {
+template <typename CharType>
+inline GenericStringRef<CharType> StringRef(const CharType* str, size_t length)
+{
     return GenericStringRef<CharType>(str, SizeType(length));
 }
 
@@ -483,13 +560,13 @@ inline GenericStringRef<CharType> StringRef(const CharType* str, size_t length)
     to be valid long enough.
 
     \tparam CharType character type of the string
-    \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
-    \return GenericStringRef string reference object
-    \relatesalso GenericStringRef
-    \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
+    \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a
+   GenericValue \return GenericStringRef string reference object \relatesalso GenericStringRef \note
+   Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
 */
-template<typename CharType>
-inline GenericStringRef<CharType> StringRef(const std::basic_string<CharType>& str) {
+template <typename CharType>
+inline GenericStringRef<CharType> StringRef(const std::basic_string<CharType>& str)
+{
     return GenericStringRef<CharType>(str.data(), SizeType(str.size()));
 }
 #endif
@@ -499,14 +576,24 @@ inline GenericStringRef<CharType> StringRef(const std::basic_string<CharType>& s
 namespace internal {
 
 template <typename T, typename Encoding = void, typename Allocator = void>
-struct IsGenericValueImpl : FalseType {};
+struct IsGenericValueImpl : FalseType
+{
+};
 
 // select candidates according to nested encoding and allocator types
-template <typename T> struct IsGenericValueImpl<T, typename Void<typename T::EncodingType>::Type, typename Void<typename T::AllocatorType>::Type>
-    : IsBaseOf<GenericValue<typename T::EncodingType, typename T::AllocatorType>, T>::Type {};
+template <typename T>
+struct IsGenericValueImpl<T,
+                          typename Void<typename T::EncodingType>::Type,
+                          typename Void<typename T::AllocatorType>::Type>
+    : IsBaseOf<GenericValue<typename T::EncodingType, typename T::AllocatorType>, T>::Type
+{
+};
 
 // helper to match arbitrary GenericValue instantiations, including derived classes
-template <typename T> struct IsGenericValue : IsGenericValueImpl<T>::Type {};
+template <typename T>
+struct IsGenericValue : IsGenericValueImpl<T>::Type
+{
+};
 
 } // namespace internal
 
@@ -516,130 +603,193 @@ template <typename T> struct IsGenericValue : IsGenericValueImpl<T>::Type {};
 namespace internal {
 
 template <typename ValueType, typename T>
-struct TypeHelper {};
+struct TypeHelper
+{
+};
 
-template<typename ValueType> 
-struct TypeHelper<ValueType, bool> {
+template <typename ValueType>
+struct TypeHelper<ValueType, bool>
+{
     static bool Is(const ValueType& v) { return v.IsBool(); }
     static bool Get(const ValueType& v) { return v.GetBool(); }
     static ValueType& Set(ValueType& v, bool data) { return v.SetBool(data); }
-    static ValueType& Set(ValueType& v, bool data, typename ValueType::AllocatorType&) { return v.SetBool(data); }
+    static ValueType& Set(ValueType& v, bool data, typename ValueType::AllocatorType&)
+    {
+        return v.SetBool(data);
+    }
 };
 
-template<typename ValueType> 
-struct TypeHelper<ValueType, int> {
+template <typename ValueType>
+struct TypeHelper<ValueType, int>
+{
     static bool Is(const ValueType& v) { return v.IsInt(); }
     static int Get(const ValueType& v) { return v.GetInt(); }
     static ValueType& Set(ValueType& v, int data) { return v.SetInt(data); }
-    static ValueType& Set(ValueType& v, int data, typename ValueType::AllocatorType&) { return v.SetInt(data); }
+    static ValueType& Set(ValueType& v, int data, typename ValueType::AllocatorType&)
+    {
+        return v.SetInt(data);
+    }
 };
 
-template<typename ValueType> 
-struct TypeHelper<ValueType, unsigned> {
+template <typename ValueType>
+struct TypeHelper<ValueType, unsigned>
+{
     static bool Is(const ValueType& v) { return v.IsUint(); }
     static unsigned Get(const ValueType& v) { return v.GetUint(); }
     static ValueType& Set(ValueType& v, unsigned data) { return v.SetUint(data); }
-    static ValueType& Set(ValueType& v, unsigned data, typename ValueType::AllocatorType&) { return v.SetUint(data); }
+    static ValueType& Set(ValueType& v, unsigned data, typename ValueType::AllocatorType&)
+    {
+        return v.SetUint(data);
+    }
 };
 
 #ifdef _MSC_VER
 RAPIDJSON_STATIC_ASSERT(sizeof(long) == sizeof(int));
-template<typename ValueType>
-struct TypeHelper<ValueType, long> {
+template <typename ValueType>
+struct TypeHelper<ValueType, long>
+{
     static bool Is(const ValueType& v) { return v.IsInt(); }
     static long Get(const ValueType& v) { return v.GetInt(); }
     static ValueType& Set(ValueType& v, long data) { return v.SetInt(data); }
-    static ValueType& Set(ValueType& v, long data, typename ValueType::AllocatorType&) { return v.SetInt(data); }
+    static ValueType& Set(ValueType& v, long data, typename ValueType::AllocatorType&)
+    {
+        return v.SetInt(data);
+    }
 };
 
 RAPIDJSON_STATIC_ASSERT(sizeof(unsigned long) == sizeof(unsigned));
-template<typename ValueType>
-struct TypeHelper<ValueType, unsigned long> {
+template <typename ValueType>
+struct TypeHelper<ValueType, unsigned long>
+{
     static bool Is(const ValueType& v) { return v.IsUint(); }
     static unsigned long Get(const ValueType& v) { return v.GetUint(); }
     static ValueType& Set(ValueType& v, unsigned long data) { return v.SetUint(data); }
-    static ValueType& Set(ValueType& v, unsigned long data, typename ValueType::AllocatorType&) { return v.SetUint(data); }
+    static ValueType& Set(ValueType& v, unsigned long data, typename ValueType::AllocatorType&)
+    {
+        return v.SetUint(data);
+    }
 };
 #endif
 
-template<typename ValueType> 
-struct TypeHelper<ValueType, int64_t> {
+template <typename ValueType>
+struct TypeHelper<ValueType, int64_t>
+{
     static bool Is(const ValueType& v) { return v.IsInt64(); }
     static int64_t Get(const ValueType& v) { return v.GetInt64(); }
     static ValueType& Set(ValueType& v, int64_t data) { return v.SetInt64(data); }
-    static ValueType& Set(ValueType& v, int64_t data, typename ValueType::AllocatorType&) { return v.SetInt64(data); }
+    static ValueType& Set(ValueType& v, int64_t data, typename ValueType::AllocatorType&)
+    {
+        return v.SetInt64(data);
+    }
 };
 
-template<typename ValueType> 
-struct TypeHelper<ValueType, uint64_t> {
+template <typename ValueType>
+struct TypeHelper<ValueType, uint64_t>
+{
     static bool Is(const ValueType& v) { return v.IsUint64(); }
     static uint64_t Get(const ValueType& v) { return v.GetUint64(); }
     static ValueType& Set(ValueType& v, uint64_t data) { return v.SetUint64(data); }
-    static ValueType& Set(ValueType& v, uint64_t data, typename ValueType::AllocatorType&) { return v.SetUint64(data); }
+    static ValueType& Set(ValueType& v, uint64_t data, typename ValueType::AllocatorType&)
+    {
+        return v.SetUint64(data);
+    }
 };
 
-template<typename ValueType> 
-struct TypeHelper<ValueType, double> {
+template <typename ValueType>
+struct TypeHelper<ValueType, double>
+{
     static bool Is(const ValueType& v) { return v.IsDouble(); }
     static double Get(const ValueType& v) { return v.GetDouble(); }
     static ValueType& Set(ValueType& v, double data) { return v.SetDouble(data); }
-    static ValueType& Set(ValueType& v, double data, typename ValueType::AllocatorType&) { return v.SetDouble(data); }
+    static ValueType& Set(ValueType& v, double data, typename ValueType::AllocatorType&)
+    {
+        return v.SetDouble(data);
+    }
 };
 
-template<typename ValueType> 
-struct TypeHelper<ValueType, float> {
+template <typename ValueType>
+struct TypeHelper<ValueType, float>
+{
     static bool Is(const ValueType& v) { return v.IsFloat(); }
     static float Get(const ValueType& v) { return v.GetFloat(); }
     static ValueType& Set(ValueType& v, float data) { return v.SetFloat(data); }
-    static ValueType& Set(ValueType& v, float data, typename ValueType::AllocatorType&) { return v.SetFloat(data); }
+    static ValueType& Set(ValueType& v, float data, typename ValueType::AllocatorType&)
+    {
+        return v.SetFloat(data);
+    }
 };
 
-template<typename ValueType> 
-struct TypeHelper<ValueType, const typename ValueType::Ch*> {
+template <typename ValueType>
+struct TypeHelper<ValueType, const typename ValueType::Ch*>
+{
     typedef const typename ValueType::Ch* StringType;
     static bool Is(const ValueType& v) { return v.IsString(); }
     static StringType Get(const ValueType& v) { return v.GetString(); }
-    static ValueType& Set(ValueType& v, const StringType data) { return v.SetString(typename ValueType::StringRefType(data)); }
-    static ValueType& Set(ValueType& v, const StringType data, typename ValueType::AllocatorType& a) { return v.SetString(data, a); }
+    static ValueType& Set(ValueType& v, const StringType data)
+    {
+        return v.SetString(typename ValueType::StringRefType(data));
+    }
+    static ValueType& Set(ValueType& v, const StringType data, typename ValueType::AllocatorType& a)
+    {
+        return v.SetString(data, a);
+    }
 };
 
 #if RAPIDJSON_HAS_STDSTRING
-template<typename ValueType> 
-struct TypeHelper<ValueType, std::basic_string<typename ValueType::Ch> > {
+template <typename ValueType>
+struct TypeHelper<ValueType, std::basic_string<typename ValueType::Ch>>
+{
     typedef std::basic_string<typename ValueType::Ch> StringType;
     static bool Is(const ValueType& v) { return v.IsString(); }
-    static StringType Get(const ValueType& v) { return StringType(v.GetString(), v.GetStringLength()); }
-    static ValueType& Set(ValueType& v, const StringType& data, typename ValueType::AllocatorType& a) { return v.SetString(data, a); }
+    static StringType Get(const ValueType& v)
+    {
+        return StringType(v.GetString(), v.GetStringLength());
+    }
+    static ValueType&
+    Set(ValueType& v, const StringType& data, typename ValueType::AllocatorType& a)
+    {
+        return v.SetString(data, a);
+    }
 };
 #endif
 
-template<typename ValueType> 
-struct TypeHelper<ValueType, typename ValueType::Array> {
+template <typename ValueType>
+struct TypeHelper<ValueType, typename ValueType::Array>
+{
     typedef typename ValueType::Array ArrayType;
     static bool Is(const ValueType& v) { return v.IsArray(); }
     static ArrayType Get(ValueType& v) { return v.GetArray(); }
     static ValueType& Set(ValueType& v, ArrayType data) { return v = data; }
-    static ValueType& Set(ValueType& v, ArrayType data, typename ValueType::AllocatorType&) { return v = data; }
+    static ValueType& Set(ValueType& v, ArrayType data, typename ValueType::AllocatorType&)
+    {
+        return v = data;
+    }
 };
 
-template<typename ValueType> 
-struct TypeHelper<ValueType, typename ValueType::ConstArray> {
+template <typename ValueType>
+struct TypeHelper<ValueType, typename ValueType::ConstArray>
+{
     typedef typename ValueType::ConstArray ArrayType;
     static bool Is(const ValueType& v) { return v.IsArray(); }
     static ArrayType Get(const ValueType& v) { return v.GetArray(); }
 };
 
-template<typename ValueType> 
-struct TypeHelper<ValueType, typename ValueType::Object> {
+template <typename ValueType>
+struct TypeHelper<ValueType, typename ValueType::Object>
+{
     typedef typename ValueType::Object ObjectType;
     static bool Is(const ValueType& v) { return v.IsObject(); }
     static ObjectType Get(ValueType& v) { return v.GetObject(); }
     static ValueType& Set(ValueType& v, ObjectType data) { return v = data; }
-    static ValueType& Set(ValueType& v, ObjectType data, typename ValueType::AllocatorType&) { return v = data; }
+    static ValueType& Set(ValueType& v, ObjectType data, typename ValueType::AllocatorType&)
+    {
+        return v = data;
+    }
 };
 
-template<typename ValueType> 
-struct TypeHelper<ValueType, typename ValueType::ConstObject> {
+template <typename ValueType>
+struct TypeHelper<ValueType, typename ValueType::ConstObject>
+{
     typedef typename ValueType::ConstObject ObjectType;
     static bool Is(const ValueType& v) { return v.IsObject(); }
     static ObjectType Get(const ValueType& v) { return v.GetObject(); }
@@ -648,8 +798,10 @@ struct TypeHelper<ValueType, typename ValueType::ConstObject> {
 } // namespace internal
 
 // Forward declarations
-template <bool, typename> class GenericArray;
-template <bool, typename> class GenericObject;
+template <bool, typename>
+class GenericArray;
+template <bool, typename>
+class GenericObject;
 
 ///////////////////////////////////////////////////////////////////////////////
 // GenericValue
@@ -661,23 +813,28 @@ template <bool, typename> class GenericObject;
 
     Use the Value if UTF8 and default allocator
 
-    \tparam Encoding    Encoding of the value. (Even non-string values need to have the same encoding in a document)
-    \tparam Allocator   Allocator type for allocating memory of object, array and string.
+    \tparam Encoding    Encoding of the value. (Even non-string values need to have the same
+   encoding in a document) \tparam Allocator   Allocator type for allocating memory of object, array
+   and string.
 */
-template <typename Encoding, typename Allocator = RAPIDJSON_DEFAULT_ALLOCATOR >
-class GenericValue {
-public:
+template <typename Encoding, typename Allocator = RAPIDJSON_DEFAULT_ALLOCATOR>
+class GenericValue
+{
+    public:
     //! Name-value pair in an object.
     typedef GenericMember<Encoding, Allocator> Member;
-    typedef Encoding EncodingType;                  //!< Encoding type from template parameter.
-    typedef Allocator AllocatorType;                //!< Allocator type from template parameter.
-    typedef typename Encoding::Ch Ch;               //!< Character type derived from Encoding.
-    typedef GenericStringRef<Ch> StringRefType;     //!< Reference to a constant string
-    typedef typename GenericMemberIterator<false,Encoding,Allocator>::Iterator MemberIterator;  //!< Member iterator for iterating in object.
-    typedef typename GenericMemberIterator<true,Encoding,Allocator>::Iterator ConstMemberIterator;  //!< Constant member iterator for iterating in object.
-    typedef GenericValue* ValueIterator;            //!< Value iterator for iterating in array.
-    typedef const GenericValue* ConstValueIterator; //!< Constant value iterator for iterating in array.
-    typedef GenericValue<Encoding, Allocator> ValueType;    //!< Value type of itself.
+    typedef Encoding EncodingType;              //!< Encoding type from template parameter.
+    typedef Allocator AllocatorType;            //!< Allocator type from template parameter.
+    typedef typename Encoding::Ch Ch;           //!< Character type derived from Encoding.
+    typedef GenericStringRef<Ch> StringRefType; //!< Reference to a constant string
+    typedef typename GenericMemberIterator<false, Encoding, Allocator>::Iterator
+        MemberIterator; //!< Member iterator for iterating in object.
+    typedef typename GenericMemberIterator<true, Encoding, Allocator>::Iterator
+        ConstMemberIterator;             //!< Constant member iterator for iterating in object.
+    typedef GenericValue* ValueIterator; //!< Value iterator for iterating in array.
+    typedef const GenericValue*
+        ConstValueIterator; //!< Constant value iterator for iterating in array.
+    typedef GenericValue<Encoding, Allocator> ValueType; //!< Value type of itself.
     typedef GenericArray<false, ValueType> Array;
     typedef GenericArray<true, ValueType> ConstArray;
     typedef GenericObject<false, ValueType> Object;
@@ -691,42 +848,46 @@ public:
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
     //! Move constructor in C++11
-    GenericValue(GenericValue&& rhs) RAPIDJSON_NOEXCEPT : data_(rhs.data_) {
+    GenericValue(GenericValue&& rhs) RAPIDJSON_NOEXCEPT : data_(rhs.data_)
+    {
         rhs.data_.f.flags = kNullFlag; // give up contents
     }
 #endif
 
-private:
+    private:
     //! Copy constructor is not permitted.
     GenericValue(const GenericValue& rhs);
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
     //! Moving from a GenericDocument is not permitted.
     template <typename StackAllocator>
-    GenericValue(GenericDocument<Encoding,Allocator,StackAllocator>&& rhs);
+    GenericValue(GenericDocument<Encoding, Allocator, StackAllocator>&& rhs);
 
     //! Move assignment from a GenericDocument is not permitted.
     template <typename StackAllocator>
-    GenericValue& operator=(GenericDocument<Encoding,Allocator,StackAllocator>&& rhs);
+    GenericValue& operator=(GenericDocument<Encoding, Allocator, StackAllocator>&& rhs);
 #endif
 
-public:
-
+    public:
     //! Constructor with JSON value type.
     /*! This creates a Value of specified type with default content.
         \param type Type of the value.
         \note Default content for number is zero.
     */
-    explicit GenericValue(Type type) RAPIDJSON_NOEXCEPT : data_() {
-        static const uint16_t defaultFlags[] = {
-            kNullFlag, kFalseFlag, kTrueFlag, kObjectFlag, kArrayFlag, kShortStringFlag,
-            kNumberAnyFlag
-        };
+    explicit GenericValue(Type type) RAPIDJSON_NOEXCEPT : data_()
+    {
+        static const uint16_t defaultFlags[] = {kNullFlag,
+                                                kFalseFlag,
+                                                kTrueFlag,
+                                                kObjectFlag,
+                                                kArrayFlag,
+                                                kShortStringFlag,
+                                                kNumberAnyFlag};
         RAPIDJSON_NOEXCEPT_ASSERT(type >= kNullType && type <= kNumberType);
         data_.f.flags = defaultFlags[type];
 
         // Use ShortString to store empty string.
-        if (type == kStringType)
+        if(type == kStringType)
             data_.ss.SetLength(0);
     }
 
@@ -734,38 +895,42 @@ public:
     /*! Creates a copy of a Value by using the given Allocator
         \tparam SourceAllocator allocator of \c rhs
         \param rhs Value to copy from (read-only)
-        \param allocator Allocator for allocating copied elements and buffers. Commonly use GenericDocument::GetAllocator().
-        \param copyConstStrings Force copying of constant strings (e.g. referencing an in-situ buffer)
-        \see CopyFrom()
+        \param allocator Allocator for allocating copied elements and buffers. Commonly use
+       GenericDocument::GetAllocator(). \param copyConstStrings Force copying of constant strings
+       (e.g. referencing an in-situ buffer) \see CopyFrom()
     */
     template <typename SourceAllocator>
-    GenericValue(const GenericValue<Encoding,SourceAllocator>& rhs, Allocator& allocator, bool copyConstStrings = false) {
-        switch (rhs.GetType()) {
-        case kObjectType:
-            DoCopyMembers(rhs, allocator, copyConstStrings);
-            break;
+    GenericValue(const GenericValue<Encoding, SourceAllocator>& rhs,
+                 Allocator& allocator,
+                 bool copyConstStrings = false)
+    {
+        switch(rhs.GetType())
+        {
+        case kObjectType: DoCopyMembers(rhs, allocator, copyConstStrings); break;
         case kArrayType: {
-                SizeType count = rhs.data_.a.size;
-                GenericValue* le = reinterpret_cast<GenericValue*>(allocator.Malloc(count * sizeof(GenericValue)));
-                const GenericValue<Encoding,SourceAllocator>* re = rhs.GetElementsPointer();
-                for (SizeType i = 0; i < count; i++)
-                    new (&le[i]) GenericValue(re[i], allocator, copyConstStrings);
-                data_.f.flags = kArrayFlag;
-                data_.a.size = data_.a.capacity = count;
-                SetElementsPointer(le);
-            }
-            break;
+            SizeType count = rhs.data_.a.size;
+            GenericValue* le =
+                reinterpret_cast<GenericValue*>(allocator.Malloc(count * sizeof(GenericValue)));
+            const GenericValue<Encoding, SourceAllocator>* re = rhs.GetElementsPointer();
+            for(SizeType i = 0; i < count; i++)
+                new(&le[i]) GenericValue(re[i], allocator, copyConstStrings);
+            data_.f.flags = kArrayFlag;
+            data_.a.size = data_.a.capacity = count;
+            SetElementsPointer(le);
+        }
+        break;
         case kStringType:
-            if (rhs.data_.f.flags == kConstStringFlag && !copyConstStrings) {
+            if(rhs.data_.f.flags == kConstStringFlag && !copyConstStrings)
+            {
                 data_.f.flags = rhs.data_.f.flags;
-                data_  = *reinterpret_cast<const Data*>(&rhs.data_);
+                data_         = *reinterpret_cast<const Data*>(&rhs.data_);
             }
             else
                 SetStringRaw(StringRef(rhs.GetString(), rhs.GetStringLength()), allocator);
             break;
         default:
             data_.f.flags = rhs.data_.f.flags;
-            data_  = *reinterpret_cast<const Data*>(&rhs.data_);
+            data_         = *reinterpret_cast<const Data*>(&rhs.data_);
             break;
         }
     }
@@ -778,78 +943,106 @@ public:
      */
 #ifndef RAPIDJSON_DOXYGEN_RUNNING // hide SFINAE from Doxygen
     template <typename T>
-    explicit GenericValue(T b, RAPIDJSON_ENABLEIF((internal::IsSame<bool, T>))) RAPIDJSON_NOEXCEPT  // See #472
+    explicit GenericValue(T b, RAPIDJSON_ENABLEIF((internal::IsSame<bool, T>)))
+        RAPIDJSON_NOEXCEPT // See #472
 #else
     explicit GenericValue(bool b) RAPIDJSON_NOEXCEPT
 #endif
-        : data_() {
-            // safe-guard against failing SFINAE
-            RAPIDJSON_STATIC_ASSERT((internal::IsSame<bool,T>::Value));
-            data_.f.flags = b ? kTrueFlag : kFalseFlag;
+        : data_()
+    {
+        // safe-guard against failing SFINAE
+        RAPIDJSON_STATIC_ASSERT((internal::IsSame<bool, T>::Value));
+        data_.f.flags = b ? kTrueFlag : kFalseFlag;
     }
 
     //! Constructor for int value.
-    explicit GenericValue(int i) RAPIDJSON_NOEXCEPT : data_() {
-        data_.n.i64 = i;
+    explicit GenericValue(int i) RAPIDJSON_NOEXCEPT : data_()
+    {
+        data_.n.i64   = i;
         data_.f.flags = (i >= 0) ? (kNumberIntFlag | kUintFlag | kUint64Flag) : kNumberIntFlag;
     }
 
     //! Constructor for unsigned value.
-    explicit GenericValue(unsigned u) RAPIDJSON_NOEXCEPT : data_() {
-        data_.n.u64 = u; 
-        data_.f.flags = (u & 0x80000000) ? kNumberUintFlag : (kNumberUintFlag | kIntFlag | kInt64Flag);
+    explicit GenericValue(unsigned u) RAPIDJSON_NOEXCEPT : data_()
+    {
+        data_.n.u64 = u;
+        data_.f.flags =
+            (u & 0x80000000) ? kNumberUintFlag : (kNumberUintFlag | kIntFlag | kInt64Flag);
     }
 
     //! Constructor for int64_t value.
-    explicit GenericValue(int64_t i64) RAPIDJSON_NOEXCEPT : data_() {
-        data_.n.i64 = i64;
+    explicit GenericValue(int64_t i64) RAPIDJSON_NOEXCEPT : data_()
+    {
+        data_.n.i64   = i64;
         data_.f.flags = kNumberInt64Flag;
-        if (i64 >= 0) {
+        if(i64 >= 0)
+        {
             data_.f.flags |= kNumberUint64Flag;
-            if (!(static_cast<uint64_t>(i64) & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x00000000)))
+            if(!(static_cast<uint64_t>(i64) & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x00000000)))
                 data_.f.flags |= kUintFlag;
-            if (!(static_cast<uint64_t>(i64) & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000)))
+            if(!(static_cast<uint64_t>(i64) & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000)))
                 data_.f.flags |= kIntFlag;
         }
-        else if (i64 >= static_cast<int64_t>(RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000)))
+        else if(i64 >= static_cast<int64_t>(RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000)))
             data_.f.flags |= kIntFlag;
     }
 
     //! Constructor for uint64_t value.
-    explicit GenericValue(uint64_t u64) RAPIDJSON_NOEXCEPT : data_() {
-        data_.n.u64 = u64;
+    explicit GenericValue(uint64_t u64) RAPIDJSON_NOEXCEPT : data_()
+    {
+        data_.n.u64   = u64;
         data_.f.flags = kNumberUint64Flag;
-        if (!(u64 & RAPIDJSON_UINT64_C2(0x80000000, 0x00000000)))
+        if(!(u64 & RAPIDJSON_UINT64_C2(0x80000000, 0x00000000)))
             data_.f.flags |= kInt64Flag;
-        if (!(u64 & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x00000000)))
+        if(!(u64 & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x00000000)))
             data_.f.flags |= kUintFlag;
-        if (!(u64 & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000)))
+        if(!(u64 & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000)))
             data_.f.flags |= kIntFlag;
     }
 
     //! Constructor for double value.
-    explicit GenericValue(double d) RAPIDJSON_NOEXCEPT : data_() { data_.n.d = d; data_.f.flags = kNumberDoubleFlag; }
+    explicit GenericValue(double d) RAPIDJSON_NOEXCEPT : data_()
+    {
+        data_.n.d     = d;
+        data_.f.flags = kNumberDoubleFlag;
+    }
 
     //! Constructor for float value.
-    explicit GenericValue(float f) RAPIDJSON_NOEXCEPT : data_() { data_.n.d = static_cast<double>(f); data_.f.flags = kNumberDoubleFlag; }
+    explicit GenericValue(float f) RAPIDJSON_NOEXCEPT : data_()
+    {
+        data_.n.d     = static_cast<double>(f);
+        data_.f.flags = kNumberDoubleFlag;
+    }
 
     //! Constructor for constant string (i.e. do not make a copy of string)
-    GenericValue(const Ch* s, SizeType length) RAPIDJSON_NOEXCEPT : data_() { SetStringRaw(StringRef(s, length)); }
+    GenericValue(const Ch* s, SizeType length) RAPIDJSON_NOEXCEPT : data_()
+    {
+        SetStringRaw(StringRef(s, length));
+    }
 
     //! Constructor for constant string (i.e. do not make a copy of string)
     explicit GenericValue(StringRefType s) RAPIDJSON_NOEXCEPT : data_() { SetStringRaw(s); }
 
     //! Constructor for copy-string (i.e. do make a copy of string)
-    GenericValue(const Ch* s, SizeType length, Allocator& allocator) : data_() { SetStringRaw(StringRef(s, length), allocator); }
+    GenericValue(const Ch* s, SizeType length, Allocator& allocator) : data_()
+    {
+        SetStringRaw(StringRef(s, length), allocator);
+    }
 
     //! Constructor for copy-string (i.e. do make a copy of string)
-    GenericValue(const Ch*s, Allocator& allocator) : data_() { SetStringRaw(StringRef(s), allocator); }
+    GenericValue(const Ch* s, Allocator& allocator) : data_()
+    {
+        SetStringRaw(StringRef(s), allocator);
+    }
 
 #if RAPIDJSON_HAS_STDSTRING
     //! Constructor for copy-string from a string object (i.e. do make a copy of string)
     /*! \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
      */
-    GenericValue(const std::basic_string<Ch>& s, Allocator& allocator) : data_() { SetStringRaw(StringRef(s), allocator); }
+    GenericValue(const std::basic_string<Ch>& s, Allocator& allocator) : data_()
+    {
+        SetStringRaw(StringRef(s), allocator);
+    }
 #endif
 
     //! Constructor for Array.
@@ -858,8 +1051,9 @@ public:
         \note \c Array is always pass-by-value.
         \note the source array is moved into this value and the sourec array becomes empty.
     */
-    GenericValue(Array a) RAPIDJSON_NOEXCEPT : data_(a.value_.data_) {
-        a.value_.data_ = Data();
+    GenericValue(Array a) RAPIDJSON_NOEXCEPT : data_(a.value_.data_)
+    {
+        a.value_.data_         = Data();
         a.value_.data_.f.flags = kArrayFlag;
     }
 
@@ -869,43 +1063,45 @@ public:
         \note \c Object is always pass-by-value.
         \note the source object is moved into this value and the sourec object becomes empty.
     */
-    GenericValue(Object o) RAPIDJSON_NOEXCEPT : data_(o.value_.data_) {
-        o.value_.data_ = Data();
+    GenericValue(Object o) RAPIDJSON_NOEXCEPT : data_(o.value_.data_)
+    {
+        o.value_.data_         = Data();
         o.value_.data_.f.flags = kObjectFlag;
     }
 
     //! Destructor.
     /*! Need to destruct elements of array, members of object, or copy-string.
-    */
-    ~GenericValue() {
+     */
+    ~GenericValue()
+    {
         // With RAPIDJSON_USE_MEMBERSMAP, the maps need to be destroyed to release
         // their Allocator if it's refcounted (e.g. MemoryPoolAllocator).
-        if (Allocator::kNeedFree || (RAPIDJSON_USE_MEMBERSMAP+0 &&
-                                     internal::IsRefCounted<Allocator>::Value)) {
-            switch(data_.f.flags) {
-            case kArrayFlag:
-                {
-                    GenericValue* e = GetElementsPointer();
-                    for (GenericValue* v = e; v != e + data_.a.size; ++v)
-                        v->~GenericValue();
-                    if (Allocator::kNeedFree) { // Shortcut by Allocator's trait
-                        Allocator::Free(e);
-                    }
+        if(Allocator::kNeedFree ||
+           (RAPIDJSON_USE_MEMBERSMAP + 0 && internal::IsRefCounted<Allocator>::Value))
+        {
+            switch(data_.f.flags)
+            {
+            case kArrayFlag: {
+                GenericValue* e = GetElementsPointer();
+                for(GenericValue* v = e; v != e + data_.a.size; ++v)
+                    v->~GenericValue();
+                if(Allocator::kNeedFree)
+                { // Shortcut by Allocator's trait
+                    Allocator::Free(e);
                 }
-                break;
+            }
+            break;
 
-            case kObjectFlag:
-                DoFreeMembers();
-                break;
+            case kObjectFlag: DoFreeMembers(); break;
 
             case kCopyStringFlag:
-                if (Allocator::kNeedFree) { // Shortcut by Allocator's trait
+                if(Allocator::kNeedFree)
+                { // Shortcut by Allocator's trait
                     Allocator::Free(const_cast<Ch*>(GetStringPointer()));
                 }
                 break;
 
-            default:
-                break;  // Do nothing for other types.
+            default: break; // Do nothing for other types.
             }
         }
     }
@@ -917,9 +1113,11 @@ public:
 
     //! Assignment with move semantics.
     /*! \param rhs Source of the assignment. It will become a null value after assignment.
-    */
-    GenericValue& operator=(GenericValue& rhs) RAPIDJSON_NOEXCEPT {
-        if (RAPIDJSON_LIKELY(this != &rhs)) {
+     */
+    GenericValue& operator=(GenericValue& rhs) RAPIDJSON_NOEXCEPT
+    {
+        if(RAPIDJSON_LIKELY(this != &rhs))
+        {
             // Can't destroy "this" before assigning "rhs", otherwise "rhs"
             // could be used after free if it's an sub-Value of "this",
             // hence the temporary danse.
@@ -933,17 +1131,16 @@ public:
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
     //! Move assignment in C++11
-    GenericValue& operator=(GenericValue&& rhs) RAPIDJSON_NOEXCEPT {
-        return *this = rhs.Move();
-    }
+    GenericValue& operator=(GenericValue&& rhs) RAPIDJSON_NOEXCEPT { return *this = rhs.Move(); }
 #endif
 
     //! Assignment of constant string reference (no copy)
     /*! \param str Constant string reference to be assigned
-        \note This overload is needed to avoid clashes with the generic primitive type assignment overload below.
-        \see GenericStringRef, operator=(T)
+        \note This overload is needed to avoid clashes with the generic primitive type assignment
+       overload below. \see GenericStringRef, operator=(T)
     */
-    GenericValue& operator=(StringRefType str) RAPIDJSON_NOEXCEPT {
+    GenericValue& operator=(StringRefType str) RAPIDJSON_NOEXCEPT
+    {
         GenericValue s(str);
         return *this = s;
     }
@@ -962,7 +1159,8 @@ public:
     */
     template <typename T>
     RAPIDJSON_DISABLEIF_RETURN((internal::IsPointer<T>), (GenericValue&))
-    operator=(T value) {
+    operator=(T value)
+    {
         GenericValue v(value);
         return *this = v;
     }
@@ -972,13 +1170,17 @@ public:
         \tparam SourceAllocator Allocator type of \c rhs
         \param rhs Value to copy from (read-only)
         \param allocator Allocator to use for copying
-        \param copyConstStrings Force copying of constant strings (e.g. referencing an in-situ buffer)
+        \param copyConstStrings Force copying of constant strings (e.g. referencing an in-situ
+       buffer)
      */
     template <typename SourceAllocator>
-    GenericValue& CopyFrom(const GenericValue<Encoding, SourceAllocator>& rhs, Allocator& allocator, bool copyConstStrings = false) {
+    GenericValue& CopyFrom(const GenericValue<Encoding, SourceAllocator>& rhs,
+                           Allocator& allocator,
+                           bool copyConstStrings = false)
+    {
         RAPIDJSON_ASSERT(static_cast<void*>(this) != static_cast<void const*>(&rhs));
         this->~GenericValue();
-        new (this) GenericValue(rhs, allocator, copyConstStrings);
+        new(this) GenericValue(rhs, allocator, copyConstStrings);
         return *this;
     }
 
@@ -987,7 +1189,8 @@ public:
         \param other Another value.
         \note Constant complexity.
     */
-    GenericValue& Swap(GenericValue& other) RAPIDJSON_NOEXCEPT {
+    GenericValue& Swap(GenericValue& other) RAPIDJSON_NOEXCEPT
+    {
         GenericValue temp;
         temp.RawAssign(*this);
         RawAssign(other);
@@ -997,11 +1200,8 @@ public:
 
     //! free-standing swap function helper
     /*!
-        Helper function to enable support for common swap implementation pattern based on \c std::swap:
-        \code
-        void swap(MyClass& a, MyClass& b) {
-            using std::swap;
-            swap(a.value, b.value);
+        Helper function to enable support for common swap implementation pattern based on \c
+       std::swap: \code void swap(MyClass& a, MyClass& b) { using std::swap; swap(a.value, b.value);
             // ...
         }
         \endcode
@@ -1018,39 +1218,46 @@ public:
     //@{
     //! Equal-to operator
     /*!
-        \note If an object contains duplicated named member, comparing equality with any object is always \c false.
-        \note Complexity is quadratic in Object's member number and linear for the rest (number of all values in the subtree and total lengths of all strings).
+        \note If an object contains duplicated named member, comparing equality with any object is
+       always \c false. \note Complexity is quadratic in Object's member number and linear for the
+       rest (number of all values in the subtree and total lengths of all strings).
     */
     template <typename SourceAllocator>
-    bool operator==(const GenericValue<Encoding, SourceAllocator>& rhs) const {
+    bool operator==(const GenericValue<Encoding, SourceAllocator>& rhs) const
+    {
         typedef GenericValue<Encoding, SourceAllocator> RhsType;
-        if (GetType() != rhs.GetType())
+        if(GetType() != rhs.GetType())
             return false;
 
-        switch (GetType()) {
+        switch(GetType())
+        {
         case kObjectType: // Warning: O(n^2) inner-loop
-            if (data_.o.size != rhs.data_.o.size)
-                return false;           
-            for (ConstMemberIterator lhsMemberItr = MemberBegin(); lhsMemberItr != MemberEnd(); ++lhsMemberItr) {
-                typename RhsType::ConstMemberIterator rhsMemberItr = rhs.FindMember(lhsMemberItr->name);
-                if (rhsMemberItr == rhs.MemberEnd() || (!(lhsMemberItr->value == rhsMemberItr->value)))
+            if(data_.o.size != rhs.data_.o.size)
+                return false;
+            for(ConstMemberIterator lhsMemberItr = MemberBegin(); lhsMemberItr != MemberEnd();
+                ++lhsMemberItr)
+            {
+                typename RhsType::ConstMemberIterator rhsMemberItr =
+                    rhs.FindMember(lhsMemberItr->name);
+                if(rhsMemberItr == rhs.MemberEnd() ||
+                   (!(lhsMemberItr->value == rhsMemberItr->value)))
                     return false;
             }
             return true;
-            
+
         case kArrayType:
-            if (data_.a.size != rhs.data_.a.size)
+            if(data_.a.size != rhs.data_.a.size)
                 return false;
-            for (SizeType i = 0; i < data_.a.size; i++)
-                if (!((*this)[i] == rhs[i]))
+            for(SizeType i = 0; i < data_.a.size; i++)
+                if(!((*this)[i] == rhs[i]))
                     return false;
             return true;
 
-        case kStringType:
-            return StringEqual(rhs);
+        case kStringType: return StringEqual(rhs);
 
         case kNumberType:
-            if (IsDouble() || rhs.IsDouble()) {
+            if(IsDouble() || rhs.IsDouble())
+            {
                 double a = GetDouble();     // May convert from integer to double.
                 double b = rhs.GetDouble(); // Ditto
                 return a >= b && a <= b;    // Prevent -Wfloat-equal
@@ -1058,8 +1265,7 @@ public:
             else
                 return data_.n.u64 == rhs.data_.n.u64;
 
-        default:
-            return true;
+        default: return true;
         }
     }
 
@@ -1070,20 +1276,33 @@ public:
     //! Equal-to operator with string object
     /*! \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
      */
-    bool operator==(const std::basic_string<Ch>& rhs) const { return *this == GenericValue(StringRef(rhs)); }
+    bool operator==(const std::basic_string<Ch>& rhs) const
+    {
+        return *this == GenericValue(StringRef(rhs));
+    }
 #endif
 
     //! Equal-to operator with primitive types
-    /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c double, \c true, \c false
-    */
-    template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>,internal::IsGenericValue<T> >), (bool)) operator==(const T& rhs) const { return *this == GenericValue(rhs); }
+    /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c double, \c
+     * true, \c false
+     */
+    template <typename T>
+    RAPIDJSON_DISABLEIF_RETURN(
+        (internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T>>), (bool))
+    operator==(const T & rhs) const
+    {
+        return *this == GenericValue(rhs);
+    }
 
 #ifndef __cpp_impl_three_way_comparison
     //! Not-equal-to operator
     /*! \return !(*this == rhs)
      */
     template <typename SourceAllocator>
-    bool operator!=(const GenericValue<Encoding, SourceAllocator>& rhs) const { return !(*this == rhs); }
+    bool operator!=(const GenericValue<Encoding, SourceAllocator>& rhs) const
+    {
+        return !(*this == rhs);
+    }
 
     //! Not-equal-to operator with const C-string pointer
     bool operator!=(const Ch* rhs) const { return !(*this == rhs); }
@@ -1091,74 +1310,96 @@ public:
     //! Not-equal-to operator with arbitrary types
     /*! \return !(*this == rhs)
      */
-    template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue<T>), (bool)) operator!=(const T& rhs) const { return !(*this == rhs); }
+    template <typename T>
+    RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue<T>), (bool))
+    operator!=(const T & rhs) const
+    {
+        return !(*this == rhs);
+    }
 
     //! Equal-to operator with arbitrary types (symmetric version)
     /*! \return (rhs == lhs)
      */
-    template <typename T> friend RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue<T>), (bool)) operator==(const T& lhs, const GenericValue& rhs) { return rhs == lhs; }
+    template <typename T>
+    friend RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue<T>),
+                                      (bool)) operator==(const T & lhs, const GenericValue & rhs)
+    {
+        return rhs == lhs;
+    }
 
     //! Not-Equal-to operator with arbitrary types (symmetric version)
     /*! \return !(rhs == lhs)
      */
-    template <typename T> friend RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue<T>), (bool)) operator!=(const T& lhs, const GenericValue& rhs) { return !(rhs == lhs); }
+    template <typename T>
+    friend RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue<T>),
+                                      (bool)) operator!=(const T & lhs, const GenericValue & rhs)
+    {
+        return !(rhs == lhs);
+    }
     //@}
 #endif
 
     //!@name Type
     //@{
 
-    Type GetType()  const { return static_cast<Type>(data_.f.flags & kTypeMask); }
-    bool IsNull()   const { return data_.f.flags == kNullFlag; }
-    bool IsFalse()  const { return data_.f.flags == kFalseFlag; }
-    bool IsTrue()   const { return data_.f.flags == kTrueFlag; }
-    bool IsBool()   const { return (data_.f.flags & kBoolFlag) != 0; }
+    Type GetType() const { return static_cast<Type>(data_.f.flags & kTypeMask); }
+    bool IsNull() const { return data_.f.flags == kNullFlag; }
+    bool IsFalse() const { return data_.f.flags == kFalseFlag; }
+    bool IsTrue() const { return data_.f.flags == kTrueFlag; }
+    bool IsBool() const { return (data_.f.flags & kBoolFlag) != 0; }
     bool IsObject() const { return data_.f.flags == kObjectFlag; }
-    bool IsArray()  const { return data_.f.flags == kArrayFlag; }
+    bool IsArray() const { return data_.f.flags == kArrayFlag; }
     bool IsNumber() const { return (data_.f.flags & kNumberFlag) != 0; }
-    bool IsInt()    const { return (data_.f.flags & kIntFlag) != 0; }
-    bool IsUint()   const { return (data_.f.flags & kUintFlag) != 0; }
-    bool IsInt64()  const { return (data_.f.flags & kInt64Flag) != 0; }
+    bool IsInt() const { return (data_.f.flags & kIntFlag) != 0; }
+    bool IsUint() const { return (data_.f.flags & kUintFlag) != 0; }
+    bool IsInt64() const { return (data_.f.flags & kInt64Flag) != 0; }
     bool IsUint64() const { return (data_.f.flags & kUint64Flag) != 0; }
     bool IsDouble() const { return (data_.f.flags & kDoubleFlag) != 0; }
     bool IsString() const { return (data_.f.flags & kStringFlag) != 0; }
 
     // Checks whether a number can be losslessly converted to a double.
-    bool IsLosslessDouble() const {
-        if (!IsNumber()) return false;
-        if (IsUint64()) {
-            uint64_t u = GetUint64();
+    bool IsLosslessDouble() const
+    {
+        if(!IsNumber())
+            return false;
+        if(IsUint64())
+        {
+            uint64_t u        = GetUint64();
             volatile double d = static_cast<double>(u);
-            return (d >= 0.0)
-                && (d < static_cast<double>((std::numeric_limits<uint64_t>::max)()))
-                && (u == static_cast<uint64_t>(d));
+            return (d >= 0.0) &&
+                   (d < static_cast<double>((std::numeric_limits<uint64_t>::max)())) &&
+                   (u == static_cast<uint64_t>(d));
         }
-        if (IsInt64()) {
-            int64_t i = GetInt64();
+        if(IsInt64())
+        {
+            int64_t i         = GetInt64();
             volatile double d = static_cast<double>(i);
-            return (d >= static_cast<double>((std::numeric_limits<int64_t>::min)()))
-                && (d < static_cast<double>((std::numeric_limits<int64_t>::max)()))
-                && (i == static_cast<int64_t>(d));
+            return (d >= static_cast<double>((std::numeric_limits<int64_t>::min)())) &&
+                   (d < static_cast<double>((std::numeric_limits<int64_t>::max)())) &&
+                   (i == static_cast<int64_t>(d));
         }
         return true; // double, int, uint are always lossless
     }
 
     // Checks whether a number is a float (possible lossy).
-    bool IsFloat() const  {
-        if ((data_.f.flags & kDoubleFlag) == 0)
+    bool IsFloat() const
+    {
+        if((data_.f.flags & kDoubleFlag) == 0)
             return false;
         double d = GetDouble();
         return d >= -3.4028234e38 && d <= 3.4028234e38;
     }
     // Checks whether a number can be losslessly converted to a float.
-    bool IsLosslessFloat() const {
-        if (!IsNumber()) return false;
+    bool IsLosslessFloat() const
+    {
+        if(!IsNumber())
+            return false;
         double a = GetDouble();
-        if (a < static_cast<double>(-(std::numeric_limits<float>::max)())
-                || a > static_cast<double>((std::numeric_limits<float>::max)()))
+        if(a < static_cast<double>(-(std::numeric_limits<float>::max)()) ||
+           a > static_cast<double>((std::numeric_limits<float>::max)()))
             return false;
         double b = static_cast<double>(static_cast<float>(a));
-        return a >= b && a <= b;    // Prevent -Wfloat-equal
+        return a >= b && a <= b; // Prevent -Wfloat-equal
     }
 
     //@}
@@ -1166,17 +1407,31 @@ public:
     //!@name Null
     //@{
 
-    GenericValue& SetNull() { this->~GenericValue(); new (this) GenericValue(); return *this; }
+    GenericValue& SetNull()
+    {
+        this->~GenericValue();
+        new(this) GenericValue();
+        return *this;
+    }
 
     //@}
 
     //!@name Bool
     //@{
 
-    bool GetBool() const { RAPIDJSON_ASSERT(IsBool()); return data_.f.flags == kTrueFlag; }
+    bool GetBool() const
+    {
+        RAPIDJSON_ASSERT(IsBool());
+        return data_.f.flags == kTrueFlag;
+    }
     //!< Set boolean value
     /*! \post IsBool() == true */
-    GenericValue& SetBool(bool b) { this->~GenericValue(); new (this) GenericValue(b); return *this; }
+    GenericValue& SetBool(bool b)
+    {
+        this->~GenericValue();
+        new(this) GenericValue(b);
+        return *this;
+    }
 
     //@}
 
@@ -1185,104 +1440,160 @@ public:
 
     //! Set this value as an empty object.
     /*! \post IsObject() == true */
-    GenericValue& SetObject() { this->~GenericValue(); new (this) GenericValue(kObjectType); return *this; }
+    GenericValue& SetObject()
+    {
+        this->~GenericValue();
+        new(this) GenericValue(kObjectType);
+        return *this;
+    }
 
     //! Get the number of members in the object.
-    SizeType MemberCount() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.size; }
+    SizeType MemberCount() const
+    {
+        RAPIDJSON_ASSERT(IsObject());
+        return data_.o.size;
+    }
 
     //! Get the capacity of object.
-    SizeType MemberCapacity() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.capacity; }
+    SizeType MemberCapacity() const
+    {
+        RAPIDJSON_ASSERT(IsObject());
+        return data_.o.capacity;
+    }
 
     //! Check whether the object is empty.
-    bool ObjectEmpty() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.size == 0; }
+    bool ObjectEmpty() const
+    {
+        RAPIDJSON_ASSERT(IsObject());
+        return data_.o.size == 0;
+    }
 
     //! Get a value from an object associated with the name.
     /*! \pre IsObject() == true
-        \tparam T Either \c Ch or \c const \c Ch (template used for disambiguation with \ref operator[](SizeType))
-        \note In version 0.1x, if the member is not found, this function returns a null value. This makes issue 7.
-        Since 0.2, if the name is not correct, it will assert.
-        If user is unsure whether a member exists, user should use HasMember() first.
-        A better approach is to use FindMember().
-        \note Linear time complexity.
+        \tparam T Either \c Ch or \c const \c Ch (template used for disambiguation with \ref
+       operator[](SizeType)) \note In version 0.1x, if the member is not found, this function
+       returns a null value. This makes issue 7. Since 0.2, if the name is not correct, it will
+       assert. If user is unsure whether a member exists, user should use HasMember() first. A
+       better approach is to use FindMember(). \note Linear time complexity.
     */
     template <typename T>
-    RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr<internal::IsSame<typename internal::RemoveConst<T>::Type, Ch> >),(GenericValue&)) operator[](T* name) {
+    RAPIDJSON_DISABLEIF_RETURN(
+        (internal::NotExpr<internal::IsSame<typename internal::RemoveConst<T>::Type, Ch>>),
+        (GenericValue&))
+    operator[](T * name)
+    {
         GenericValue n(StringRef(name));
         return (*this)[n];
     }
     template <typename T>
-    RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr<internal::IsSame<typename internal::RemoveConst<T>::Type, Ch> >),(const GenericValue&)) operator[](T* name) const { return const_cast<GenericValue&>(*this)[name]; }
+    RAPIDJSON_DISABLEIF_RETURN(
+        (internal::NotExpr<internal::IsSame<typename internal::RemoveConst<T>::Type, Ch>>),
+        (const GenericValue&))
+    operator[](T * name) const
+    {
+        return const_cast<GenericValue&>(*this)[name];
+    }
 
     //! Get a value from an object associated with the name.
     /*! \pre IsObject() == true
         \tparam SourceAllocator Allocator of the \c name value
 
-        \note Compared to \ref operator[](T*), this version is faster because it does not need a StrLen().
-        And it can also handle strings with embedded null characters.
+        \note Compared to \ref operator[](T*), this version is faster because it does not need a
+       StrLen(). And it can also handle strings with embedded null characters.
 
         \note Linear time complexity.
     */
     template <typename SourceAllocator>
-    GenericValue& operator[](const GenericValue<Encoding, SourceAllocator>& name) {
+    GenericValue& operator[](const GenericValue<Encoding, SourceAllocator>& name)
+    {
         MemberIterator member = FindMember(name);
-        if (member != MemberEnd())
+        if(member != MemberEnd())
             return member->value;
-        else {
-            RAPIDJSON_ASSERT(false);    // see above note
+        else
+        {
+            RAPIDJSON_ASSERT(false); // see above note
 
 #if RAPIDJSON_HAS_CXX11
             // Use thread-local storage to prevent races between threads.
             // Use static buffer and placement-new to prevent destruction, with
             // alignas() to ensure proper alignment.
             alignas(GenericValue) thread_local static char buffer[sizeof(GenericValue)];
-            return *new (buffer) GenericValue();
+            return *new(buffer) GenericValue();
 #elif defined(_MSC_VER) && _MSC_VER < 1900
             // There's no way to solve both thread locality and proper alignment
             // simultaneously.
             __declspec(thread) static char buffer[sizeof(GenericValue)];
-            return *new (buffer) GenericValue();
+            return *new(buffer) GenericValue();
 #elif defined(__GNUC__) || defined(__clang__)
-            // This will generate -Wexit-time-destructors in clang, but that's
-            // better than having under-alignment.
-            __thread static GenericValue buffer;
-            return buffer;
+                                         // This will generate -Wexit-time-destructors in clang, but
+                                         // that's
+                // better than having under-alignment.
+                __thread static GenericValue buffer;
+                return buffer;
 #else
-            // Don't know what compiler this is, so don't know how to ensure
-            // thread-locality.
-            static GenericValue buffer;
-            return buffer;
+                                         // Don't know what compiler this is, so don't know how to
+                                         // ensure
+                // thread-locality.
+                static GenericValue buffer;
+                return buffer;
 #endif
         }
     }
     template <typename SourceAllocator>
-    const GenericValue& operator[](const GenericValue<Encoding, SourceAllocator>& name) const { return const_cast<GenericValue&>(*this)[name]; }
+    const GenericValue& operator[](const GenericValue<Encoding, SourceAllocator>& name) const
+    {
+        return const_cast<GenericValue&>(*this)[name];
+    }
 
 #if RAPIDJSON_HAS_STDSTRING
     //! Get a value from an object associated with name (string object).
-    GenericValue& operator[](const std::basic_string<Ch>& name) { return (*this)[GenericValue(StringRef(name))]; }
-    const GenericValue& operator[](const std::basic_string<Ch>& name) const { return (*this)[GenericValue(StringRef(name))]; }
+    GenericValue& operator[](const std::basic_string<Ch>& name)
+    {
+        return (*this)[GenericValue(StringRef(name))];
+    }
+    const GenericValue& operator[](const std::basic_string<Ch>& name) const
+    {
+        return (*this)[GenericValue(StringRef(name))];
+    }
 #endif
 
     //! Const member iterator
     /*! \pre IsObject() == true */
-    ConstMemberIterator MemberBegin() const { RAPIDJSON_ASSERT(IsObject()); return ConstMemberIterator(GetMembersPointer()); }
+    ConstMemberIterator MemberBegin() const
+    {
+        RAPIDJSON_ASSERT(IsObject());
+        return ConstMemberIterator(GetMembersPointer());
+    }
     //! Const \em past-the-end member iterator
     /*! \pre IsObject() == true */
-    ConstMemberIterator MemberEnd() const   { RAPIDJSON_ASSERT(IsObject()); return ConstMemberIterator(GetMembersPointer() + data_.o.size); }
+    ConstMemberIterator MemberEnd() const
+    {
+        RAPIDJSON_ASSERT(IsObject());
+        return ConstMemberIterator(GetMembersPointer() + data_.o.size);
+    }
     //! Member iterator
     /*! \pre IsObject() == true */
-    MemberIterator MemberBegin()            { RAPIDJSON_ASSERT(IsObject()); return MemberIterator(GetMembersPointer()); }
+    MemberIterator MemberBegin()
+    {
+        RAPIDJSON_ASSERT(IsObject());
+        return MemberIterator(GetMembersPointer());
+    }
     //! \em Past-the-end member iterator
     /*! \pre IsObject() == true */
-    MemberIterator MemberEnd()              { RAPIDJSON_ASSERT(IsObject()); return MemberIterator(GetMembersPointer() + data_.o.size); }
+    MemberIterator MemberEnd()
+    {
+        RAPIDJSON_ASSERT(IsObject());
+        return MemberIterator(GetMembersPointer() + data_.o.size);
+    }
 
     //! Request the object to have enough capacity to store members.
     /*! \param newCapacity  The capacity that the object at least need to have.
-        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
-        \return The value itself for fluent API.
-        \note Linear time complexity.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used
+       before. Commonly use GenericDocument::GetAllocator(). \return The value itself for fluent
+       API. \note Linear time complexity.
     */
-    GenericValue& MemberReserve(SizeType newCapacity, Allocator &allocator) {
+    GenericValue& MemberReserve(SizeType newCapacity, Allocator& allocator)
+    {
         RAPIDJSON_ASSERT(IsObject());
         DoReserveMembers(newCapacity, allocator);
         return *this;
@@ -1307,20 +1618,24 @@ public:
         \note It is better to use FindMember() directly if you need the obtain the value as well.
         \note Linear time complexity.
     */
-    bool HasMember(const std::basic_string<Ch>& name) const { return FindMember(name) != MemberEnd(); }
+    bool HasMember(const std::basic_string<Ch>& name) const
+    {
+        return FindMember(name) != MemberEnd();
+    }
 #endif
 
     //! Check whether a member exists in the object with GenericValue name.
     /*!
-        This version is faster because it does not need a StrLen(). It can also handle string with null character.
-        \param name Member name to be searched.
-        \pre IsObject() == true
-        \return Whether a member with that name exists.
-        \note It is better to use FindMember() directly if you need the obtain the value as well.
-        \note Linear time complexity.
+        This version is faster because it does not need a StrLen(). It can also handle string with
+       null character. \param name Member name to be searched. \pre IsObject() == true \return
+       Whether a member with that name exists. \note It is better to use FindMember() directly if
+       you need the obtain the value as well. \note Linear time complexity.
     */
     template <typename SourceAllocator>
-    bool HasMember(const GenericValue<Encoding, SourceAllocator>& name) const { return FindMember(name) != MemberEnd(); }
+    bool HasMember(const GenericValue<Encoding, SourceAllocator>& name) const
+    {
+        return FindMember(name) != MemberEnd();
+    }
 
     //! Find member by name.
     /*!
@@ -1334,20 +1649,22 @@ public:
             \c std::map, this has been changed to MemberEnd() now.
         \note Linear time complexity.
     */
-    MemberIterator FindMember(const Ch* name) {
+    MemberIterator FindMember(const Ch* name)
+    {
         GenericValue n(StringRef(name));
         return FindMember(n);
     }
 
-    ConstMemberIterator FindMember(const Ch* name) const { return const_cast<GenericValue&>(*this).FindMember(name); }
+    ConstMemberIterator FindMember(const Ch* name) const
+    {
+        return const_cast<GenericValue&>(*this).FindMember(name);
+    }
 
     //! Find member by name.
     /*!
-        This version is faster because it does not need a StrLen(). It can also handle string with null character.
-        \param name Member name to be searched.
-        \pre IsObject() == true
-        \return Iterator to member, if it exists.
-            Otherwise returns \ref MemberEnd().
+        This version is faster because it does not need a StrLen(). It can also handle string with
+       null character. \param name Member name to be searched. \pre IsObject() == true \return
+       Iterator to member, if it exists. Otherwise returns \ref MemberEnd().
 
         \note Earlier versions of Rapidjson returned a \c NULL pointer, in case
             the requested member doesn't exist. For consistency with e.g.
@@ -1355,12 +1672,17 @@ public:
         \note Linear time complexity.
     */
     template <typename SourceAllocator>
-    MemberIterator FindMember(const GenericValue<Encoding, SourceAllocator>& name) {
+    MemberIterator FindMember(const GenericValue<Encoding, SourceAllocator>& name)
+    {
         RAPIDJSON_ASSERT(IsObject());
         RAPIDJSON_ASSERT(name.IsString());
         return DoFindMember(name);
     }
-    template <typename SourceAllocator> ConstMemberIterator FindMember(const GenericValue<Encoding, SourceAllocator>& name) const { return const_cast<GenericValue&>(*this).FindMember(name); }
+    template <typename SourceAllocator>
+    ConstMemberIterator FindMember(const GenericValue<Encoding, SourceAllocator>& name) const
+    {
+        return const_cast<GenericValue&>(*this).FindMember(name);
+    }
 
 #if RAPIDJSON_HAS_STDSTRING
     //! Find member by string object name.
@@ -1370,21 +1692,27 @@ public:
         \return Iterator to member, if it exists.
             Otherwise returns \ref MemberEnd().
     */
-    MemberIterator FindMember(const std::basic_string<Ch>& name) { return FindMember(GenericValue(StringRef(name))); }
-    ConstMemberIterator FindMember(const std::basic_string<Ch>& name) const { return FindMember(GenericValue(StringRef(name))); }
+    MemberIterator FindMember(const std::basic_string<Ch>& name)
+    {
+        return FindMember(GenericValue(StringRef(name)));
+    }
+    ConstMemberIterator FindMember(const std::basic_string<Ch>& name) const
+    {
+        return FindMember(GenericValue(StringRef(name)));
+    }
 #endif
 
     //! Add a member (name-value pair) to the object.
     /*! \param name A string value as name of member.
         \param value Value of any type.
-        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
-        \return The value itself for fluent API.
-        \note The ownership of \c name and \c value will be transferred to this object on success.
-        \pre  IsObject() && name.IsString()
-        \post name.IsNull() && value.IsNull()
-        \note Amortized Constant time complexity.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used
+       before. Commonly use GenericDocument::GetAllocator(). \return The value itself for fluent
+       API. \note The ownership of \c name and \c value will be transferred to this object on
+       success. \pre  IsObject() && name.IsString() \post name.IsNull() && value.IsNull() \note
+       Amortized Constant time complexity.
     */
-    GenericValue& AddMember(GenericValue& name, GenericValue& value, Allocator& allocator) {
+    GenericValue& AddMember(GenericValue& name, GenericValue& value, Allocator& allocator)
+    {
         RAPIDJSON_ASSERT(IsObject());
         RAPIDJSON_ASSERT(name.IsString());
         DoAddMember(name, value, allocator);
@@ -1394,13 +1722,14 @@ public:
     //! Add a constant string value as member (name-value pair) to the object.
     /*! \param name A string value as name of member.
         \param value constant string reference as value of member.
-        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
-        \return The value itself for fluent API.
-        \pre  IsObject()
-        \note This overload is needed to avoid clashes with the generic primitive type AddMember(GenericValue&,T,Allocator&) overload below.
-        \note Amortized Constant time complexity.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used
+       before. Commonly use GenericDocument::GetAllocator(). \return The value itself for fluent
+       API. \pre  IsObject() \note This overload is needed to avoid clashes with the generic
+       primitive type AddMember(GenericValue&,T,Allocator&) overload below. \note Amortized Constant
+       time complexity.
     */
-    GenericValue& AddMember(GenericValue& name, StringRefType value, Allocator& allocator) {
+    GenericValue& AddMember(GenericValue& name, StringRefType value, Allocator& allocator)
+    {
         GenericValue v(value);
         return AddMember(name, v, allocator);
     }
@@ -1409,13 +1738,14 @@ public:
     //! Add a string object as member (name-value pair) to the object.
     /*! \param name A string value as name of member.
         \param value constant string reference as value of member.
-        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
-        \return The value itself for fluent API.
-        \pre  IsObject()
-        \note This overload is needed to avoid clashes with the generic primitive type AddMember(GenericValue&,T,Allocator&) overload below.
-        \note Amortized Constant time complexity.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used
+       before. Commonly use GenericDocument::GetAllocator(). \return The value itself for fluent
+       API. \pre  IsObject() \note This overload is needed to avoid clashes with the generic
+       primitive type AddMember(GenericValue&,T,Allocator&) overload below. \note Amortized Constant
+       time complexity.
     */
-    GenericValue& AddMember(GenericValue& name, std::basic_string<Ch>& value, Allocator& allocator) {
+    GenericValue& AddMember(GenericValue& name, std::basic_string<Ch>& value, Allocator& allocator)
+    {
         GenericValue v(value, allocator);
         return AddMember(name, v, allocator);
     }
@@ -1425,9 +1755,8 @@ public:
     /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t
         \param name A string value as name of member.
         \param value Value of primitive type \c T as value of member
-        \param allocator Allocator for reallocating memory. Commonly use GenericDocument::GetAllocator().
-        \return The value itself for fluent API.
-        \pre  IsObject()
+        \param allocator Allocator for reallocating memory. Commonly use
+       GenericDocument::GetAllocator(). \return The value itself for fluent API. \pre  IsObject()
 
         \note The source type \c T explicitly disallows all pointer types,
             especially (\c const) \ref Ch*.  This helps avoiding implicitly
@@ -1439,40 +1768,44 @@ public:
         \note Amortized Constant time complexity.
     */
     template <typename T>
-    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericValue&))
-    AddMember(GenericValue& name, T value, Allocator& allocator) {
+    RAPIDJSON_DISABLEIF_RETURN(
+        (internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T>>), (GenericValue&))
+    AddMember(GenericValue& name, T value, Allocator& allocator)
+    {
         GenericValue v(value);
         return AddMember(name, v, allocator);
     }
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
-    GenericValue& AddMember(GenericValue&& name, GenericValue&& value, Allocator& allocator) {
+    GenericValue& AddMember(GenericValue&& name, GenericValue&& value, Allocator& allocator)
+    {
         return AddMember(name, value, allocator);
     }
-    GenericValue& AddMember(GenericValue&& name, GenericValue& value, Allocator& allocator) {
+    GenericValue& AddMember(GenericValue&& name, GenericValue& value, Allocator& allocator)
+    {
         return AddMember(name, value, allocator);
     }
-    GenericValue& AddMember(GenericValue& name, GenericValue&& value, Allocator& allocator) {
+    GenericValue& AddMember(GenericValue& name, GenericValue&& value, Allocator& allocator)
+    {
         return AddMember(name, value, allocator);
     }
-    GenericValue& AddMember(StringRefType name, GenericValue&& value, Allocator& allocator) {
+    GenericValue& AddMember(StringRefType name, GenericValue&& value, Allocator& allocator)
+    {
         GenericValue n(name);
         return AddMember(n, value, allocator);
     }
 #endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS
 
-
     //! Add a member (name-value pair) to the object.
     /*! \param name A constant string reference as name of member.
         \param value Value of any type.
-        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
-        \return The value itself for fluent API.
-        \note The ownership of \c value will be transferred to this object on success.
-        \pre  IsObject()
-        \post value.IsNull()
-        \note Amortized Constant time complexity.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used
+       before. Commonly use GenericDocument::GetAllocator(). \return The value itself for fluent
+       API. \note The ownership of \c value will be transferred to this object on success. \pre
+       IsObject() \post value.IsNull() \note Amortized Constant time complexity.
     */
-    GenericValue& AddMember(StringRefType name, GenericValue& value, Allocator& allocator) {
+    GenericValue& AddMember(StringRefType name, GenericValue& value, Allocator& allocator)
+    {
         GenericValue n(name);
         return AddMember(n, value, allocator);
     }
@@ -1480,13 +1813,14 @@ public:
     //! Add a constant string value as member (name-value pair) to the object.
     /*! \param name A constant string reference as name of member.
         \param value constant string reference as value of member.
-        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
-        \return The value itself for fluent API.
-        \pre  IsObject()
-        \note This overload is needed to avoid clashes with the generic primitive type AddMember(StringRefType,T,Allocator&) overload below.
-        \note Amortized Constant time complexity.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used
+       before. Commonly use GenericDocument::GetAllocator(). \return The value itself for fluent
+       API. \pre  IsObject() \note This overload is needed to avoid clashes with the generic
+       primitive type AddMember(StringRefType,T,Allocator&) overload below. \note Amortized Constant
+       time complexity.
     */
-    GenericValue& AddMember(StringRefType name, StringRefType value, Allocator& allocator) {
+    GenericValue& AddMember(StringRefType name, StringRefType value, Allocator& allocator)
+    {
         GenericValue v(value);
         return AddMember(name, v, allocator);
     }
@@ -1495,9 +1829,8 @@ public:
     /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t
         \param name A constant string reference as name of member.
         \param value Value of primitive type \c T as value of member
-        \param allocator Allocator for reallocating memory. Commonly use GenericDocument::GetAllocator().
-        \return The value itself for fluent API.
-        \pre  IsObject()
+        \param allocator Allocator for reallocating memory. Commonly use
+       GenericDocument::GetAllocator(). \return The value itself for fluent API. \pre  IsObject()
 
         \note The source type \c T explicitly disallows all pointer types,
             especially (\c const) \ref Ch*.  This helps avoiding implicitly
@@ -1509,8 +1842,10 @@ public:
         \note Amortized Constant time complexity.
     */
     template <typename T>
-    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericValue&))
-    AddMember(StringRefType name, T value, Allocator& allocator) {
+    RAPIDJSON_DISABLEIF_RETURN(
+        (internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T>>), (GenericValue&))
+    AddMember(StringRefType name, T value, Allocator& allocator)
+    {
         GenericValue n(name);
         return AddMember(n, value, allocator);
     }
@@ -1519,8 +1854,9 @@ public:
     /*! This function do not deallocate memory in the object, i.e. the capacity is unchanged.
         \note Linear time complexity.
     */
-    void RemoveAllMembers() {
-        RAPIDJSON_ASSERT(IsObject()); 
+    void RemoveAllMembers()
+    {
+        RAPIDJSON_ASSERT(IsObject());
         DoClearMembers();
     }
 
@@ -1532,19 +1868,25 @@ public:
             relative order of the remaining members.
         \note Linear time complexity.
     */
-    bool RemoveMember(const Ch* name) {
+    bool RemoveMember(const Ch* name)
+    {
         GenericValue n(StringRef(name));
         return RemoveMember(n);
     }
 
 #if RAPIDJSON_HAS_STDSTRING
-    bool RemoveMember(const std::basic_string<Ch>& name) { return RemoveMember(GenericValue(StringRef(name))); }
+    bool RemoveMember(const std::basic_string<Ch>& name)
+    {
+        return RemoveMember(GenericValue(StringRef(name)));
+    }
 #endif
 
     template <typename SourceAllocator>
-    bool RemoveMember(const GenericValue<Encoding, SourceAllocator>& name) {
+    bool RemoveMember(const GenericValue<Encoding, SourceAllocator>& name)
+    {
         MemberIterator m = FindMember(name);
-        if (m != MemberEnd()) {
+        if(m != MemberEnd())
+        {
             RemoveMember(m);
             return true;
         }
@@ -1560,7 +1902,8 @@ public:
             relative order of the remaining members.
         \note Constant time complexity.
     */
-    MemberIterator RemoveMember(MemberIterator m) {
+    MemberIterator RemoveMember(MemberIterator m)
+    {
         RAPIDJSON_ASSERT(IsObject());
         RAPIDJSON_ASSERT(data_.o.size > 0);
         RAPIDJSON_ASSERT(GetMembersPointer() != 0);
@@ -1572,14 +1915,12 @@ public:
     /*! \param pos iterator to the member to remove
         \pre IsObject() == true && \ref MemberBegin() <= \c pos < \ref MemberEnd()
         \return Iterator following the removed element.
-            If the iterator \c pos refers to the last element, the \ref MemberEnd() iterator is returned.
-        \note This function preserves the relative order of the remaining object
-            members. If you do not need this, use the more efficient \ref RemoveMember(MemberIterator).
-        \note Linear time complexity.
+            If the iterator \c pos refers to the last element, the \ref MemberEnd() iterator is
+       returned. \note This function preserves the relative order of the remaining object members.
+       If you do not need this, use the more efficient \ref RemoveMember(MemberIterator). \note
+       Linear time complexity.
     */
-    MemberIterator EraseMember(ConstMemberIterator pos) {
-        return EraseMember(pos, pos +1);
-    }
+    MemberIterator EraseMember(ConstMemberIterator pos) { return EraseMember(pos, pos + 1); }
 
     //! Remove members in the range [first, last) from an object.
     /*! \param first iterator to the first member to remove
@@ -1590,7 +1931,8 @@ public:
             members.
         \note Linear time complexity.
     */
-    MemberIterator EraseMember(ConstMemberIterator first, ConstMemberIterator last) {
+    MemberIterator EraseMember(ConstMemberIterator first, ConstMemberIterator last)
+    {
         RAPIDJSON_ASSERT(IsObject());
         RAPIDJSON_ASSERT(data_.o.size > 0);
         RAPIDJSON_ASSERT(GetMembersPointer() != 0);
@@ -1605,19 +1947,25 @@ public:
         \return Whether the member existed.
         \note Linear time complexity.
     */
-    bool EraseMember(const Ch* name) {
+    bool EraseMember(const Ch* name)
+    {
         GenericValue n(StringRef(name));
         return EraseMember(n);
     }
 
 #if RAPIDJSON_HAS_STDSTRING
-    bool EraseMember(const std::basic_string<Ch>& name) { return EraseMember(GenericValue(StringRef(name))); }
+    bool EraseMember(const std::basic_string<Ch>& name)
+    {
+        return EraseMember(GenericValue(StringRef(name)));
+    }
 #endif
 
     template <typename SourceAllocator>
-    bool EraseMember(const GenericValue<Encoding, SourceAllocator>& name) {
+    bool EraseMember(const GenericValue<Encoding, SourceAllocator>& name)
+    {
         MemberIterator m = FindMember(name);
-        if (m != MemberEnd()) {
+        if(m != MemberEnd())
+        {
             EraseMember(m);
             return true;
         }
@@ -1625,10 +1973,26 @@ public:
             return false;
     }
 
-    Object GetObject() { RAPIDJSON_ASSERT(IsObject()); return Object(*this); }
-    Object GetObj() { RAPIDJSON_ASSERT(IsObject()); return Object(*this); }
-    ConstObject GetObject() const { RAPIDJSON_ASSERT(IsObject()); return ConstObject(*this); }
-    ConstObject GetObj() const { RAPIDJSON_ASSERT(IsObject()); return ConstObject(*this); }
+    Object GetObject()
+    {
+        RAPIDJSON_ASSERT(IsObject());
+        return Object(*this);
+    }
+    Object GetObj()
+    {
+        RAPIDJSON_ASSERT(IsObject());
+        return Object(*this);
+    }
+    ConstObject GetObject() const
+    {
+        RAPIDJSON_ASSERT(IsObject());
+        return ConstObject(*this);
+    }
+    ConstObject GetObj() const
+    {
+        RAPIDJSON_ASSERT(IsObject());
+        return ConstObject(*this);
+    }
 
     //@}
 
@@ -1637,25 +2001,43 @@ public:
 
     //! Set this value as an empty array.
     /*! \post IsArray == true */
-    GenericValue& SetArray() { this->~GenericValue(); new (this) GenericValue(kArrayType); return *this; }
+    GenericValue& SetArray()
+    {
+        this->~GenericValue();
+        new(this) GenericValue(kArrayType);
+        return *this;
+    }
 
     //! Get the number of elements in array.
-    SizeType Size() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size; }
+    SizeType Size() const
+    {
+        RAPIDJSON_ASSERT(IsArray());
+        return data_.a.size;
+    }
 
     //! Get the capacity of array.
-    SizeType Capacity() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.capacity; }
+    SizeType Capacity() const
+    {
+        RAPIDJSON_ASSERT(IsArray());
+        return data_.a.capacity;
+    }
 
     //! Check whether the array is empty.
-    bool Empty() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size == 0; }
+    bool Empty() const
+    {
+        RAPIDJSON_ASSERT(IsArray());
+        return data_.a.size == 0;
+    }
 
     //! Remove all elements in the array.
     /*! This function do not deallocate memory in the array, i.e. the capacity is unchanged.
         \note Linear time complexity.
     */
-    void Clear() {
-        RAPIDJSON_ASSERT(IsArray()); 
+    void Clear()
+    {
+        RAPIDJSON_ASSERT(IsArray());
         GenericValue* e = GetElementsPointer();
-        for (GenericValue* v = e; v != e + data_.a.size; ++v)
+        for(GenericValue* v = e; v != e + data_.a.size; ++v)
             v->~GenericValue();
         data_.a.size = 0;
     }
@@ -1665,19 +2047,31 @@ public:
         \param index Zero-based index of element.
         \see operator[](T*)
     */
-    GenericValue& operator[](SizeType index) {
+    GenericValue& operator[](SizeType index)
+    {
         RAPIDJSON_ASSERT(IsArray());
         RAPIDJSON_ASSERT(index < data_.a.size);
         return GetElementsPointer()[index];
     }
-    const GenericValue& operator[](SizeType index) const { return const_cast<GenericValue&>(*this)[index]; }
+    const GenericValue& operator[](SizeType index) const
+    {
+        return const_cast<GenericValue&>(*this)[index];
+    }
 
     //! Element iterator
     /*! \pre IsArray() == true */
-    ValueIterator Begin() { RAPIDJSON_ASSERT(IsArray()); return GetElementsPointer(); }
+    ValueIterator Begin()
+    {
+        RAPIDJSON_ASSERT(IsArray());
+        return GetElementsPointer();
+    }
     //! \em Past-the-end element iterator
     /*! \pre IsArray() == true */
-    ValueIterator End() { RAPIDJSON_ASSERT(IsArray()); return GetElementsPointer() + data_.a.size; }
+    ValueIterator End()
+    {
+        RAPIDJSON_ASSERT(IsArray());
+        return GetElementsPointer() + data_.a.size;
+    }
     //! Constant element iterator
     /*! \pre IsArray() == true */
     ConstValueIterator Begin() const { return const_cast<GenericValue&>(*this).Begin(); }
@@ -1687,14 +2081,19 @@ public:
 
     //! Request the array to have enough capacity to store elements.
     /*! \param newCapacity  The capacity that the array at least need to have.
-        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
-        \return The value itself for fluent API.
-        \note Linear time complexity.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used
+       before. Commonly use GenericDocument::GetAllocator(). \return The value itself for fluent
+       API. \note Linear time complexity.
     */
-    GenericValue& Reserve(SizeType newCapacity, Allocator &allocator) {
+    GenericValue& Reserve(SizeType newCapacity, Allocator& allocator)
+    {
         RAPIDJSON_ASSERT(IsArray());
-        if (newCapacity > data_.a.capacity) {
-            SetElementsPointer(reinterpret_cast<GenericValue*>(allocator.Realloc(GetElementsPointer(), data_.a.capacity * sizeof(GenericValue), newCapacity * sizeof(GenericValue))));
+        if(newCapacity > data_.a.capacity)
+        {
+            SetElementsPointer(reinterpret_cast<GenericValue*>(
+                allocator.Realloc(GetElementsPointer(),
+                                  data_.a.capacity * sizeof(GenericValue),
+                                  newCapacity * sizeof(GenericValue))));
             data_.a.capacity = newCapacity;
         }
         return *this;
@@ -1702,48 +2101,51 @@ public:
 
     //! Append a GenericValue at the end of the array.
     /*! \param value        Value to be appended.
-        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
-        \pre IsArray() == true
-        \post value.IsNull() == true
-        \return The value itself for fluent API.
-        \note The ownership of \c value will be transferred to this array on success.
-        \note If the number of elements to be appended is known, calls Reserve() once first may be more efficient.
-        \note Amortized constant time complexity.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used
+       before. Commonly use GenericDocument::GetAllocator(). \pre IsArray() == true \post
+       value.IsNull() == true \return The value itself for fluent API. \note The ownership of \c
+       value will be transferred to this array on success. \note If the number of elements to be
+       appended is known, calls Reserve() once first may be more efficient. \note Amortized constant
+       time complexity.
     */
-    GenericValue& PushBack(GenericValue& value, Allocator& allocator) {
+    GenericValue& PushBack(GenericValue& value, Allocator& allocator)
+    {
         RAPIDJSON_ASSERT(IsArray());
-        if (data_.a.size >= data_.a.capacity)
-            Reserve(data_.a.capacity == 0 ? kDefaultArrayCapacity : (data_.a.capacity + (data_.a.capacity + 1) / 2), allocator);
+        if(data_.a.size >= data_.a.capacity)
+            Reserve(data_.a.capacity == 0 ? kDefaultArrayCapacity
+                                          : (data_.a.capacity + (data_.a.capacity + 1) / 2),
+                    allocator);
         GetElementsPointer()[data_.a.size++].RawAssign(value);
         return *this;
     }
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
-    GenericValue& PushBack(GenericValue&& value, Allocator& allocator) {
+    GenericValue& PushBack(GenericValue&& value, Allocator& allocator)
+    {
         return PushBack(value, allocator);
     }
 #endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS
 
     //! Append a constant string reference at the end of the array.
     /*! \param value        Constant string reference to be appended.
-        \param allocator    Allocator for reallocating memory. It must be the same one used previously. Commonly use GenericDocument::GetAllocator().
-        \pre IsArray() == true
-        \return The value itself for fluent API.
-        \note If the number of elements to be appended is known, calls Reserve() once first may be more efficient.
-        \note Amortized constant time complexity.
-        \see GenericStringRef
+        \param allocator    Allocator for reallocating memory. It must be the same one used
+       previously. Commonly use GenericDocument::GetAllocator(). \pre IsArray() == true \return The
+       value itself for fluent API. \note If the number of elements to be appended is known, calls
+       Reserve() once first may be more efficient. \note Amortized constant time complexity. \see
+       GenericStringRef
     */
-    GenericValue& PushBack(StringRefType value, Allocator& allocator) {
+    GenericValue& PushBack(StringRefType value, Allocator& allocator)
+    {
         return (*this).template PushBack<StringRefType>(value, allocator);
     }
 
     //! Append a primitive value at the end of the array.
     /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t
         \param value Value of primitive type T to be appended.
-        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
-        \pre IsArray() == true
-        \return The value itself for fluent API.
-        \note If the number of elements to be appended is known, calls Reserve() once first may be more efficient.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used
+       before. Commonly use GenericDocument::GetAllocator(). \pre IsArray() == true \return The
+       value itself for fluent API. \note If the number of elements to be appended is known, calls
+       Reserve() once first may be more efficient.
 
         \note The source type \c T explicitly disallows all pointer types,
             especially (\c const) \ref Ch*.  This helps avoiding implicitly
@@ -1755,8 +2157,10 @@ public:
         \note Amortized constant time complexity.
     */
     template <typename T>
-    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericValue&))
-    PushBack(T value, Allocator& allocator) {
+    RAPIDJSON_DISABLEIF_RETURN(
+        (internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T>>), (GenericValue&))
+    PushBack(T value, Allocator& allocator)
+    {
         GenericValue v(value);
         return PushBack(v, allocator);
     }
@@ -1765,7 +2169,8 @@ public:
     /*!
         \note Constant time complexity.
     */
-    GenericValue& PopBack() {
+    GenericValue& PopBack()
+    {
         RAPIDJSON_ASSERT(IsArray());
         RAPIDJSON_ASSERT(!Empty());
         GetElementsPointer()[--data_.a.size].~GenericValue();
@@ -1776,12 +2181,10 @@ public:
     /*!
         \param pos iterator to the element to remove
         \pre IsArray() == true && \ref Begin() <= \c pos < \ref End()
-        \return Iterator following the removed element. If the iterator pos refers to the last element, the End() iterator is returned.
-        \note Linear time complexity.
+        \return Iterator following the removed element. If the iterator pos refers to the last
+       element, the End() iterator is returned. \note Linear time complexity.
     */
-    ValueIterator Erase(ConstValueIterator pos) {
-        return Erase(pos, pos + 1);
-    }
+    ValueIterator Erase(ConstValueIterator pos) { return Erase(pos, pos + 1); }
 
     //! Remove elements in the range [first, last) of the array.
     /*!
@@ -1791,7 +2194,8 @@ public:
         \return Iterator following the last removed element.
         \note Linear time complexity.
     */
-    ValueIterator Erase(ConstValueIterator first, ConstValueIterator last) {
+    ValueIterator Erase(ConstValueIterator first, ConstValueIterator last)
+    {
         RAPIDJSON_ASSERT(IsArray());
         RAPIDJSON_ASSERT(data_.a.size > 0);
         RAPIDJSON_ASSERT(GetElementsPointer() != 0);
@@ -1799,116 +2203,209 @@ public:
         RAPIDJSON_ASSERT(first <= last);
         RAPIDJSON_ASSERT(last <= End());
         ValueIterator pos = Begin() + (first - Begin());
-        for (ValueIterator itr = pos; itr != last; ++itr)
+        for(ValueIterator itr = pos; itr != last; ++itr)
             itr->~GenericValue();
-        std::memmove(static_cast<void*>(pos), last, static_cast<size_t>(End() - last) * sizeof(GenericValue));
+        std::memmove(static_cast<void*>(pos),
+                     last,
+                     static_cast<size_t>(End() - last) * sizeof(GenericValue));
         data_.a.size -= static_cast<SizeType>(last - first);
         return pos;
     }
 
-    Array GetArray() { RAPIDJSON_ASSERT(IsArray()); return Array(*this); }
-    ConstArray GetArray() const { RAPIDJSON_ASSERT(IsArray()); return ConstArray(*this); }
+    Array GetArray()
+    {
+        RAPIDJSON_ASSERT(IsArray());
+        return Array(*this);
+    }
+    ConstArray GetArray() const
+    {
+        RAPIDJSON_ASSERT(IsArray());
+        return ConstArray(*this);
+    }
 
     //@}
 
     //!@name Number
     //@{
 
-    int GetInt() const          { RAPIDJSON_ASSERT(data_.f.flags & kIntFlag);   return data_.n.i.i;   }
-    unsigned GetUint() const    { RAPIDJSON_ASSERT(data_.f.flags & kUintFlag);  return data_.n.u.u;   }
-    int64_t GetInt64() const    { RAPIDJSON_ASSERT(data_.f.flags & kInt64Flag); return data_.n.i64; }
-    uint64_t GetUint64() const  { RAPIDJSON_ASSERT(data_.f.flags & kUint64Flag); return data_.n.u64; }
+    int GetInt() const
+    {
+        RAPIDJSON_ASSERT(data_.f.flags & kIntFlag);
+        return data_.n.i.i;
+    }
+    unsigned GetUint() const
+    {
+        RAPIDJSON_ASSERT(data_.f.flags & kUintFlag);
+        return data_.n.u.u;
+    }
+    int64_t GetInt64() const
+    {
+        RAPIDJSON_ASSERT(data_.f.flags & kInt64Flag);
+        return data_.n.i64;
+    }
+    uint64_t GetUint64() const
+    {
+        RAPIDJSON_ASSERT(data_.f.flags & kUint64Flag);
+        return data_.n.u64;
+    }
 
     //! Get the value as double type.
-    /*! \note If the value is 64-bit integer type, it may lose precision. Use \c IsLosslessDouble() to check whether the converison is lossless.
-    */
-    double GetDouble() const {
+    /*! \note If the value is 64-bit integer type, it may lose precision. Use \c IsLosslessDouble()
+     * to check whether the converison is lossless.
+     */
+    double GetDouble() const
+    {
         RAPIDJSON_ASSERT(IsNumber());
-        if ((data_.f.flags & kDoubleFlag) != 0)                return data_.n.d;   // exact type, no conversion.
-        if ((data_.f.flags & kIntFlag) != 0)                   return data_.n.i.i; // int -> double
-        if ((data_.f.flags & kUintFlag) != 0)                  return data_.n.u.u; // unsigned -> double
-        if ((data_.f.flags & kInt64Flag) != 0)                 return static_cast<double>(data_.n.i64); // int64_t -> double (may lose precision)
-        RAPIDJSON_ASSERT((data_.f.flags & kUint64Flag) != 0);  return static_cast<double>(data_.n.u64); // uint64_t -> double (may lose precision)
+        if((data_.f.flags & kDoubleFlag) != 0)
+            return data_.n.d; // exact type, no conversion.
+        if((data_.f.flags & kIntFlag) != 0)
+            return data_.n.i.i; // int -> double
+        if((data_.f.flags & kUintFlag) != 0)
+            return data_.n.u.u; // unsigned -> double
+        if((data_.f.flags & kInt64Flag) != 0)
+            return static_cast<double>(data_.n.i64); // int64_t -> double (may lose precision)
+        RAPIDJSON_ASSERT((data_.f.flags & kUint64Flag) != 0);
+        return static_cast<double>(data_.n.u64); // uint64_t -> double (may lose precision)
     }
 
     //! Get the value as float type.
-    /*! \note If the value is 64-bit integer type, it may lose precision. Use \c IsLosslessFloat() to check whether the converison is lossless.
-    */
-    float GetFloat() const {
-        return static_cast<float>(GetDouble());
-    }
+    /*! \note If the value is 64-bit integer type, it may lose precision. Use \c IsLosslessFloat()
+     * to check whether the converison is lossless.
+     */
+    float GetFloat() const { return static_cast<float>(GetDouble()); }
 
-    GenericValue& SetInt(int i)             { this->~GenericValue(); new (this) GenericValue(i);    return *this; }
-    GenericValue& SetUint(unsigned u)       { this->~GenericValue(); new (this) GenericValue(u);    return *this; }
-    GenericValue& SetInt64(int64_t i64)     { this->~GenericValue(); new (this) GenericValue(i64);  return *this; }
-    GenericValue& SetUint64(uint64_t u64)   { this->~GenericValue(); new (this) GenericValue(u64);  return *this; }
-    GenericValue& SetDouble(double d)       { this->~GenericValue(); new (this) GenericValue(d);    return *this; }
-    GenericValue& SetFloat(float f)         { this->~GenericValue(); new (this) GenericValue(static_cast<double>(f)); return *this; }
+    GenericValue& SetInt(int i)
+    {
+        this->~GenericValue();
+        new(this) GenericValue(i);
+        return *this;
+    }
+    GenericValue& SetUint(unsigned u)
+    {
+        this->~GenericValue();
+        new(this) GenericValue(u);
+        return *this;
+    }
+    GenericValue& SetInt64(int64_t i64)
+    {
+        this->~GenericValue();
+        new(this) GenericValue(i64);
+        return *this;
+    }
+    GenericValue& SetUint64(uint64_t u64)
+    {
+        this->~GenericValue();
+        new(this) GenericValue(u64);
+        return *this;
+    }
+    GenericValue& SetDouble(double d)
+    {
+        this->~GenericValue();
+        new(this) GenericValue(d);
+        return *this;
+    }
+    GenericValue& SetFloat(float f)
+    {
+        this->~GenericValue();
+        new(this) GenericValue(static_cast<double>(f));
+        return *this;
+    }
 
     //@}
 
     //!@name String
     //@{
 
-    const Ch* GetString() const { RAPIDJSON_ASSERT(IsString()); return DataString(data_); }
+    const Ch* GetString() const
+    {
+        RAPIDJSON_ASSERT(IsString());
+        return DataString(data_);
+    }
 
     //! Get the length of string.
-    /*! Since rapidjson permits "\\u0000" in the json string, strlen(v.GetString()) may not equal to v.GetStringLength().
-    */
-    SizeType GetStringLength() const { RAPIDJSON_ASSERT(IsString()); return DataStringLength(data_); }
+    /*! Since rapidjson permits "\\u0000" in the json string, strlen(v.GetString()) may not equal to
+     * v.GetStringLength().
+     */
+    SizeType GetStringLength() const
+    {
+        RAPIDJSON_ASSERT(IsString());
+        return DataStringLength(data_);
+    }
 
     //! Set this value as a string without copying source string.
-    /*! This version has better performance with supplied length, and also support string containing null character.
-        \param s source string pointer. 
-        \param length The length of source string, excluding the trailing null terminator.
-        \return The value itself for fluent API.
-        \post IsString() == true && GetString() == s && GetStringLength() == length
-        \see SetString(StringRefType)
+    /*! This version has better performance with supplied length, and also support string containing
+       null character. \param s source string pointer. \param length The length of source string,
+       excluding the trailing null terminator. \return The value itself for fluent API. \post
+       IsString() == true && GetString() == s && GetStringLength() == length \see
+       SetString(StringRefType)
     */
-    GenericValue& SetString(const Ch* s, SizeType length) { return SetString(StringRef(s, length)); }
+    GenericValue& SetString(const Ch* s, SizeType length)
+    {
+        return SetString(StringRef(s, length));
+    }
 
     //! Set this value as a string without copying source string.
     /*! \param s source string reference
         \return The value itself for fluent API.
         \post IsString() == true && GetString() == s && GetStringLength() == s.length
     */
-    GenericValue& SetString(StringRefType s) { this->~GenericValue(); SetStringRaw(s); return *this; }
+    GenericValue& SetString(StringRefType s)
+    {
+        this->~GenericValue();
+        SetStringRaw(s);
+        return *this;
+    }
 
     //! Set this value as a string by copying from source string.
-    /*! This version has better performance with supplied length, and also support string containing null character.
-        \param s source string. 
-        \param length The length of source string, excluding the trailing null terminator.
-        \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator().
-        \return The value itself for fluent API.
-        \post IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == length
+    /*! This version has better performance with supplied length, and also support string containing
+       null character. \param s source string. \param length The length of source string, excluding
+       the trailing null terminator. \param allocator Allocator for allocating copied buffer.
+       Commonly use GenericDocument::GetAllocator(). \return The value itself for fluent API. \post
+       IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() ==
+       length
     */
-    GenericValue& SetString(const Ch* s, SizeType length, Allocator& allocator) { return SetString(StringRef(s, length), allocator); }
+    GenericValue& SetString(const Ch* s, SizeType length, Allocator& allocator)
+    {
+        return SetString(StringRef(s, length), allocator);
+    }
 
     //! Set this value as a string by copying from source string.
-    /*! \param s source string. 
-        \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator().
-        \return The value itself for fluent API.
-        \post IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == length
+    /*! \param s source string.
+        \param allocator Allocator for allocating copied buffer. Commonly use
+       GenericDocument::GetAllocator(). \return The value itself for fluent API. \post IsString() ==
+       true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == length
     */
-    GenericValue& SetString(const Ch* s, Allocator& allocator) { return SetString(StringRef(s), allocator); }
+    GenericValue& SetString(const Ch* s, Allocator& allocator)
+    {
+        return SetString(StringRef(s), allocator);
+    }
 
     //! Set this value as a string by copying from source string.
     /*! \param s source string reference
-        \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator().
-        \return The value itself for fluent API.
-        \post IsString() == true && GetString() != s.s && strcmp(GetString(),s) == 0 && GetStringLength() == length
+        \param allocator Allocator for allocating copied buffer. Commonly use
+       GenericDocument::GetAllocator(). \return The value itself for fluent API. \post IsString() ==
+       true && GetString() != s.s && strcmp(GetString(),s) == 0 && GetStringLength() == length
     */
-    GenericValue& SetString(StringRefType s, Allocator& allocator) { this->~GenericValue(); SetStringRaw(s, allocator); return *this; }
+    GenericValue& SetString(StringRefType s, Allocator& allocator)
+    {
+        this->~GenericValue();
+        SetStringRaw(s, allocator);
+        return *this;
+    }
 
 #if RAPIDJSON_HAS_STDSTRING
     //! Set this value as a string by copying from source string.
     /*! \param s source string.
-        \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator().
-        \return The value itself for fluent API.
-        \post IsString() == true && GetString() != s.data() && strcmp(GetString(),s.data() == 0 && GetStringLength() == s.size()
-        \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
+        \param allocator Allocator for allocating copied buffer. Commonly use
+       GenericDocument::GetAllocator(). \return The value itself for fluent API. \post IsString() ==
+       true && GetString() != s.data() && strcmp(GetString(),s.data() == 0 && GetStringLength() ==
+       s.size() \note Requires the definition of the preprocessor symbol \ref
+       RAPIDJSON_HAS_STDSTRING.
     */
-    GenericValue& SetString(const std::basic_string<Ch>& s, Allocator& allocator) { return SetString(StringRef(s), allocator); }
+    GenericValue& SetString(const std::basic_string<Ch>& s, Allocator& allocator)
+    {
+        return SetString(StringRef(s), allocator);
+    }
 #endif
 
     //@}
@@ -1918,22 +2415,38 @@ public:
 
     //! Templated version for checking whether this value is type T.
     /*!
-        \tparam T Either \c bool, \c int, \c unsigned, \c int64_t, \c uint64_t, \c double, \c float, \c const \c char*, \c std::basic_string<Ch>
+        \tparam T Either \c bool, \c int, \c unsigned, \c int64_t, \c uint64_t, \c double, \c float,
+       \c const \c char*, \c std::basic_string<Ch>
     */
     template <typename T>
-    bool Is() const { return internal::TypeHelper<ValueType, T>::Is(*this); }
+    bool Is() const
+    {
+        return internal::TypeHelper<ValueType, T>::Is(*this);
+    }
 
     template <typename T>
-    T Get() const { return internal::TypeHelper<ValueType, T>::Get(*this); }
+    T Get() const
+    {
+        return internal::TypeHelper<ValueType, T>::Get(*this);
+    }
 
     template <typename T>
-    T Get() { return internal::TypeHelper<ValueType, T>::Get(*this); }
+    T Get()
+    {
+        return internal::TypeHelper<ValueType, T>::Get(*this);
+    }
 
-    template<typename T>
-    ValueType& Set(const T& data) { return internal::TypeHelper<ValueType, T>::Set(*this, data); }
+    template <typename T>
+    ValueType& Set(const T& data)
+    {
+        return internal::TypeHelper<ValueType, T>::Set(*this, data);
+    }
 
-    template<typename T>
-    ValueType& Set(const T& data, AllocatorType& allocator) { return internal::TypeHelper<ValueType, T>::Set(*this, data, allocator); }
+    template <typename T>
+    ValueType& Set(const T& data, AllocatorType& allocator)
+    {
+        return internal::TypeHelper<ValueType, T>::Set(*this, data, allocator);
+    }
 
     //@}
 
@@ -1945,100 +2458,125 @@ public:
         \param handler An object implementing concept Handler.
     */
     template <typename Handler>
-    bool Accept(Handler& handler) const {
-        switch(GetType()) {
-        case kNullType:     return handler.Null();
-        case kFalseType:    return handler.Bool(false);
-        case kTrueType:     return handler.Bool(true);
+    bool Accept(Handler& handler) const
+    {
+        switch(GetType())
+        {
+        case kNullType: return handler.Null();
+        case kFalseType: return handler.Bool(false);
+        case kTrueType: return handler.Bool(true);
 
         case kObjectType:
-            if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
+            if(RAPIDJSON_UNLIKELY(!handler.StartObject()))
                 return false;
-            for (ConstMemberIterator m = MemberBegin(); m != MemberEnd(); ++m) {
-                RAPIDJSON_ASSERT(m->name.IsString()); // User may change the type of name by MemberIterator.
-                if (RAPIDJSON_UNLIKELY(!handler.Key(m->name.GetString(), m->name.GetStringLength(), (m->name.data_.f.flags & kCopyFlag) != 0)))
+            for(ConstMemberIterator m = MemberBegin(); m != MemberEnd(); ++m)
+            {
+                RAPIDJSON_ASSERT(
+                    m->name.IsString()); // User may change the type of name by MemberIterator.
+                if(RAPIDJSON_UNLIKELY(!handler.Key(m->name.GetString(),
+                                                   m->name.GetStringLength(),
+                                                   (m->name.data_.f.flags & kCopyFlag) != 0)))
                     return false;
-                if (RAPIDJSON_UNLIKELY(!m->value.Accept(handler)))
+                if(RAPIDJSON_UNLIKELY(!m->value.Accept(handler)))
                     return false;
             }
             return handler.EndObject(data_.o.size);
 
         case kArrayType:
-            if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
+            if(RAPIDJSON_UNLIKELY(!handler.StartArray()))
                 return false;
-            for (ConstValueIterator v = Begin(); v != End(); ++v)
-                if (RAPIDJSON_UNLIKELY(!v->Accept(handler)))
+            for(ConstValueIterator v = Begin(); v != End(); ++v)
+                if(RAPIDJSON_UNLIKELY(!v->Accept(handler)))
                     return false;
             return handler.EndArray(data_.a.size);
-    
+
         case kStringType:
             return handler.String(GetString(), GetStringLength(), (data_.f.flags & kCopyFlag) != 0);
-    
+
         default:
             RAPIDJSON_ASSERT(GetType() == kNumberType);
-            if (IsDouble())         return handler.Double(data_.n.d);
-            else if (IsInt())       return handler.Int(data_.n.i.i);
-            else if (IsUint())      return handler.Uint(data_.n.u.u);
-            else if (IsInt64())     return handler.Int64(data_.n.i64);
-            else                    return handler.Uint64(data_.n.u64);
+            if(IsDouble())
+                return handler.Double(data_.n.d);
+            else if(IsInt())
+                return handler.Int(data_.n.i.i);
+            else if(IsUint())
+                return handler.Uint(data_.n.u.u);
+            else if(IsInt64())
+                return handler.Int64(data_.n.i64);
+            else
+                return handler.Uint64(data_.n.u64);
         }
     }
 
-private:
-    template <typename, typename> friend class GenericValue;
-    template <typename, typename, typename> friend class GenericDocument;
+    private:
+    template <typename, typename>
+    friend class GenericValue;
+    template <typename, typename, typename>
+    friend class GenericDocument;
 
-    enum {
-        kBoolFlag       = 0x0008,
-        kNumberFlag     = 0x0010,
-        kIntFlag        = 0x0020,
-        kUintFlag       = 0x0040,
-        kInt64Flag      = 0x0080,
-        kUint64Flag     = 0x0100,
-        kDoubleFlag     = 0x0200,
-        kStringFlag     = 0x0400,
-        kCopyFlag       = 0x0800,
-        kInlineStrFlag  = 0x1000,
+    enum
+    {
+        kBoolFlag      = 0x0008,
+        kNumberFlag    = 0x0010,
+        kIntFlag       = 0x0020,
+        kUintFlag      = 0x0040,
+        kInt64Flag     = 0x0080,
+        kUint64Flag    = 0x0100,
+        kDoubleFlag    = 0x0200,
+        kStringFlag    = 0x0400,
+        kCopyFlag      = 0x0800,
+        kInlineStrFlag = 0x1000,
 
         // Initial flags of different types.
         kNullFlag = kNullType,
-        // These casts are added to suppress the warning on MSVC about bitwise operations between enums of different types.
-        kTrueFlag = static_cast<int>(kTrueType) | static_cast<int>(kBoolFlag),
+        // These casts are added to suppress the warning on MSVC about bitwise operations between
+        // enums of different types.
+        kTrueFlag  = static_cast<int>(kTrueType) | static_cast<int>(kBoolFlag),
         kFalseFlag = static_cast<int>(kFalseType) | static_cast<int>(kBoolFlag),
-        kNumberIntFlag = static_cast<int>(kNumberType) | static_cast<int>(kNumberFlag | kIntFlag | kInt64Flag),
-        kNumberUintFlag = static_cast<int>(kNumberType) | static_cast<int>(kNumberFlag | kUintFlag | kUint64Flag | kInt64Flag),
-        kNumberInt64Flag = static_cast<int>(kNumberType) | static_cast<int>(kNumberFlag | kInt64Flag),
-        kNumberUint64Flag = static_cast<int>(kNumberType) | static_cast<int>(kNumberFlag | kUint64Flag),
-        kNumberDoubleFlag = static_cast<int>(kNumberType) | static_cast<int>(kNumberFlag | kDoubleFlag),
-        kNumberAnyFlag = static_cast<int>(kNumberType) | static_cast<int>(kNumberFlag | kIntFlag | kInt64Flag | kUintFlag | kUint64Flag | kDoubleFlag),
+        kNumberIntFlag =
+            static_cast<int>(kNumberType) | static_cast<int>(kNumberFlag | kIntFlag | kInt64Flag),
+        kNumberUintFlag = static_cast<int>(kNumberType) |
+                          static_cast<int>(kNumberFlag | kUintFlag | kUint64Flag | kInt64Flag),
+        kNumberInt64Flag =
+            static_cast<int>(kNumberType) | static_cast<int>(kNumberFlag | kInt64Flag),
+        kNumberUint64Flag =
+            static_cast<int>(kNumberType) | static_cast<int>(kNumberFlag | kUint64Flag),
+        kNumberDoubleFlag =
+            static_cast<int>(kNumberType) | static_cast<int>(kNumberFlag | kDoubleFlag),
+        kNumberAnyFlag =
+            static_cast<int>(kNumberType) | static_cast<int>(kNumberFlag | kIntFlag | kInt64Flag |
+                                                             kUintFlag | kUint64Flag | kDoubleFlag),
         kConstStringFlag = static_cast<int>(kStringType) | static_cast<int>(kStringFlag),
         kCopyStringFlag = static_cast<int>(kStringType) | static_cast<int>(kStringFlag | kCopyFlag),
-        kShortStringFlag = static_cast<int>(kStringType) | static_cast<int>(kStringFlag | kCopyFlag | kInlineStrFlag),
+        kShortStringFlag = static_cast<int>(kStringType) |
+                           static_cast<int>(kStringFlag | kCopyFlag | kInlineStrFlag),
         kObjectFlag = kObjectType,
-        kArrayFlag = kArrayType,
+        kArrayFlag  = kArrayType,
 
         kTypeMask = 0x07
     };
 
-    static const SizeType kDefaultArrayCapacity = RAPIDJSON_VALUE_DEFAULT_ARRAY_CAPACITY;
+    static const SizeType kDefaultArrayCapacity  = RAPIDJSON_VALUE_DEFAULT_ARRAY_CAPACITY;
     static const SizeType kDefaultObjectCapacity = RAPIDJSON_VALUE_DEFAULT_OBJECT_CAPACITY;
 
-    struct Flag {
+    struct Flag
+    {
 #if RAPIDJSON_48BITPOINTER_OPTIMIZATION
-        char payload[sizeof(SizeType) * 2 + 6];     // 2 x SizeType + lower 48-bit pointer
+        char payload[sizeof(SizeType) * 2 + 6]; // 2 x SizeType + lower 48-bit pointer
 #elif RAPIDJSON_64BIT
         char payload[sizeof(SizeType) * 2 + sizeof(void*) + 6]; // 6 padding bytes
 #else
-        char payload[sizeof(SizeType) * 2 + sizeof(void*) + 2]; // 2 padding bytes
+            char payload[sizeof(SizeType) * 2 + sizeof(void*) + 2]; // 2 padding bytes
 #endif
         uint16_t flags;
     };
 
-    struct String {
+    struct String
+    {
         SizeType length;
-        SizeType hashcode;  //!< reserved
+        SizeType hashcode; //!< reserved
         const Ch* str;
-    };  // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
+    }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
 
     // implementation detail: ShortString can represent zero-terminated strings up to MaxSize chars
     // (excluding the terminating zero) and store a value to determine the length of the contained
@@ -2047,95 +2585,136 @@ private:
     // the string terminator as well. For getting the string length back from that value just use
     // "MaxSize - str[LenPos]".
     // This allows to store 13-chars strings in 32-bit mode, 21-chars strings in 64-bit mode,
-    // 13-chars strings for RAPIDJSON_48BITPOINTER_OPTIMIZATION=1 inline (for `UTF8`-encoded strings).
-    struct ShortString {
-        enum { MaxChars = sizeof(static_cast<Flag*>(0)->payload) / sizeof(Ch), MaxSize = MaxChars - 1, LenPos = MaxSize };
+    // 13-chars strings for RAPIDJSON_48BITPOINTER_OPTIMIZATION=1 inline (for `UTF8`-encoded
+    // strings).
+    struct ShortString
+    {
+        enum
+        {
+            MaxChars = sizeof(static_cast<Flag*>(0)->payload) / sizeof(Ch),
+            MaxSize  = MaxChars - 1,
+            LenPos   = MaxSize
+        };
         Ch str[MaxChars];
 
-        inline static bool Usable(SizeType len) { return                       (MaxSize >= len); }
-        inline void     SetLength(SizeType len) { str[LenPos] = static_cast<Ch>(MaxSize -  len); }
-        inline SizeType GetLength() const       { return  static_cast<SizeType>(MaxSize -  str[LenPos]); }
-    };  // at most as many bytes as "String" above => 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
+        inline static bool Usable(SizeType len) { return (MaxSize >= len); }
+        inline void SetLength(SizeType len) { str[LenPos] = static_cast<Ch>(MaxSize - len); }
+        inline SizeType GetLength() const { return static_cast<SizeType>(MaxSize - str[LenPos]); }
+    }; // at most as many bytes as "String" above => 12 bytes in 32-bit mode, 16 bytes in 64-bit
+       // mode
 
     // By using proper binary layout, retrieval of different integer types do not need conversions.
-    union Number {
+    union Number
+    {
 #if RAPIDJSON_ENDIAN == RAPIDJSON_LITTLEENDIAN
-        struct I {
+        struct I
+        {
             int i;
             char padding[4];
-        }i;
-        struct U {
+        } i;
+        struct U
+        {
             unsigned u;
             char padding2[4];
-        }u;
+        } u;
 #else
-        struct I {
+        struct I
+        {
             char padding[4];
             int i;
-        }i;
-        struct U {
+        } i;
+        struct U
+        {
             char padding2[4];
             unsigned u;
-        }u;
+        } u;
 #endif
         int64_t i64;
         uint64_t u64;
         double d;
-    };  // 8 bytes
+    }; // 8 bytes
 
-    struct ObjectData {
+    struct ObjectData
+    {
         SizeType size;
         SizeType capacity;
         Member* members;
-    };  // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
+    }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
 
-    struct ArrayData {
+    struct ArrayData
+    {
         SizeType size;
         SizeType capacity;
         GenericValue* elements;
-    };  // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
+    }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
 
-    union Data {
+    union Data
+    {
         String s;
         ShortString ss;
         Number n;
         ObjectData o;
         ArrayData a;
         Flag f;
-    };  // 16 bytes in 32-bit mode, 24 bytes in 64-bit mode, 16 bytes in 64-bit with RAPIDJSON_48BITPOINTER_OPTIMIZATION
+    }; // 16 bytes in 32-bit mode, 24 bytes in 64-bit mode, 16 bytes in 64-bit with
+       // RAPIDJSON_48BITPOINTER_OPTIMIZATION
 
-    static RAPIDJSON_FORCEINLINE const Ch* DataString(const Data& data) {
+    static RAPIDJSON_FORCEINLINE const Ch* DataString(const Data& data)
+    {
         return (data.f.flags & kInlineStrFlag) ? data.ss.str : RAPIDJSON_GETPOINTER(Ch, data.s.str);
     }
-    static RAPIDJSON_FORCEINLINE SizeType DataStringLength(const Data& data) {
+    static RAPIDJSON_FORCEINLINE SizeType DataStringLength(const Data& data)
+    {
         return (data.f.flags & kInlineStrFlag) ? data.ss.GetLength() : data.s.length;
     }
 
-    RAPIDJSON_FORCEINLINE const Ch* GetStringPointer() const { return RAPIDJSON_GETPOINTER(Ch, data_.s.str); }
-    RAPIDJSON_FORCEINLINE const Ch* SetStringPointer(const Ch* str) { return RAPIDJSON_SETPOINTER(Ch, data_.s.str, str); }
-    RAPIDJSON_FORCEINLINE GenericValue* GetElementsPointer() const { return RAPIDJSON_GETPOINTER(GenericValue, data_.a.elements); }
-    RAPIDJSON_FORCEINLINE GenericValue* SetElementsPointer(GenericValue* elements) { return RAPIDJSON_SETPOINTER(GenericValue, data_.a.elements, elements); }
-    RAPIDJSON_FORCEINLINE Member* GetMembersPointer() const { return RAPIDJSON_GETPOINTER(Member, data_.o.members); }
-    RAPIDJSON_FORCEINLINE Member* SetMembersPointer(Member* members) { return RAPIDJSON_SETPOINTER(Member, data_.o.members, members); }
+    RAPIDJSON_FORCEINLINE const Ch* GetStringPointer() const
+    {
+        return RAPIDJSON_GETPOINTER(Ch, data_.s.str);
+    }
+    RAPIDJSON_FORCEINLINE const Ch* SetStringPointer(const Ch* str)
+    {
+        return RAPIDJSON_SETPOINTER(Ch, data_.s.str, str);
+    }
+    RAPIDJSON_FORCEINLINE GenericValue* GetElementsPointer() const
+    {
+        return RAPIDJSON_GETPOINTER(GenericValue, data_.a.elements);
+    }
+    RAPIDJSON_FORCEINLINE GenericValue* SetElementsPointer(GenericValue* elements)
+    {
+        return RAPIDJSON_SETPOINTER(GenericValue, data_.a.elements, elements);
+    }
+    RAPIDJSON_FORCEINLINE Member* GetMembersPointer() const
+    {
+        return RAPIDJSON_GETPOINTER(Member, data_.o.members);
+    }
+    RAPIDJSON_FORCEINLINE Member* SetMembersPointer(Member* members)
+    {
+        return RAPIDJSON_SETPOINTER(Member, data_.o.members, members);
+    }
 
 #if RAPIDJSON_USE_MEMBERSMAP
 
-    struct MapTraits {
-        struct Less {
-            bool operator()(const Data& s1, const Data& s2) const {
+    struct MapTraits
+    {
+        struct Less
+        {
+            bool operator()(const Data& s1, const Data& s2) const
+            {
                 SizeType n1 = DataStringLength(s1), n2 = DataStringLength(s2);
-                int cmp = std::memcmp(DataString(s1), DataString(s2), sizeof(Ch) * (n1 < n2 ? n1 : n2));
+                int cmp =
+                    std::memcmp(DataString(s1), DataString(s2), sizeof(Ch) * (n1 < n2 ? n1 : n2));
                 return cmp < 0 || (cmp == 0 && n1 < n2);
             }
         };
         typedef std::pair<const Data, SizeType> Pair;
-        typedef std::multimap<Data, SizeType, Less, StdAllocator<Pair, Allocator> > Map;
+        typedef std::multimap<Data, SizeType, Less, StdAllocator<Pair, Allocator>> Map;
         typedef typename Map::iterator Iterator;
     };
-    typedef typename MapTraits::Map         Map;
-    typedef typename MapTraits::Less        MapLess;
-    typedef typename MapTraits::Pair        MapPair;
-    typedef typename MapTraits::Iterator    MapIterator;
+    typedef typename MapTraits::Map Map;
+    typedef typename MapTraits::Less MapLess;
+    typedef typename MapTraits::Pair MapPair;
+    typedef typename MapTraits::Iterator MapIterator;
 
     //
     // Layout of the members' map/array, re(al)located according to the needed capacity:
@@ -2145,32 +2724,35 @@ private:
     // (where <> stands for the RAPIDJSON_ALIGN-ment, if needed)
     //
 
-    static RAPIDJSON_FORCEINLINE size_t GetMapLayoutSize(SizeType capacity) {
-        return RAPIDJSON_ALIGN(sizeof(Map*)) +
-               RAPIDJSON_ALIGN(sizeof(SizeType)) +
-               RAPIDJSON_ALIGN(capacity * sizeof(Member)) +
-               capacity * sizeof(MapIterator);
+    static RAPIDJSON_FORCEINLINE size_t GetMapLayoutSize(SizeType capacity)
+    {
+        return RAPIDJSON_ALIGN(sizeof(Map*)) + RAPIDJSON_ALIGN(sizeof(SizeType)) +
+               RAPIDJSON_ALIGN(capacity * sizeof(Member)) + capacity * sizeof(MapIterator);
     }
 
-    static RAPIDJSON_FORCEINLINE SizeType &GetMapCapacity(Map* &map) {
+    static RAPIDJSON_FORCEINLINE SizeType& GetMapCapacity(Map*& map)
+    {
         return *reinterpret_cast<SizeType*>(reinterpret_cast<uintptr_t>(&map) +
                                             RAPIDJSON_ALIGN(sizeof(Map*)));
     }
 
-    static RAPIDJSON_FORCEINLINE Member* GetMapMembers(Map* &map) {
+    static RAPIDJSON_FORCEINLINE Member* GetMapMembers(Map*& map)
+    {
         return reinterpret_cast<Member*>(reinterpret_cast<uintptr_t>(&map) +
                                          RAPIDJSON_ALIGN(sizeof(Map*)) +
                                          RAPIDJSON_ALIGN(sizeof(SizeType)));
     }
 
-    static RAPIDJSON_FORCEINLINE MapIterator* GetMapIterators(Map* &map) {
-        return reinterpret_cast<MapIterator*>(reinterpret_cast<uintptr_t>(&map) +
-                                              RAPIDJSON_ALIGN(sizeof(Map*)) +
-                                              RAPIDJSON_ALIGN(sizeof(SizeType)) +
-                                              RAPIDJSON_ALIGN(GetMapCapacity(map) * sizeof(Member)));
+    static RAPIDJSON_FORCEINLINE MapIterator* GetMapIterators(Map*& map)
+    {
+        return reinterpret_cast<MapIterator*>(
+            reinterpret_cast<uintptr_t>(&map) + RAPIDJSON_ALIGN(sizeof(Map*)) +
+            RAPIDJSON_ALIGN(sizeof(SizeType)) +
+            RAPIDJSON_ALIGN(GetMapCapacity(map) * sizeof(Member)));
     }
 
-    static RAPIDJSON_FORCEINLINE Map* &GetMap(Member* members) {
+    static RAPIDJSON_FORCEINLINE Map*& GetMap(Member* members)
+    {
         RAPIDJSON_ASSERT(members != 0);
         return *reinterpret_cast<Map**>(reinterpret_cast<uintptr_t>(members) -
                                         RAPIDJSON_ALIGN(sizeof(SizeType)) -
@@ -2178,7 +2760,8 @@ private:
     }
 
     // Some compilers' debug mechanisms want all iterators to be destroyed, for their accounting..
-    RAPIDJSON_FORCEINLINE MapIterator DropMapIterator(MapIterator& rhs) {
+    RAPIDJSON_FORCEINLINE MapIterator DropMapIterator(MapIterator& rhs)
+    {
 #if RAPIDJSON_HAS_CXX11
         MapIterator ret = std::move(rhs);
 #else
@@ -2188,60 +2771,72 @@ private:
         return ret;
     }
 
-    Map* &DoReallocMap(Map** oldMap, SizeType newCapacity, Allocator& allocator) {
-        Map **newMap = static_cast<Map**>(allocator.Malloc(GetMapLayoutSize(newCapacity)));
+    Map*& DoReallocMap(Map** oldMap, SizeType newCapacity, Allocator& allocator)
+    {
+        Map** newMap = static_cast<Map**>(allocator.Malloc(GetMapLayoutSize(newCapacity)));
         GetMapCapacity(*newMap) = newCapacity;
-        if (!oldMap) {
-            *newMap = new (allocator.Malloc(sizeof(Map))) Map(MapLess(), allocator);
+        if(!oldMap)
+        {
+            *newMap = new(allocator.Malloc(sizeof(Map))) Map(MapLess(), allocator);
         }
-        else {
-            *newMap = *oldMap;
+        else
+        {
+            *newMap      = *oldMap;
             size_t count = (*oldMap)->size();
             std::memcpy(static_cast<void*>(GetMapMembers(*newMap)),
                         static_cast<void*>(GetMapMembers(*oldMap)),
                         count * sizeof(Member));
-            MapIterator *oldIt = GetMapIterators(*oldMap),
-                        *newIt = GetMapIterators(*newMap);
-            while (count--) {
-                new (&newIt[count]) MapIterator(DropMapIterator(oldIt[count]));
+            MapIterator *oldIt = GetMapIterators(*oldMap), *newIt = GetMapIterators(*newMap);
+            while(count--)
+            {
+                new(&newIt[count]) MapIterator(DropMapIterator(oldIt[count]));
             }
             Allocator::Free(oldMap);
         }
         return *newMap;
     }
 
-    RAPIDJSON_FORCEINLINE Member* DoAllocMembers(SizeType capacity, Allocator& allocator) {
+    RAPIDJSON_FORCEINLINE Member* DoAllocMembers(SizeType capacity, Allocator& allocator)
+    {
         return GetMapMembers(DoReallocMap(0, capacity, allocator));
     }
 
-    void DoReserveMembers(SizeType newCapacity, Allocator& allocator) {
+    void DoReserveMembers(SizeType newCapacity, Allocator& allocator)
+    {
         ObjectData& o = data_.o;
-        if (newCapacity > o.capacity) {
+        if(newCapacity > o.capacity)
+        {
             Member* oldMembers = GetMembersPointer();
-            Map **oldMap = oldMembers ? &GetMap(oldMembers) : 0,
-                *&newMap = DoReallocMap(oldMap, newCapacity, allocator);
+            Map **oldMap       = oldMembers ? &GetMap(oldMembers) : 0,
+                *&newMap       = DoReallocMap(oldMap, newCapacity, allocator);
             RAPIDJSON_SETPOINTER(Member, o.members, GetMapMembers(newMap));
             o.capacity = newCapacity;
         }
     }
 
     template <typename SourceAllocator>
-    MemberIterator DoFindMember(const GenericValue<Encoding, SourceAllocator>& name) {
-        if (Member* members = GetMembersPointer()) {
-            Map* &map = GetMap(members);
+    MemberIterator DoFindMember(const GenericValue<Encoding, SourceAllocator>& name)
+    {
+        if(Member* members = GetMembersPointer())
+        {
+            Map*& map       = GetMap(members);
             MapIterator mit = map->find(reinterpret_cast<const Data&>(name.data_));
-            if (mit != map->end()) {
+            if(mit != map->end())
+            {
                 return MemberIterator(&members[mit->second]);
             }
         }
         return MemberEnd();
     }
 
-    void DoClearMembers() {
-        if (Member* members = GetMembersPointer()) {
-            Map* &map = GetMap(members);
+    void DoClearMembers()
+    {
+        if(Member* members = GetMembersPointer())
+        {
+            Map*& map        = GetMap(members);
             MapIterator* mit = GetMapIterators(map);
-            for (SizeType i = 0; i < data_.o.size; i++) {
+            for(SizeType i = 0; i < data_.o.size; i++)
+            {
                 map->erase(DropMapIterator(mit[i]));
                 members[i].~Member();
             }
@@ -2249,13 +2844,17 @@ private:
         }
     }
 
-    void DoFreeMembers() {
-        if (Member* members = GetMembersPointer()) {
+    void DoFreeMembers()
+    {
+        if(Member* members = GetMembersPointer())
+        {
             GetMap(members)->~Map();
-            for (SizeType i = 0; i < data_.o.size; i++) {
+            for(SizeType i = 0; i < data_.o.size; i++)
+            {
                 members[i].~Member();
             }
-            if (Allocator::kNeedFree) { // Shortcut by Allocator's trait
+            if(Allocator::kNeedFree)
+            { // Shortcut by Allocator's trait
                 Map** map = &GetMap(members);
                 Allocator::Free(*map);
                 Allocator::Free(map);
@@ -2265,133 +2864,153 @@ private:
 
 #else // !RAPIDJSON_USE_MEMBERSMAP
 
-    RAPIDJSON_FORCEINLINE Member* DoAllocMembers(SizeType capacity, Allocator& allocator) {
+    RAPIDJSON_FORCEINLINE Member* DoAllocMembers(SizeType capacity, Allocator& allocator)
+    {
         return Malloc<Member>(allocator, capacity);
     }
 
-    void DoReserveMembers(SizeType newCapacity, Allocator& allocator) {
+    void DoReserveMembers(SizeType newCapacity, Allocator& allocator)
+    {
         ObjectData& o = data_.o;
-        if (newCapacity > o.capacity) {
-            Member* newMembers = Realloc<Member>(allocator, GetMembersPointer(), o.capacity, newCapacity);
+        if(newCapacity > o.capacity)
+        {
+            Member* newMembers =
+                Realloc<Member>(allocator, GetMembersPointer(), o.capacity, newCapacity);
             RAPIDJSON_SETPOINTER(Member, o.members, newMembers);
             o.capacity = newCapacity;
         }
     }
 
     template <typename SourceAllocator>
-    MemberIterator DoFindMember(const GenericValue<Encoding, SourceAllocator>& name) {
+    MemberIterator DoFindMember(const GenericValue<Encoding, SourceAllocator>& name)
+    {
         MemberIterator member = MemberBegin();
-        for ( ; member != MemberEnd(); ++member)
-            if (name.StringEqual(member->name))
+        for(; member != MemberEnd(); ++member)
+            if(name.StringEqual(member->name))
                 break;
         return member;
     }
 
-    void DoClearMembers() {
-        for (MemberIterator m = MemberBegin(); m != MemberEnd(); ++m)
+    void DoClearMembers()
+    {
+        for(MemberIterator m = MemberBegin(); m != MemberEnd(); ++m)
             m->~Member();
         data_.o.size = 0;
     }
 
-    void DoFreeMembers() {
-        for (MemberIterator m = MemberBegin(); m != MemberEnd(); ++m)
+    void DoFreeMembers()
+    {
+        for(MemberIterator m = MemberBegin(); m != MemberEnd(); ++m)
             m->~Member();
         Allocator::Free(GetMembersPointer());
     }
 
 #endif // !RAPIDJSON_USE_MEMBERSMAP
 
-    void DoAddMember(GenericValue& name, GenericValue& value, Allocator& allocator) {
+    void DoAddMember(GenericValue& name, GenericValue& value, Allocator& allocator)
+    {
         ObjectData& o = data_.o;
-        if (o.size >= o.capacity)
-            DoReserveMembers(o.capacity ? (o.capacity + (o.capacity + 1) / 2) : kDefaultObjectCapacity, allocator);
+        if(o.size >= o.capacity)
+            DoReserveMembers(o.capacity ? (o.capacity + (o.capacity + 1) / 2)
+                                        : kDefaultObjectCapacity,
+                             allocator);
         Member* members = GetMembersPointer();
-        Member* m = members + o.size;
+        Member* m       = members + o.size;
         m->name.RawAssign(name);
         m->value.RawAssign(value);
 #if RAPIDJSON_USE_MEMBERSMAP
-        Map* &map = GetMap(members);
+        Map*& map        = GetMap(members);
         MapIterator* mit = GetMapIterators(map);
-        new (&mit[o.size]) MapIterator(map->insert(MapPair(m->name.data_, o.size)));
+        new(&mit[o.size]) MapIterator(map->insert(MapPair(m->name.data_, o.size)));
 #endif
         ++o.size;
     }
 
-    MemberIterator DoRemoveMember(MemberIterator m) {
-        ObjectData& o = data_.o;
+    MemberIterator DoRemoveMember(MemberIterator m)
+    {
+        ObjectData& o   = data_.o;
         Member* members = GetMembersPointer();
 #if RAPIDJSON_USE_MEMBERSMAP
-        Map* &map = GetMap(members);
+        Map*& map        = GetMap(members);
         MapIterator* mit = GetMapIterators(map);
-        SizeType mpos = static_cast<SizeType>(&*m - members);
+        SizeType mpos    = static_cast<SizeType>(&*m - members);
         map->erase(DropMapIterator(mit[mpos]));
 #endif
         MemberIterator last(members + (o.size - 1));
-        if (o.size > 1 && m != last) {
+        if(o.size > 1 && m != last)
+        {
 #if RAPIDJSON_USE_MEMBERSMAP
-            new (&mit[mpos]) MapIterator(DropMapIterator(mit[&*last - members]));
+            new(&mit[mpos]) MapIterator(DropMapIterator(mit[&*last - members]));
             mit[mpos]->second = mpos;
 #endif
             *m = *last; // Move the last one to this place
         }
-        else {
+        else
+        {
             m->~Member(); // Only one left, just destroy
         }
         --o.size;
         return m;
     }
 
-    MemberIterator DoEraseMembers(ConstMemberIterator first, ConstMemberIterator last) {
-        ObjectData& o = data_.o;
-        MemberIterator beg = MemberBegin(),
-                       pos = beg + (first - beg),
-                       end = MemberEnd();
+    MemberIterator DoEraseMembers(ConstMemberIterator first, ConstMemberIterator last)
+    {
+        ObjectData& o      = data_.o;
+        MemberIterator beg = MemberBegin(), pos = beg + (first - beg), end = MemberEnd();
 #if RAPIDJSON_USE_MEMBERSMAP
-        Map* &map = GetMap(GetMembersPointer());
+        Map*& map        = GetMap(GetMembersPointer());
         MapIterator* mit = GetMapIterators(map);
 #endif
-        for (MemberIterator itr = pos; itr != last; ++itr) {
+        for(MemberIterator itr = pos; itr != last; ++itr)
+        {
 #if RAPIDJSON_USE_MEMBERSMAP
             map->erase(DropMapIterator(mit[itr - beg]));
 #endif
             itr->~Member();
         }
 #if RAPIDJSON_USE_MEMBERSMAP
-        if (first != last) {
+        if(first != last)
+        {
             // Move remaining members/iterators
             MemberIterator next = pos + (last - first);
-            for (MemberIterator itr = pos; next != end; ++itr, ++next) {
+            for(MemberIterator itr = pos; next != end; ++itr, ++next)
+            {
                 std::memcpy(static_cast<void*>(&*itr), &*next, sizeof(Member));
                 SizeType mpos = static_cast<SizeType>(itr - beg);
-                new (&mit[mpos]) MapIterator(DropMapIterator(mit[next - beg]));
+                new(&mit[mpos]) MapIterator(DropMapIterator(mit[next - beg]));
                 mit[mpos]->second = mpos;
             }
         }
 #else
-        std::memmove(static_cast<void*>(&*pos), &*last,
-                     static_cast<size_t>(end - last) * sizeof(Member));
+        std::memmove(
+            static_cast<void*>(&*pos), &*last, static_cast<size_t>(end - last) * sizeof(Member));
 #endif
         o.size -= static_cast<SizeType>(last - first);
         return pos;
     }
 
     template <typename SourceAllocator>
-    void DoCopyMembers(const GenericValue<Encoding,SourceAllocator>& rhs, Allocator& allocator, bool copyConstStrings) {
+    void DoCopyMembers(const GenericValue<Encoding, SourceAllocator>& rhs,
+                       Allocator& allocator,
+                       bool copyConstStrings)
+    {
         RAPIDJSON_ASSERT(rhs.GetType() == kObjectType);
 
-        data_.f.flags = kObjectFlag;
+        data_.f.flags  = kObjectFlag;
         SizeType count = rhs.data_.o.size;
-        Member* lm = DoAllocMembers(count, allocator);
-        const typename GenericValue<Encoding,SourceAllocator>::Member* rm = rhs.GetMembersPointer();
+        Member* lm     = DoAllocMembers(count, allocator);
+        const typename GenericValue<Encoding, SourceAllocator>::Member* rm =
+            rhs.GetMembersPointer();
 #if RAPIDJSON_USE_MEMBERSMAP
-        Map* &map = GetMap(lm);
+        Map*& map        = GetMap(lm);
         MapIterator* mit = GetMapIterators(map);
 #endif
-        for (SizeType i = 0; i < count; i++) {
-            new (&lm[i].name) GenericValue(rm[i].name, allocator, copyConstStrings);
-            new (&lm[i].value) GenericValue(rm[i].value, allocator, copyConstStrings);
+        for(SizeType i = 0; i < count; i++)
+        {
+            new(&lm[i].name) GenericValue(rm[i].name, allocator, copyConstStrings);
+            new(&lm[i].value) GenericValue(rm[i].value, allocator, copyConstStrings);
 #if RAPIDJSON_USE_MEMBERSMAP
-            new (&mit[i]) MapIterator(map->insert(MapPair(lm[i].name.data_, i)));
+            new(&mit[i]) MapIterator(map->insert(MapPair(lm[i].name.data_, i)));
 #endif
         }
         data_.o.size = data_.o.capacity = count;
@@ -2399,10 +3018,13 @@ private:
     }
 
     // Initialize this value as array with initial data, without calling destructor.
-    void SetArrayRaw(GenericValue* values, SizeType count, Allocator& allocator) {
+    void SetArrayRaw(GenericValue* values, SizeType count, Allocator& allocator)
+    {
         data_.f.flags = kArrayFlag;
-        if (count) {
-            GenericValue* e = static_cast<GenericValue*>(allocator.Malloc(count * sizeof(GenericValue)));
+        if(count)
+        {
+            GenericValue* e =
+                static_cast<GenericValue*>(allocator.Malloc(count * sizeof(GenericValue)));
             SetElementsPointer(e);
             std::memcpy(static_cast<void*>(e), values, count * sizeof(GenericValue));
         }
@@ -2412,17 +3034,20 @@ private:
     }
 
     //! Initialize this value as object with initial data, without calling destructor.
-    void SetObjectRaw(Member* members, SizeType count, Allocator& allocator) {
+    void SetObjectRaw(Member* members, SizeType count, Allocator& allocator)
+    {
         data_.f.flags = kObjectFlag;
-        if (count) {
+        if(count)
+        {
             Member* m = DoAllocMembers(count, allocator);
             SetMembersPointer(m);
             std::memcpy(static_cast<void*>(m), members, count * sizeof(Member));
 #if RAPIDJSON_USE_MEMBERSMAP
-            Map* &map = GetMap(m);
+            Map*& map        = GetMap(m);
             MapIterator* mit = GetMapIterators(map);
-            for (SizeType i = 0; i < count; i++) {
-                new (&mit[i]) MapIterator(map->insert(MapPair(m[i].name.data_, i)));
+            for(SizeType i = 0; i < count; i++)
+            {
+                new(&mit[i]) MapIterator(map->insert(MapPair(m[i].name.data_, i)));
             }
 #endif
         }
@@ -2432,24 +3057,29 @@ private:
     }
 
     //! Initialize this value as constant string, without calling destructor.
-    void SetStringRaw(StringRefType s) RAPIDJSON_NOEXCEPT {
+    void SetStringRaw(StringRefType s) RAPIDJSON_NOEXCEPT
+    {
         data_.f.flags = kConstStringFlag;
         SetStringPointer(s);
         data_.s.length = s.length;
     }
 
     //! Initialize this value as copy string with initial data, without calling destructor.
-    void SetStringRaw(StringRefType s, Allocator& allocator) {
+    void SetStringRaw(StringRefType s, Allocator& allocator)
+    {
         Ch* str = 0;
-        if (ShortString::Usable(s.length)) {
+        if(ShortString::Usable(s.length))
+        {
             data_.f.flags = kShortStringFlag;
             data_.ss.SetLength(s.length);
             str = data_.ss.str;
             std::memmove(str, s, s.length * sizeof(Ch));
-        } else {
-            data_.f.flags = kCopyStringFlag;
+        }
+        else
+        {
+            data_.f.flags  = kCopyStringFlag;
             data_.s.length = s.length;
-            str = static_cast<Ch *>(allocator.Malloc((s.length + 1) * sizeof(Ch)));
+            str            = static_cast<Ch*>(allocator.Malloc((s.length + 1) * sizeof(Ch)));
             SetStringPointer(str);
             std::memcpy(str, s, s.length * sizeof(Ch));
         }
@@ -2457,24 +3087,32 @@ private:
     }
 
     //! Assignment without calling destructor
-    void RawAssign(GenericValue& rhs) RAPIDJSON_NOEXCEPT {
+    void RawAssign(GenericValue& rhs) RAPIDJSON_NOEXCEPT
+    {
         data_ = rhs.data_;
         // data_.f.flags = rhs.data_.f.flags;
         rhs.data_.f.flags = kNullFlag;
     }
 
     template <typename SourceAllocator>
-    bool StringEqual(const GenericValue<Encoding, SourceAllocator>& rhs) const {
+    bool StringEqual(const GenericValue<Encoding, SourceAllocator>& rhs) const
+    {
         RAPIDJSON_ASSERT(IsString());
         RAPIDJSON_ASSERT(rhs.IsString());
 
         const SizeType len1 = GetStringLength();
         const SizeType len2 = rhs.GetStringLength();
-        if(len1 != len2) { return false; }
+        if(len1 != len2)
+        {
+            return false;
+        }
 
         const Ch* const str1 = GetString();
         const Ch* const str2 = rhs.GetString();
-        if(str1 == str2) { return true; } // fast path for constant string
+        if(str1 == str2)
+        {
+            return true;
+        } // fast path for constant string
 
         return (std::memcmp(str1, str2, sizeof(Ch) * len1) == 0);
     }
@@ -2483,10 +3121,10 @@ private:
 };
 
 //! GenericValue with UTF8 encoding
-typedef GenericValue<UTF8<> > Value;
+typedef GenericValue<UTF8<>> Value;
 
 ///////////////////////////////////////////////////////////////////////////////
-// GenericDocument 
+// GenericDocument
 
 //! A document for parsing JSON text as DOM.
 /*!
@@ -2494,15 +3132,20 @@ typedef GenericValue<UTF8<> > Value;
     \tparam Encoding Encoding for both parsing and string storage.
     \tparam Allocator Allocator for allocating memory for the DOM
     \tparam StackAllocator Allocator for allocating memory for stack during parsing.
-    \warning Although GenericDocument inherits from GenericValue, the API does \b not provide any virtual functions, especially no virtual destructor.  To avoid memory leaks, do not \c delete a GenericDocument object via a pointer to a GenericValue.
+    \warning Although GenericDocument inherits from GenericValue, the API does \b not provide any
+   virtual functions, especially no virtual destructor.  To avoid memory leaks, do not \c delete a
+   GenericDocument object via a pointer to a GenericValue.
 */
-template <typename Encoding, typename Allocator = RAPIDJSON_DEFAULT_ALLOCATOR, typename StackAllocator = RAPIDJSON_DEFAULT_STACK_ALLOCATOR >
-class GenericDocument : public GenericValue<Encoding, Allocator> {
-public:
-    typedef typename Encoding::Ch Ch;                       //!< Character type derived from Encoding.
-    typedef GenericValue<Encoding, Allocator> ValueType;    //!< Value type of the document.
-    typedef Allocator AllocatorType;                        //!< Allocator type from template parameter.
-    typedef StackAllocator StackAllocatorType;              //!< StackAllocator type from template parameter.
+template <typename Encoding,
+          typename Allocator      = RAPIDJSON_DEFAULT_ALLOCATOR,
+          typename StackAllocator = RAPIDJSON_DEFAULT_STACK_ALLOCATOR>
+class GenericDocument : public GenericValue<Encoding, Allocator>
+{
+    public:
+    typedef typename Encoding::Ch Ch;                    //!< Character type derived from Encoding.
+    typedef GenericValue<Encoding, Allocator> ValueType; //!< Value type of the document.
+    typedef Allocator AllocatorType;           //!< Allocator type from template parameter.
+    typedef StackAllocator StackAllocatorType; //!< StackAllocator type from template parameter.
 
     //! Constructor
     /*! Creates an empty document of specified type.
@@ -2511,47 +3154,62 @@ public:
         \param stackCapacity    Optional initial capacity of stack in bytes.
         \param stackAllocator   Optional allocator for allocating memory for stack.
     */
-    explicit GenericDocument(Type type, Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity, StackAllocator* stackAllocator = 0) :
-        GenericValue<Encoding, Allocator>(type),  allocator_(allocator), ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_()
+    explicit GenericDocument(Type type,
+                             Allocator* allocator           = 0,
+                             size_t stackCapacity           = kDefaultStackCapacity,
+                             StackAllocator* stackAllocator = 0)
+        : GenericValue<Encoding, Allocator>(type),
+          allocator_(allocator),
+          ownAllocator_(0),
+          stack_(stackAllocator, stackCapacity),
+          parseResult_()
     {
-        if (!allocator_)
+        if(!allocator_)
             ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
     }
 
     //! Constructor
-    /*! Creates an empty document which type is Null. 
+    /*! Creates an empty document which type is Null.
         \param allocator        Optional allocator for allocating memory.
         \param stackCapacity    Optional initial capacity of stack in bytes.
         \param stackAllocator   Optional allocator for allocating memory for stack.
     */
-    GenericDocument(Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity, StackAllocator* stackAllocator = 0) : 
-        allocator_(allocator), ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_()
+    GenericDocument(Allocator* allocator           = 0,
+                    size_t stackCapacity           = kDefaultStackCapacity,
+                    StackAllocator* stackAllocator = 0)
+        : allocator_(allocator),
+          ownAllocator_(0),
+          stack_(stackAllocator, stackCapacity),
+          parseResult_()
     {
-        if (!allocator_)
+        if(!allocator_)
             ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
     }
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
     //! Move constructor in C++11
     GenericDocument(GenericDocument&& rhs) RAPIDJSON_NOEXCEPT
-        : ValueType(std::forward<ValueType>(rhs)), // explicit cast to avoid prohibited move from Document
+        : ValueType(
+              std::forward<ValueType>(rhs)), // explicit cast to avoid prohibited move from Document
           allocator_(rhs.allocator_),
           ownAllocator_(rhs.ownAllocator_),
           stack_(std::move(rhs.stack_)),
           parseResult_(rhs.parseResult_)
     {
-        rhs.allocator_ = 0;
+        rhs.allocator_    = 0;
         rhs.ownAllocator_ = 0;
-        rhs.parseResult_ = ParseResult();
+        rhs.parseResult_  = ParseResult();
     }
 #endif
 
-    ~GenericDocument() {
+    ~GenericDocument()
+    {
         // Clear the ::ValueType before ownAllocator is destroyed, ~ValueType()
         // runs last and may access its elements or members which would be freed
         // with an allocator like MemoryPoolAllocator (CrtAllocator does not
         // free its data when destroyed, but MemoryPoolAllocator does).
-        if (ownAllocator_) {
+        if(ownAllocator_)
+        {
             ValueType::SetNull();
         }
         Destroy();
@@ -2568,14 +3226,14 @@ public:
         // Calling the destructor here would prematurely call stack_'s destructor
         Destroy();
 
-        allocator_ = rhs.allocator_;
+        allocator_    = rhs.allocator_;
         ownAllocator_ = rhs.ownAllocator_;
-        stack_ = std::move(rhs.stack_);
-        parseResult_ = rhs.parseResult_;
+        stack_        = std::move(rhs.stack_);
+        parseResult_  = rhs.parseResult_;
 
-        rhs.allocator_ = 0;
+        rhs.allocator_    = 0;
         rhs.ownAllocator_ = 0;
-        rhs.parseResult_ = ParseResult();
+        rhs.parseResult_  = ParseResult();
 
         return *this;
     }
@@ -2587,7 +3245,8 @@ public:
         \note Constant complexity.
         \see GenericValue::Swap
     */
-    GenericDocument& Swap(GenericDocument& rhs) RAPIDJSON_NOEXCEPT {
+    GenericDocument& Swap(GenericDocument& rhs) RAPIDJSON_NOEXCEPT
+    {
         ValueType::Swap(rhs);
         stack_.Swap(rhs.stack_);
         internal::Swap(allocator_, rhs.allocator_);
@@ -2602,17 +3261,17 @@ public:
 
     //! free-standing swap function helper
     /*!
-        Helper function to enable support for common swap implementation pattern based on \c std::swap:
-        \code
-        void swap(MyClass& a, MyClass& b) {
-            using std::swap;
-            swap(a.doc, b.doc);
+        Helper function to enable support for common swap implementation pattern based on \c
+       std::swap: \code void swap(MyClass& a, MyClass& b) { using std::swap; swap(a.doc, b.doc);
             // ...
         }
         \endcode
         \see Swap()
      */
-    friend inline void swap(GenericDocument& a, GenericDocument& b) RAPIDJSON_NOEXCEPT { a.Swap(b); }
+    friend inline void swap(GenericDocument& a, GenericDocument& b) RAPIDJSON_NOEXCEPT
+    {
+        a.Swap(b);
+    }
 
     //! Populate this document by a generator which produces SAX events.
     /*! \tparam Generator A functor with <tt>bool f(Handler)</tt> prototype.
@@ -2620,11 +3279,15 @@ public:
         \return The document itself for fluent API.
     */
     template <typename Generator>
-    GenericDocument& Populate(Generator& g) {
+    GenericDocument& Populate(Generator& g)
+    {
         ClearStackOnExit scope(*this);
-        if (g(*this)) {
-            RAPIDJSON_ASSERT(stack_.GetSize() == sizeof(ValueType)); // Got one and only one root object
-            ValueType::operator=(*stack_.template Pop<ValueType>(1));// Move value from stack to document
+        if(g(*this))
+        {
+            RAPIDJSON_ASSERT(stack_.GetSize() ==
+                             sizeof(ValueType)); // Got one and only one root object
+            ValueType::operator=(
+                *stack_.template Pop<ValueType>(1)); // Move value from stack to document
         }
         return *this;
     }
@@ -2640,14 +3303,18 @@ public:
         \return The document itself for fluent API.
     */
     template <unsigned parseFlags, typename SourceEncoding, typename InputStream>
-    GenericDocument& ParseStream(InputStream& is) {
+    GenericDocument& ParseStream(InputStream& is)
+    {
         GenericReader<SourceEncoding, Encoding, StackAllocator> reader(
             stack_.HasAllocator() ? &stack_.GetAllocator() : 0);
         ClearStackOnExit scope(*this);
         parseResult_ = reader.template Parse<parseFlags>(is, *this);
-        if (parseResult_) {
-            RAPIDJSON_ASSERT(stack_.GetSize() == sizeof(ValueType)); // Got one and only one root object
-            ValueType::operator=(*stack_.template Pop<ValueType>(1));// Move value from stack to document
+        if(parseResult_)
+        {
+            RAPIDJSON_ASSERT(stack_.GetSize() ==
+                             sizeof(ValueType)); // Got one and only one root object
+            ValueType::operator=(
+                *stack_.template Pop<ValueType>(1)); // Move value from stack to document
         }
         return *this;
     }
@@ -2659,7 +3326,8 @@ public:
         \return The document itself for fluent API.
     */
     template <unsigned parseFlags, typename InputStream>
-    GenericDocument& ParseStream(InputStream& is) {
+    GenericDocument& ParseStream(InputStream& is)
+    {
         return ParseStream<parseFlags, Encoding, InputStream>(is);
     }
 
@@ -2669,7 +3337,8 @@ public:
         \return The document itself for fluent API.
     */
     template <typename InputStream>
-    GenericDocument& ParseStream(InputStream& is) {
+    GenericDocument& ParseStream(InputStream& is)
+    {
         return ParseStream<kParseDefaultFlags, Encoding, InputStream>(is);
     }
     //!@}
@@ -2683,7 +3352,8 @@ public:
         \return The document itself for fluent API.
     */
     template <unsigned parseFlags>
-    GenericDocument& ParseInsitu(Ch* str) {
+    GenericDocument& ParseInsitu(Ch* str)
+    {
         GenericInsituStringStream<Encoding> s(str);
         return ParseStream<parseFlags | kParseInsituFlag>(s);
     }
@@ -2692,9 +3362,7 @@ public:
     /*! \param str Mutable zero-terminated string to be parsed.
         \return The document itself for fluent API.
     */
-    GenericDocument& ParseInsitu(Ch* str) {
-        return ParseInsitu<kParseDefaultFlags>(str);
-    }
+    GenericDocument& ParseInsitu(Ch* str) { return ParseInsitu<kParseDefaultFlags>(str); }
     //!@}
 
     //!@name Parse from read-only string
@@ -2706,7 +3374,8 @@ public:
         \param str Read-only zero-terminated string to be parsed.
     */
     template <unsigned parseFlags, typename SourceEncoding>
-    GenericDocument& Parse(const typename SourceEncoding::Ch* str) {
+    GenericDocument& Parse(const typename SourceEncoding::Ch* str)
+    {
         RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag));
         GenericStringStream<SourceEncoding> s(str);
         return ParseStream<parseFlags, SourceEncoding>(s);
@@ -2717,51 +3386,58 @@ public:
         \param str Read-only zero-terminated string to be parsed.
     */
     template <unsigned parseFlags>
-    GenericDocument& Parse(const Ch* str) {
+    GenericDocument& Parse(const Ch* str)
+    {
         return Parse<parseFlags, Encoding>(str);
     }
 
     //! Parse JSON text from a read-only string (with \ref kParseDefaultFlags)
     /*! \param str Read-only zero-terminated string to be parsed.
-    */
-    GenericDocument& Parse(const Ch* str) {
-        return Parse<kParseDefaultFlags>(str);
-    }
+     */
+    GenericDocument& Parse(const Ch* str) { return Parse<kParseDefaultFlags>(str); }
 
     template <unsigned parseFlags, typename SourceEncoding>
-    GenericDocument& Parse(const typename SourceEncoding::Ch* str, size_t length) {
+    GenericDocument& Parse(const typename SourceEncoding::Ch* str, size_t length)
+    {
         RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag));
-        MemoryStream ms(reinterpret_cast<const char*>(str), length * sizeof(typename SourceEncoding::Ch));
+        MemoryStream ms(reinterpret_cast<const char*>(str),
+                        length * sizeof(typename SourceEncoding::Ch));
         EncodedInputStream<SourceEncoding, MemoryStream> is(ms);
         ParseStream<parseFlags, SourceEncoding>(is);
         return *this;
     }
 
     template <unsigned parseFlags>
-    GenericDocument& Parse(const Ch* str, size_t length) {
+    GenericDocument& Parse(const Ch* str, size_t length)
+    {
         return Parse<parseFlags, Encoding>(str, length);
     }
-    
-    GenericDocument& Parse(const Ch* str, size_t length) {
+
+    GenericDocument& Parse(const Ch* str, size_t length)
+    {
         return Parse<kParseDefaultFlags>(str, length);
     }
 
 #if RAPIDJSON_HAS_STDSTRING
     template <unsigned parseFlags, typename SourceEncoding>
-    GenericDocument& Parse(const std::basic_string<typename SourceEncoding::Ch>& str) {
-        // c_str() is constant complexity according to standard. Should be faster than Parse(const char*, size_t)
+    GenericDocument& Parse(const std::basic_string<typename SourceEncoding::Ch>& str)
+    {
+        // c_str() is constant complexity according to standard. Should be faster than Parse(const
+        // char*, size_t)
         return Parse<parseFlags, SourceEncoding>(str.c_str());
     }
 
     template <unsigned parseFlags>
-    GenericDocument& Parse(const std::basic_string<Ch>& str) {
+    GenericDocument& Parse(const std::basic_string<Ch>& str)
+    {
         return Parse<parseFlags, Encoding>(str.c_str());
     }
 
-    GenericDocument& Parse(const std::basic_string<Ch>& str) {
+    GenericDocument& Parse(const std::basic_string<Ch>& str)
+    {
         return Parse<kParseDefaultFlags>(str);
     }
-#endif // RAPIDJSON_HAS_STDSTRING    
+#endif // RAPIDJSON_HAS_STDSTRING
 
     //!@}
 
@@ -2793,7 +3469,8 @@ public:
     //!@}
 
     //! Get the allocator of this document.
-    Allocator& GetAllocator() {
+    Allocator& GetAllocator()
+    {
         RAPIDJSON_ASSERT(allocator_);
         return *allocator_;
     }
@@ -2801,12 +3478,14 @@ public:
     //! Get the capacity of stack in bytes.
     size_t GetStackCapacity() const { return stack_.GetCapacity(); }
 
-private:
+    private:
     // clear stack on any exit from ParseStream, e.g. due to exception
-    struct ClearStackOnExit {
+    struct ClearStackOnExit
+    {
         explicit ClearStackOnExit(GenericDocument& d) : d_(d) {}
         ~ClearStackOnExit() { d_.ClearStack(); }
-    private:
+
+        private:
         ClearStackOnExit(const ClearStackOnExit&);
         ClearStackOnExit& operator=(const ClearStackOnExit&);
         GenericDocument& d_;
@@ -2814,70 +3493,112 @@ private:
 
     // callers of the following private Handler functions
     // template <typename,typename,typename> friend class GenericReader; // for parsing
-    template <typename, typename> friend class GenericValue; // for deep copying
+    template <typename, typename>
+    friend class GenericValue; // for deep copying
 
-public:
+    public:
     // Implementation of Handler
-    bool Null() { new (stack_.template Push<ValueType>()) ValueType(); return true; }
-    bool Bool(bool b) { new (stack_.template Push<ValueType>()) ValueType(b); return true; }
-    bool Int(int i) { new (stack_.template Push<ValueType>()) ValueType(i); return true; }
-    bool Uint(unsigned i) { new (stack_.template Push<ValueType>()) ValueType(i); return true; }
-    bool Int64(int64_t i) { new (stack_.template Push<ValueType>()) ValueType(i); return true; }
-    bool Uint64(uint64_t i) { new (stack_.template Push<ValueType>()) ValueType(i); return true; }
-    bool Double(double d) { new (stack_.template Push<ValueType>()) ValueType(d); return true; }
-
-    bool RawNumber(const Ch* str, SizeType length, bool copy) { 
-        if (copy) 
-            new (stack_.template Push<ValueType>()) ValueType(str, length, GetAllocator());
-        else
-            new (stack_.template Push<ValueType>()) ValueType(str, length);
+    bool Null()
+    {
+        new(stack_.template Push<ValueType>()) ValueType();
+        return true;
+    }
+    bool Bool(bool b)
+    {
+        new(stack_.template Push<ValueType>()) ValueType(b);
+        return true;
+    }
+    bool Int(int i)
+    {
+        new(stack_.template Push<ValueType>()) ValueType(i);
+        return true;
+    }
+    bool Uint(unsigned i)
+    {
+        new(stack_.template Push<ValueType>()) ValueType(i);
+        return true;
+    }
+    bool Int64(int64_t i)
+    {
+        new(stack_.template Push<ValueType>()) ValueType(i);
+        return true;
+    }
+    bool Uint64(uint64_t i)
+    {
+        new(stack_.template Push<ValueType>()) ValueType(i);
+        return true;
+    }
+    bool Double(double d)
+    {
+        new(stack_.template Push<ValueType>()) ValueType(d);
         return true;
     }
 
-    bool String(const Ch* str, SizeType length, bool copy) { 
-        if (copy) 
-            new (stack_.template Push<ValueType>()) ValueType(str, length, GetAllocator());
+    bool RawNumber(const Ch* str, SizeType length, bool copy)
+    {
+        if(copy)
+            new(stack_.template Push<ValueType>()) ValueType(str, length, GetAllocator());
         else
-            new (stack_.template Push<ValueType>()) ValueType(str, length);
+            new(stack_.template Push<ValueType>()) ValueType(str, length);
+        return true;
+    }
+
+    bool String(const Ch* str, SizeType length, bool copy)
+    {
+        if(copy)
+            new(stack_.template Push<ValueType>()) ValueType(str, length, GetAllocator());
+        else
+            new(stack_.template Push<ValueType>()) ValueType(str, length);
+        return true;
+    }
+
+    bool StartObject()
+    {
+        new(stack_.template Push<ValueType>()) ValueType(kObjectType);
         return true;
     }
 
-    bool StartObject() { new (stack_.template Push<ValueType>()) ValueType(kObjectType); return true; }
-    
     bool Key(const Ch* str, SizeType length, bool copy) { return String(str, length, copy); }
 
-    bool EndObject(SizeType memberCount) {
-        typename ValueType::Member* members = stack_.template Pop<typename ValueType::Member>(memberCount);
+    bool EndObject(SizeType memberCount)
+    {
+        typename ValueType::Member* members =
+            stack_.template Pop<typename ValueType::Member>(memberCount);
         stack_.template Top<ValueType>()->SetObjectRaw(members, memberCount, GetAllocator());
         return true;
     }
 
-    bool StartArray() { new (stack_.template Push<ValueType>()) ValueType(kArrayType); return true; }
-    
-    bool EndArray(SizeType elementCount) {
+    bool StartArray()
+    {
+        new(stack_.template Push<ValueType>()) ValueType(kArrayType);
+        return true;
+    }
+
+    bool EndArray(SizeType elementCount)
+    {
         ValueType* elements = stack_.template Pop<ValueType>(elementCount);
         stack_.template Top<ValueType>()->SetArrayRaw(elements, elementCount, GetAllocator());
         return true;
     }
 
-private:
+    private:
     //! Prohibit copying
     GenericDocument(const GenericDocument&);
     //! Prohibit assignment
     GenericDocument& operator=(const GenericDocument&);
 
-    void ClearStack() {
-        if (Allocator::kNeedFree)
-            while (stack_.GetSize() > 0)    // Here assumes all elements in stack array are GenericValue (Member is actually 2 GenericValue objects)
+    void ClearStack()
+    {
+        if(Allocator::kNeedFree)
+            while(stack_.GetSize() > 0) // Here assumes all elements in stack array are GenericValue
+                                        // (Member is actually 2 GenericValue objects)
                 (stack_.template Pop<ValueType>(1))->~ValueType();
         else
             stack_.Clear();
         stack_.ShrinkToFit();
     }
 
-    void Destroy() {
-        RAPIDJSON_DELETE(ownAllocator_);
-    }
+    void Destroy() { RAPIDJSON_DELETE(ownAllocator_); }
 
     static const size_t kDefaultStackCapacity = 1024;
     Allocator* allocator_;
@@ -2887,22 +3608,23 @@ private:
 };
 
 //! GenericDocument with UTF8 encoding
-typedef GenericDocument<UTF8<> > Document;
-
+typedef GenericDocument<UTF8<>> Document;
 
 //! Helper class for accessing Value of array type.
 /*!
     Instance of this helper class is obtained by \c GenericValue::GetArray().
-    In addition to all APIs for array type, it provides range-based for loop if \c RAPIDJSON_HAS_CXX11_RANGE_FOR=1.
+    In addition to all APIs for array type, it provides range-based for loop if \c
+   RAPIDJSON_HAS_CXX11_RANGE_FOR=1.
 */
 template <bool Const, typename ValueT>
-class GenericArray {
-public:
+class GenericArray
+{
+    public:
     typedef GenericArray<true, ValueT> ConstArray;
     typedef GenericArray<false, ValueT> Array;
     typedef ValueT PlainType;
-    typedef typename internal::MaybeAddConst<Const,PlainType>::Type ValueType;
-    typedef ValueType* ValueIterator;  // This may be const or non-const iterator
+    typedef typename internal::MaybeAddConst<Const, PlainType>::Type ValueType;
+    typedef ValueType* ValueIterator; // This may be const or non-const iterator
     typedef const ValueT* ConstValueIterator;
     typedef typename ValueType::AllocatorType AllocatorType;
     typedef typename ValueType::StringRefType StringRefType;
@@ -2911,7 +3633,11 @@ public:
     friend class GenericValue;
 
     GenericArray(const GenericArray& rhs) : value_(rhs.value_) {}
-    GenericArray& operator=(const GenericArray& rhs) { value_ = rhs.value_; return *this; }
+    GenericArray& operator=(const GenericArray& rhs)
+    {
+        value_ = rhs.value_;
+        return *this;
+    }
     ~GenericArray() {}
 
     operator ValueType&() const { return value_; }
@@ -2919,26 +3645,57 @@ public:
     SizeType Capacity() const { return value_.Capacity(); }
     bool Empty() const { return value_.Empty(); }
     void Clear() const { value_.Clear(); }
-    ValueType& operator[](SizeType index) const {  return value_[index]; }
+    ValueType& operator[](SizeType index) const { return value_[index]; }
     ValueIterator Begin() const { return value_.Begin(); }
     ValueIterator End() const { return value_.End(); }
-    GenericArray Reserve(SizeType newCapacity, AllocatorType &allocator) const { value_.Reserve(newCapacity, allocator); return *this; }
-    GenericArray PushBack(ValueType& value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; }
+    GenericArray Reserve(SizeType newCapacity, AllocatorType& allocator) const
+    {
+        value_.Reserve(newCapacity, allocator);
+        return *this;
+    }
+    GenericArray PushBack(ValueType& value, AllocatorType& allocator) const
+    {
+        value_.PushBack(value, allocator);
+        return *this;
+    }
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
-    GenericArray PushBack(ValueType&& value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; }
+    GenericArray PushBack(ValueType&& value, AllocatorType& allocator) const
+    {
+        value_.PushBack(value, allocator);
+        return *this;
+    }
 #endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS
-    GenericArray PushBack(StringRefType value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; }
-    template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (const GenericArray&)) PushBack(T value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; }
-    GenericArray PopBack() const { value_.PopBack(); return *this; }
+    GenericArray PushBack(StringRefType value, AllocatorType& allocator) const
+    {
+        value_.PushBack(value, allocator);
+        return *this;
+    }
+    template <typename T>
+    RAPIDJSON_DISABLEIF_RETURN(
+        (internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T>>),
+        (const GenericArray&))
+    PushBack(T value, AllocatorType& allocator) const
+    {
+        value_.PushBack(value, allocator);
+        return *this;
+    }
+    GenericArray PopBack() const
+    {
+        value_.PopBack();
+        return *this;
+    }
     ValueIterator Erase(ConstValueIterator pos) const { return value_.Erase(pos); }
-    ValueIterator Erase(ConstValueIterator first, ConstValueIterator last) const { return value_.Erase(first, last); }
+    ValueIterator Erase(ConstValueIterator first, ConstValueIterator last) const
+    {
+        return value_.Erase(first, last);
+    }
 
 #if RAPIDJSON_HAS_CXX11_RANGE_FOR
     ValueIterator begin() const { return value_.Begin(); }
     ValueIterator end() const { return value_.End(); }
 #endif
 
-private:
+    private:
     GenericArray();
     GenericArray(ValueType& value) : value_(value) {}
     ValueType& value_;
@@ -2947,17 +3704,25 @@ private:
 //! Helper class for accessing Value of object type.
 /*!
     Instance of this helper class is obtained by \c GenericValue::GetObject().
-    In addition to all APIs for array type, it provides range-based for loop if \c RAPIDJSON_HAS_CXX11_RANGE_FOR=1.
+    In addition to all APIs for array type, it provides range-based for loop if \c
+   RAPIDJSON_HAS_CXX11_RANGE_FOR=1.
 */
 template <bool Const, typename ValueT>
-class GenericObject {
-public:
+class GenericObject
+{
+    public:
     typedef GenericObject<true, ValueT> ConstObject;
     typedef GenericObject<false, ValueT> Object;
     typedef ValueT PlainType;
-    typedef typename internal::MaybeAddConst<Const,PlainType>::Type ValueType;
-    typedef GenericMemberIterator<Const, typename ValueT::EncodingType, typename ValueT::AllocatorType> MemberIterator;  // This may be const or non-const iterator
-    typedef GenericMemberIterator<true, typename ValueT::EncodingType, typename ValueT::AllocatorType> ConstMemberIterator;
+    typedef typename internal::MaybeAddConst<Const, PlainType>::Type ValueType;
+    typedef GenericMemberIterator<Const,
+                                  typename ValueT::EncodingType,
+                                  typename ValueT::AllocatorType>
+        MemberIterator; // This may be const or non-const iterator
+    typedef GenericMemberIterator<true,
+                                  typename ValueT::EncodingType,
+                                  typename ValueT::AllocatorType>
+        ConstMemberIterator;
     typedef typename ValueType::AllocatorType AllocatorType;
     typedef typename ValueType::StringRefType StringRefType;
     typedef typename ValueType::EncodingType EncodingType;
@@ -2967,67 +3732,159 @@ public:
     friend class GenericValue;
 
     GenericObject(const GenericObject& rhs) : value_(rhs.value_) {}
-    GenericObject& operator=(const GenericObject& rhs) { value_ = rhs.value_; return *this; }
+    GenericObject& operator=(const GenericObject& rhs)
+    {
+        value_ = rhs.value_;
+        return *this;
+    }
     ~GenericObject() {}
 
     operator ValueType&() const { return value_; }
     SizeType MemberCount() const { return value_.MemberCount(); }
     SizeType MemberCapacity() const { return value_.MemberCapacity(); }
     bool ObjectEmpty() const { return value_.ObjectEmpty(); }
-    template <typename T> ValueType& operator[](T* name) const { return value_[name]; }
-    template <typename SourceAllocator> ValueType& operator[](const GenericValue<EncodingType, SourceAllocator>& name) const { return value_[name]; }
+    template <typename T>
+    ValueType& operator[](T* name) const
+    {
+        return value_[name];
+    }
+    template <typename SourceAllocator>
+    ValueType& operator[](const GenericValue<EncodingType, SourceAllocator>& name) const
+    {
+        return value_[name];
+    }
 #if RAPIDJSON_HAS_STDSTRING
     ValueType& operator[](const std::basic_string<Ch>& name) const { return value_[name]; }
 #endif
     MemberIterator MemberBegin() const { return value_.MemberBegin(); }
     MemberIterator MemberEnd() const { return value_.MemberEnd(); }
-    GenericObject MemberReserve(SizeType newCapacity, AllocatorType &allocator) const { value_.MemberReserve(newCapacity, allocator); return *this; }
+    GenericObject MemberReserve(SizeType newCapacity, AllocatorType& allocator) const
+    {
+        value_.MemberReserve(newCapacity, allocator);
+        return *this;
+    }
     bool HasMember(const Ch* name) const { return value_.HasMember(name); }
 #if RAPIDJSON_HAS_STDSTRING
     bool HasMember(const std::basic_string<Ch>& name) const { return value_.HasMember(name); }
 #endif
-    template <typename SourceAllocator> bool HasMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.HasMember(name); }
+    template <typename SourceAllocator>
+    bool HasMember(const GenericValue<EncodingType, SourceAllocator>& name) const
+    {
+        return value_.HasMember(name);
+    }
     MemberIterator FindMember(const Ch* name) const { return value_.FindMember(name); }
-    template <typename SourceAllocator> MemberIterator FindMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.FindMember(name); }
+    template <typename SourceAllocator>
+    MemberIterator FindMember(const GenericValue<EncodingType, SourceAllocator>& name) const
+    {
+        return value_.FindMember(name);
+    }
 #if RAPIDJSON_HAS_STDSTRING
-    MemberIterator FindMember(const std::basic_string<Ch>& name) const { return value_.FindMember(name); }
+    MemberIterator FindMember(const std::basic_string<Ch>& name) const
+    {
+        return value_.FindMember(name);
+    }
 #endif
-    GenericObject AddMember(ValueType& name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
-    GenericObject AddMember(ValueType& name, StringRefType value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    GenericObject AddMember(ValueType& name, ValueType& value, AllocatorType& allocator) const
+    {
+        value_.AddMember(name, value, allocator);
+        return *this;
+    }
+    GenericObject AddMember(ValueType& name, StringRefType value, AllocatorType& allocator) const
+    {
+        value_.AddMember(name, value, allocator);
+        return *this;
+    }
 #if RAPIDJSON_HAS_STDSTRING
-    GenericObject AddMember(ValueType& name, std::basic_string<Ch>& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    GenericObject
+    AddMember(ValueType& name, std::basic_string<Ch>& value, AllocatorType& allocator) const
+    {
+        value_.AddMember(name, value, allocator);
+        return *this;
+    }
 #endif
-    template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&)) AddMember(ValueType& name, T value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    template <typename T>
+    RAPIDJSON_DISABLEIF_RETURN(
+        (internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T>>), (ValueType&))
+    AddMember(ValueType& name, T value, AllocatorType& allocator) const
+    {
+        value_.AddMember(name, value, allocator);
+        return *this;
+    }
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
-    GenericObject AddMember(ValueType&& name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
-    GenericObject AddMember(ValueType&& name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
-    GenericObject AddMember(ValueType& name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
-    GenericObject AddMember(StringRefType name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    GenericObject AddMember(ValueType&& name, ValueType&& value, AllocatorType& allocator) const
+    {
+        value_.AddMember(name, value, allocator);
+        return *this;
+    }
+    GenericObject AddMember(ValueType&& name, ValueType& value, AllocatorType& allocator) const
+    {
+        value_.AddMember(name, value, allocator);
+        return *this;
+    }
+    GenericObject AddMember(ValueType& name, ValueType&& value, AllocatorType& allocator) const
+    {
+        value_.AddMember(name, value, allocator);
+        return *this;
+    }
+    GenericObject AddMember(StringRefType name, ValueType&& value, AllocatorType& allocator) const
+    {
+        value_.AddMember(name, value, allocator);
+        return *this;
+    }
 #endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS
-    GenericObject AddMember(StringRefType name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
-    GenericObject AddMember(StringRefType name, StringRefType value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
-    template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericObject)) AddMember(StringRefType name, T value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    GenericObject AddMember(StringRefType name, ValueType& value, AllocatorType& allocator) const
+    {
+        value_.AddMember(name, value, allocator);
+        return *this;
+    }
+    GenericObject AddMember(StringRefType name, StringRefType value, AllocatorType& allocator) const
+    {
+        value_.AddMember(name, value, allocator);
+        return *this;
+    }
+    template <typename T>
+    RAPIDJSON_DISABLEIF_RETURN(
+        (internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T>>), (GenericObject))
+    AddMember(StringRefType name, T value, AllocatorType& allocator) const
+    {
+        value_.AddMember(name, value, allocator);
+        return *this;
+    }
     void RemoveAllMembers() { value_.RemoveAllMembers(); }
     bool RemoveMember(const Ch* name) const { return value_.RemoveMember(name); }
 #if RAPIDJSON_HAS_STDSTRING
     bool RemoveMember(const std::basic_string<Ch>& name) const { return value_.RemoveMember(name); }
 #endif
-    template <typename SourceAllocator> bool RemoveMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.RemoveMember(name); }
+    template <typename SourceAllocator>
+    bool RemoveMember(const GenericValue<EncodingType, SourceAllocator>& name) const
+    {
+        return value_.RemoveMember(name);
+    }
     MemberIterator RemoveMember(MemberIterator m) const { return value_.RemoveMember(m); }
     MemberIterator EraseMember(ConstMemberIterator pos) const { return value_.EraseMember(pos); }
-    MemberIterator EraseMember(ConstMemberIterator first, ConstMemberIterator last) const { return value_.EraseMember(first, last); }
+    MemberIterator EraseMember(ConstMemberIterator first, ConstMemberIterator last) const
+    {
+        return value_.EraseMember(first, last);
+    }
     bool EraseMember(const Ch* name) const { return value_.EraseMember(name); }
 #if RAPIDJSON_HAS_STDSTRING
-    bool EraseMember(const std::basic_string<Ch>& name) const { return EraseMember(ValueType(StringRef(name))); }
+    bool EraseMember(const std::basic_string<Ch>& name) const
+    {
+        return EraseMember(ValueType(StringRef(name)));
+    }
 #endif
-    template <typename SourceAllocator> bool EraseMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.EraseMember(name); }
+    template <typename SourceAllocator>
+    bool EraseMember(const GenericValue<EncodingType, SourceAllocator>& name) const
+    {
+        return value_.EraseMember(name);
+    }
 
 #if RAPIDJSON_HAS_CXX11_RANGE_FOR
     MemberIterator begin() const { return value_.MemberBegin(); }
     MemberIterator end() const { return value_.MemberEnd(); }
 #endif
 
-private:
+    private:
     GenericObject();
     GenericObject(ValueType& value) : value_(value) {}
     ValueType& value_;
diff --git a/include/rapidjson/encodedstream.h b/include/rapidjson/encodedstream.h
index cf046b8923..4b96e79b7b 100644
--- a/include/rapidjson/encodedstream.h
+++ b/include/rapidjson/encodedstream.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_ENCODEDSTREAM_H_
@@ -32,30 +32,43 @@ RAPIDJSON_NAMESPACE_BEGIN
 
 //! Input byte stream wrapper with a statically bound encoding.
 /*!
-    \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE.
-    \tparam InputByteStream Type of input byte stream. For example, FileReadStream.
+    \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE,
+   UTF32LE, UTF32BE. \tparam InputByteStream Type of input byte stream. For example, FileReadStream.
 */
 template <typename Encoding, typename InputByteStream>
-class EncodedInputStream {
+class EncodedInputStream
+{
     RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
-public:
+
+    public:
     typedef typename Encoding::Ch Ch;
 
-    EncodedInputStream(InputByteStream& is) : is_(is) { 
-        current_ = Encoding::TakeBOM(is_);
-    }
+    EncodedInputStream(InputByteStream& is) : is_(is) { current_ = Encoding::TakeBOM(is_); }
 
     Ch Peek() const { return current_; }
-    Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; }
+    Ch Take()
+    {
+        Ch c     = current_;
+        current_ = Encoding::Take(is_);
+        return c;
+    }
     size_t Tell() const { return is_.Tell(); }
 
     // Not implemented
     void Put(Ch) { RAPIDJSON_ASSERT(false); }
-    void Flush() { RAPIDJSON_ASSERT(false); } 
-    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
-    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+    void Flush() { RAPIDJSON_ASSERT(false); }
+    Ch* PutBegin()
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    size_t PutEnd(Ch*)
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
 
-private:
+    private:
     EncodedInputStream(const EncodedInputStream&);
     EncodedInputStream& operator=(const EncodedInputStream&);
 
@@ -65,14 +78,19 @@ private:
 
 //! Specialized for UTF8 MemoryStream.
 template <>
-class EncodedInputStream<UTF8<>, MemoryStream> {
-public:
+class EncodedInputStream<UTF8<>, MemoryStream>
+{
+    public:
     typedef UTF8<>::Ch Ch;
 
-    EncodedInputStream(MemoryStream& is) : is_(is) {
-        if (static_cast<unsigned char>(is_.Peek()) == 0xEFu) is_.Take();
-        if (static_cast<unsigned char>(is_.Peek()) == 0xBBu) is_.Take();
-        if (static_cast<unsigned char>(is_.Peek()) == 0xBFu) is_.Take();
+    EncodedInputStream(MemoryStream& is) : is_(is)
+    {
+        if(static_cast<unsigned char>(is_.Peek()) == 0xEFu)
+            is_.Take();
+        if(static_cast<unsigned char>(is_.Peek()) == 0xBBu)
+            is_.Take();
+        if(static_cast<unsigned char>(is_.Peek()) == 0xBFu)
+            is_.Take();
     }
     Ch Peek() const { return is_.Peek(); }
     Ch Take() { return is_.Take(); }
@@ -80,51 +98,76 @@ public:
 
     // Not implemented
     void Put(Ch) {}
-    void Flush() {} 
+    void Flush() {}
     Ch* PutBegin() { return 0; }
     size_t PutEnd(Ch*) { return 0; }
 
     MemoryStream& is_;
 
-private:
+    private:
     EncodedInputStream(const EncodedInputStream&);
     EncodedInputStream& operator=(const EncodedInputStream&);
 };
 
 //! Output byte stream wrapper with statically bound encoding.
 /*!
-    \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE.
-    \tparam OutputByteStream Type of input byte stream. For example, FileWriteStream.
+    \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE,
+   UTF32LE, UTF32BE. \tparam OutputByteStream Type of input byte stream. For example,
+   FileWriteStream.
 */
 template <typename Encoding, typename OutputByteStream>
-class EncodedOutputStream {
+class EncodedOutputStream
+{
     RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
-public:
+
+    public:
     typedef typename Encoding::Ch Ch;
 
-    EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) { 
-        if (putBOM)
+    EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os)
+    {
+        if(putBOM)
             Encoding::PutBOM(os_);
     }
 
-    void Put(Ch c) { Encoding::Put(os_, c);  }
+    void Put(Ch c) { Encoding::Put(os_, c); }
     void Flush() { os_.Flush(); }
 
     // Not implemented
-    Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;}
-    Ch Take() { RAPIDJSON_ASSERT(false); return 0;}
-    size_t Tell() const { RAPIDJSON_ASSERT(false);  return 0; }
-    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
-    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+    Ch Peek() const
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    Ch Take()
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    size_t Tell() const
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    Ch* PutBegin()
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    size_t PutEnd(Ch*)
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
 
-private:
+    private:
     EncodedOutputStream(const EncodedOutputStream&);
     EncodedOutputStream& operator=(const EncodedOutputStream&);
 
     OutputByteStream& os_;
 };
 
-#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
+#define RAPIDJSON_ENCODINGS_FUNC(x) \
+    UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
 
 //! Input stream wrapper with dynamically bound encoding and automatic encoding detection.
 /*!
@@ -132,9 +175,11 @@ private:
     \tparam InputByteStream type of input byte stream to be wrapped.
 */
 template <typename CharType, typename InputByteStream>
-class AutoUTFInputStream {
+class AutoUTFInputStream
+{
     RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
-public:
+
+    public:
     typedef CharType Ch;
 
     //! Constructor.
@@ -142,33 +187,49 @@ public:
         \param is input stream to be wrapped.
         \param type UTF encoding type if it is not detected from the stream.
     */
-    AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) {
-        RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);        
+    AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8)
+        : is_(&is), type_(type), hasBOM_(false)
+    {
+        RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
         DetectType();
-        static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) };
-        takeFunc_ = f[type_];
-        current_ = takeFunc_(*is_);
+        static const TakeFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(Take)};
+        takeFunc_                 = f[type_];
+        current_                  = takeFunc_(*is_);
     }
 
     UTFType GetType() const { return type_; }
     bool HasBOM() const { return hasBOM_; }
 
     Ch Peek() const { return current_; }
-    Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; }
+    Ch Take()
+    {
+        Ch c     = current_;
+        current_ = takeFunc_(*is_);
+        return c;
+    }
     size_t Tell() const { return is_->Tell(); }
 
     // Not implemented
     void Put(Ch) { RAPIDJSON_ASSERT(false); }
-    void Flush() { RAPIDJSON_ASSERT(false); } 
-    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
-    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+    void Flush() { RAPIDJSON_ASSERT(false); }
+    Ch* PutBegin()
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    size_t PutEnd(Ch*)
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
 
-private:
+    private:
     AutoUTFInputStream(const AutoUTFInputStream&);
     AutoUTFInputStream& operator=(const AutoUTFInputStream&);
 
     // Detect encoding type with BOM or RFC 4627
-    void DetectType() {
+    void DetectType()
+    {
         // BOM (Byte Order Mark):
         // 00 00 FE FF  UTF-32BE
         // FF FE 00 00  UTF-32LE
@@ -176,17 +237,52 @@ private:
         // FF FE        UTF-16LE
         // EF BB BF     UTF-8
 
-        const unsigned char* c = reinterpret_cast<const unsigned char *>(is_->Peek4());
-        if (!c)
+        const unsigned char* c = reinterpret_cast<const unsigned char*>(is_->Peek4());
+        if(!c)
             return;
 
         unsigned bom = static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24));
-        hasBOM_ = false;
-        if (bom == 0xFFFE0000)                  { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
-        else if (bom == 0x0000FEFF)             { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
-        else if ((bom & 0xFFFF) == 0xFFFE)      { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take();                           }
-        else if ((bom & 0xFFFF) == 0xFEFF)      { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take();                           }
-        else if ((bom & 0xFFFFFF) == 0xBFBBEF)  { type_ = kUTF8;    hasBOM_ = true; is_->Take(); is_->Take(); is_->Take();              }
+        hasBOM_      = false;
+        if(bom == 0xFFFE0000)
+        {
+            type_   = kUTF32BE;
+            hasBOM_ = true;
+            is_->Take();
+            is_->Take();
+            is_->Take();
+            is_->Take();
+        }
+        else if(bom == 0x0000FEFF)
+        {
+            type_   = kUTF32LE;
+            hasBOM_ = true;
+            is_->Take();
+            is_->Take();
+            is_->Take();
+            is_->Take();
+        }
+        else if((bom & 0xFFFF) == 0xFFFE)
+        {
+            type_   = kUTF16BE;
+            hasBOM_ = true;
+            is_->Take();
+            is_->Take();
+        }
+        else if((bom & 0xFFFF) == 0xFEFF)
+        {
+            type_   = kUTF16LE;
+            hasBOM_ = true;
+            is_->Take();
+            is_->Take();
+        }
+        else if((bom & 0xFFFFFF) == 0xBFBBEF)
+        {
+            type_   = kUTF8;
+            hasBOM_ = true;
+            is_->Take();
+            is_->Take();
+            is_->Take();
+        }
 
         // RFC 4627: Section 3
         // "Since the first two characters of a JSON text will always be ASCII
@@ -199,21 +295,26 @@ private:
         // xx 00 xx 00  UTF-16LE
         // xx xx xx xx  UTF-8
 
-        if (!hasBOM_) {
+        if(!hasBOM_)
+        {
             int pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0);
-            switch (pattern) {
+            switch(pattern)
+            {
             case 0x08: type_ = kUTF32BE; break;
             case 0x0A: type_ = kUTF16BE; break;
             case 0x01: type_ = kUTF32LE; break;
             case 0x05: type_ = kUTF16LE; break;
-            case 0x0F: type_ = kUTF8;    break;
+            case 0x0F: type_ = kUTF8; break;
             default: break; // Use type defined by user.
             }
         }
 
-        // Runtime check whether the size of character type is sufficient. It only perform checks with assertion.
-        if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
-        if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
+        // Runtime check whether the size of character type is sufficient. It only perform checks
+        // with assertion.
+        if(type_ == kUTF16LE || type_ == kUTF16BE)
+            RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
+        if(type_ == kUTF32LE || type_ == kUTF32BE)
+            RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
     }
 
     typedef Ch (*TakeFunc)(InputByteStream& is);
@@ -230,9 +331,11 @@ private:
     \tparam OutputByteStream type of output byte stream to be wrapped.
 */
 template <typename CharType, typename OutputByteStream>
-class AutoUTFOutputStream {
+class AutoUTFOutputStream
+{
     RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
-public:
+
+    public:
     typedef CharType Ch;
 
     //! Constructor.
@@ -241,39 +344,64 @@ public:
         \param type UTF encoding type.
         \param putBOM Whether to write BOM at the beginning of the stream.
     */
-    AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) {
+    AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type)
+    {
         RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
 
-        // Runtime check whether the size of character type is sufficient. It only perform checks with assertion.
-        if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
-        if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
+        // Runtime check whether the size of character type is sufficient. It only perform checks
+        // with assertion.
+        if(type_ == kUTF16LE || type_ == kUTF16BE)
+            RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
+        if(type_ == kUTF32LE || type_ == kUTF32BE)
+            RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
 
-        static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) };
-        putFunc_ = f[type_];
+        static const PutFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(Put)};
+        putFunc_                 = f[type_];
 
-        if (putBOM)
+        if(putBOM)
             PutBOM();
     }
 
     UTFType GetType() const { return type_; }
 
     void Put(Ch c) { putFunc_(*os_, c); }
-    void Flush() { os_->Flush(); } 
+    void Flush() { os_->Flush(); }
 
     // Not implemented
-    Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;}
-    Ch Take() { RAPIDJSON_ASSERT(false); return 0;}
-    size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
-    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
-    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+    Ch Peek() const
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    Ch Take()
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    size_t Tell() const
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    Ch* PutBegin()
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    size_t PutEnd(Ch*)
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
 
-private:
+    private:
     AutoUTFOutputStream(const AutoUTFOutputStream&);
     AutoUTFOutputStream& operator=(const AutoUTFOutputStream&);
 
-    void PutBOM() { 
+    void PutBOM()
+    {
         typedef void (*PutBOMFunc)(OutputByteStream&);
-        static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) };
+        static const PutBOMFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(PutBOM)};
         f[type_](*os_);
     }
 
diff --git a/include/rapidjson/encodings.h b/include/rapidjson/encodings.h
index c453c0da31..0315d725fd 100644
--- a/include/rapidjson/encodings.h
+++ b/include/rapidjson/encodings.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_ENCODINGS_H_
@@ -20,7 +20,7 @@
 #if defined(_MSC_VER) && !defined(__clang__)
 RAPIDJSON_DIAG_PUSH
 RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data
-RAPIDJSON_DIAG_OFF(4702)  // unreachable code
+RAPIDJSON_DIAG_OFF(4702) // unreachable code
 #elif defined(__GNUC__)
 RAPIDJSON_DIAG_PUSH
 RAPIDJSON_DIAG_OFF(effc++)
@@ -37,7 +37,8 @@ RAPIDJSON_NAMESPACE_BEGIN
 
 \code
 concept Encoding {
-    typename Ch;    //! Type of character. A "character" is actually a code unit in unicode's definition.
+    typename Ch;    //! Type of character. A "character" is actually a code unit in unicode's
+definition.
 
     enum { supportUnicode = 1 }; // or 0 if not supporting unicode
 
@@ -92,26 +93,34 @@ concept Encoding {
     \tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char.
     \note implements Encoding concept
 */
-template<typename CharType = char>
-struct UTF8 {
+template <typename CharType = char>
+struct UTF8
+{
     typedef CharType Ch;
 
-    enum { supportUnicode = 1 };
+    enum
+    {
+        supportUnicode = 1
+    };
 
-    template<typename OutputStream>
-    static void Encode(OutputStream& os, unsigned codepoint) {
-        if (codepoint <= 0x7F) 
+    template <typename OutputStream>
+    static void Encode(OutputStream& os, unsigned codepoint)
+    {
+        if(codepoint <= 0x7F)
             os.Put(static_cast<Ch>(codepoint & 0xFF));
-        else if (codepoint <= 0x7FF) {
+        else if(codepoint <= 0x7FF)
+        {
             os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
             os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
         }
-        else if (codepoint <= 0xFFFF) {
+        else if(codepoint <= 0xFFFF)
+        {
             os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
             os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
             os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
         }
-        else {
+        else
+        {
             RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
             os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
             os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
@@ -120,20 +129,24 @@ struct UTF8 {
         }
     }
 
-    template<typename OutputStream>
-    static void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
-        if (codepoint <= 0x7F) 
+    template <typename OutputStream>
+    static void EncodeUnsafe(OutputStream& os, unsigned codepoint)
+    {
+        if(codepoint <= 0x7F)
             PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
-        else if (codepoint <= 0x7FF) {
+        else if(codepoint <= 0x7FF)
+        {
             PutUnsafe(os, static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
             PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
         }
-        else if (codepoint <= 0xFFFF) {
+        else if(codepoint <= 0xFFFF)
+        {
             PutUnsafe(os, static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
             PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
             PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
         }
-        else {
+        else
+        {
             RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
             PutUnsafe(os, static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
             PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
@@ -143,31 +156,66 @@ struct UTF8 {
     }
 
     template <typename InputStream>
-    static bool Decode(InputStream& is, unsigned* codepoint) {
-#define RAPIDJSON_COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu)
+    static bool Decode(InputStream& is, unsigned* codepoint)
+    {
+#define RAPIDJSON_COPY()    \
+    c          = is.Take(); \
+    *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu)
 #define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
-#define RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
+#define RAPIDJSON_TAIL() \
+    RAPIDJSON_COPY();    \
+    RAPIDJSON_TRANS(0x70)
         typename InputStream::Ch c = is.Take();
-        if (!(c & 0x80)) {
+        if(!(c & 0x80))
+        {
             *codepoint = static_cast<unsigned char>(c);
             return true;
         }
 
         unsigned char type = GetRange(static_cast<unsigned char>(c));
-        if (type >= 32) {
+        if(type >= 32)
+        {
             *codepoint = 0;
-        } else {
+        }
+        else
+        {
             *codepoint = (0xFFu >> type) & static_cast<unsigned char>(c);
         }
         bool result = true;
-        switch (type) {
+        switch(type)
+        {
         case 2: RAPIDJSON_TAIL(); return result;
-        case 3: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
-        case 4: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x50); RAPIDJSON_TAIL(); return result;
-        case 5: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x10); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
-        case 6: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
-        case 10: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x20); RAPIDJSON_TAIL(); return result;
-        case 11: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x60); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
+        case 3:
+            RAPIDJSON_TAIL();
+            RAPIDJSON_TAIL();
+            return result;
+        case 4:
+            RAPIDJSON_COPY();
+            RAPIDJSON_TRANS(0x50);
+            RAPIDJSON_TAIL();
+            return result;
+        case 5:
+            RAPIDJSON_COPY();
+            RAPIDJSON_TRANS(0x10);
+            RAPIDJSON_TAIL();
+            RAPIDJSON_TAIL();
+            return result;
+        case 6:
+            RAPIDJSON_TAIL();
+            RAPIDJSON_TAIL();
+            RAPIDJSON_TAIL();
+            return result;
+        case 10:
+            RAPIDJSON_COPY();
+            RAPIDJSON_TRANS(0x20);
+            RAPIDJSON_TAIL();
+            return result;
+        case 11:
+            RAPIDJSON_COPY();
+            RAPIDJSON_TRANS(0x60);
+            RAPIDJSON_TAIL();
+            RAPIDJSON_TAIL();
+            return result;
         default: return false;
         }
 #undef RAPIDJSON_COPY
@@ -176,24 +224,55 @@ struct UTF8 {
     }
 
     template <typename InputStream, typename OutputStream>
-    static bool Validate(InputStream& is, OutputStream& os) {
-#define RAPIDJSON_COPY() if (c != '\0') os.Put(c = is.Take())
+    static bool Validate(InputStream& is, OutputStream& os)
+    {
+#define RAPIDJSON_COPY() \
+    if(c != '\0')        \
+    os.Put(c = is.Take())
 #define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
-#define RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
+#define RAPIDJSON_TAIL() \
+    RAPIDJSON_COPY();    \
+    RAPIDJSON_TRANS(0x70)
         Ch c = static_cast<Ch>(-1);
         RAPIDJSON_COPY();
-        if (!(c & 0x80))
+        if(!(c & 0x80))
             return true;
 
         bool result = true;
-        switch (GetRange(static_cast<unsigned char>(c))) {
+        switch(GetRange(static_cast<unsigned char>(c)))
+        {
         case 2: RAPIDJSON_TAIL(); return result;
-        case 3: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
-        case 4: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x50); RAPIDJSON_TAIL(); return result;
-        case 5: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x10); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
-        case 6: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
-        case 10: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x20); RAPIDJSON_TAIL(); return result;
-        case 11: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x60); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
+        case 3:
+            RAPIDJSON_TAIL();
+            RAPIDJSON_TAIL();
+            return result;
+        case 4:
+            RAPIDJSON_COPY();
+            RAPIDJSON_TRANS(0x50);
+            RAPIDJSON_TAIL();
+            return result;
+        case 5:
+            RAPIDJSON_COPY();
+            RAPIDJSON_TRANS(0x10);
+            RAPIDJSON_TAIL();
+            RAPIDJSON_TAIL();
+            return result;
+        case 6:
+            RAPIDJSON_TAIL();
+            RAPIDJSON_TAIL();
+            RAPIDJSON_TAIL();
+            return result;
+        case 10:
+            RAPIDJSON_COPY();
+            RAPIDJSON_TRANS(0x20);
+            RAPIDJSON_TAIL();
+            return result;
+        case 11:
+            RAPIDJSON_COPY();
+            RAPIDJSON_TRANS(0x60);
+            RAPIDJSON_TAIL();
+            RAPIDJSON_TAIL();
+            return result;
         default: return false;
         }
 #undef RAPIDJSON_COPY
@@ -201,45 +280,62 @@ struct UTF8 {
 #undef RAPIDJSON_TAIL
     }
 
-    static unsigned char GetRange(unsigned char c) {
+    static unsigned char GetRange(unsigned char c)
+    {
         // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
-        // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types.
+        // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test
+        // multiple types.
         static const unsigned char type[] = {
-            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-            0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
-            0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
-            0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
-            0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
-            8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-            10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+            0,    0,    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+            0x10, 0x10, 0x10, 0x10, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+            0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+            0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+            0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 8,    8,    2,    2,
+            2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,
+            2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,
+            10,   3,    3,    3,    3,    3,    3,    3,    3,    3,    3,    3,    3,    4,
+            3,    3,    11,   6,    6,    6,    5,    8,    8,    8,    8,    8,    8,    8,
+            8,    8,    8,    8,
         };
         return type[c];
     }
 
     template <typename InputByteStream>
-    static CharType TakeBOM(InputByteStream& is) {
+    static CharType TakeBOM(InputByteStream& is)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
         typename InputByteStream::Ch c = Take(is);
-        if (static_cast<unsigned char>(c) != 0xEFu) return c;
+        if(static_cast<unsigned char>(c) != 0xEFu)
+            return c;
         c = is.Take();
-        if (static_cast<unsigned char>(c) != 0xBBu) return c;
+        if(static_cast<unsigned char>(c) != 0xBBu)
+            return c;
         c = is.Take();
-        if (static_cast<unsigned char>(c) != 0xBFu) return c;
+        if(static_cast<unsigned char>(c) != 0xBFu)
+            return c;
         c = is.Take();
         return c;
     }
 
     template <typename InputByteStream>
-    static Ch Take(InputByteStream& is) {
+    static Ch Take(InputByteStream& is)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
         return static_cast<Ch>(is.Take());
     }
 
     template <typename OutputByteStream>
-    static void PutBOM(OutputByteStream& os) {
+    static void PutBOM(OutputByteStream& os)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
         os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu));
         os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu));
@@ -247,7 +343,8 @@ struct UTF8 {
     }
 
     template <typename OutputByteStream>
-    static void Put(OutputByteStream& os, Ch c) {
+    static void Put(OutputByteStream& os, Ch c)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
         os.Put(static_cast<typename OutputByteStream::Ch>(c));
     }
@@ -259,27 +356,35 @@ struct UTF8 {
 //! UTF-16 encoding.
 /*! http://en.wikipedia.org/wiki/UTF-16
     http://tools.ietf.org/html/rfc2781
-    \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead.
-    \note implements Encoding concept
+    \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t
+   instead. \note implements Encoding concept
 
-    \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness.
-    For streaming, use UTF16LE and UTF16BE, which handle endianness.
+    \note For in-memory access, no need to concern endianness. The code units and code points are
+   represented by CPU's endianness. For streaming, use UTF16LE and UTF16BE, which handle endianness.
 */
-template<typename CharType = wchar_t>
-struct UTF16 {
+template <typename CharType = wchar_t>
+struct UTF16
+{
     typedef CharType Ch;
     RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2);
 
-    enum { supportUnicode = 1 };
+    enum
+    {
+        supportUnicode = 1
+    };
 
-    template<typename OutputStream>
-    static void Encode(OutputStream& os, unsigned codepoint) {
+    template <typename OutputStream>
+    static void Encode(OutputStream& os, unsigned codepoint)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
-        if (codepoint <= 0xFFFF) {
-            RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair 
+        if(codepoint <= 0xFFFF)
+        {
+            RAPIDJSON_ASSERT(codepoint < 0xD800 ||
+                             codepoint > 0xDFFF); // Code point itself cannot be surrogate pair
             os.Put(static_cast<typename OutputStream::Ch>(codepoint));
         }
-        else {
+        else
+        {
             RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
             unsigned v = codepoint - 0x10000;
             os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
@@ -287,15 +392,18 @@ struct UTF16 {
         }
     }
 
-
-    template<typename OutputStream>
-    static void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
+    template <typename OutputStream>
+    static void EncodeUnsafe(OutputStream& os, unsigned codepoint)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
-        if (codepoint <= 0xFFFF) {
-            RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair 
+        if(codepoint <= 0xFFFF)
+        {
+            RAPIDJSON_ASSERT(codepoint < 0xD800 ||
+                             codepoint > 0xDFFF); // Code point itself cannot be surrogate pair
             PutUnsafe(os, static_cast<typename OutputStream::Ch>(codepoint));
         }
-        else {
+        else
+        {
             RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
             unsigned v = codepoint - 0x10000;
             PutUnsafe(os, static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
@@ -304,16 +412,19 @@ struct UTF16 {
     }
 
     template <typename InputStream>
-    static bool Decode(InputStream& is, unsigned* codepoint) {
+    static bool Decode(InputStream& is, unsigned* codepoint)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
         typename InputStream::Ch c = is.Take();
-        if (c < 0xD800 || c > 0xDFFF) {
+        if(c < 0xD800 || c > 0xDFFF)
+        {
             *codepoint = static_cast<unsigned>(c);
             return true;
         }
-        else if (c <= 0xDBFF) {
+        else if(c <= 0xDBFF)
+        {
             *codepoint = (static_cast<unsigned>(c) & 0x3FF) << 10;
-            c = is.Take();
+            c          = is.Take();
             *codepoint |= (static_cast<unsigned>(c) & 0x3FF);
             *codepoint += 0x10000;
             return c >= 0xDC00 && c <= 0xDFFF;
@@ -322,14 +433,16 @@ struct UTF16 {
     }
 
     template <typename InputStream, typename OutputStream>
-    static bool Validate(InputStream& is, OutputStream& os) {
+    static bool Validate(InputStream& is, OutputStream& os)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
         typename InputStream::Ch c;
         os.Put(static_cast<typename OutputStream::Ch>(c = is.Take()));
-        if (c < 0xD800 || c > 0xDFFF)
+        if(c < 0xD800 || c > 0xDFFF)
             return true;
-        else if (c <= 0xDBFF) {
+        else if(c <= 0xDBFF)
+        {
             os.Put(c = is.Take());
             return c >= 0xDC00 && c <= 0xDFFF;
         }
@@ -338,17 +451,20 @@ struct UTF16 {
 };
 
 //! UTF-16 little endian encoding.
-template<typename CharType = wchar_t>
-struct UTF16LE : UTF16<CharType> {
+template <typename CharType = wchar_t>
+struct UTF16LE : UTF16<CharType>
+{
     template <typename InputByteStream>
-    static CharType TakeBOM(InputByteStream& is) {
+    static CharType TakeBOM(InputByteStream& is)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
         CharType c = Take(is);
         return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c;
     }
 
     template <typename InputByteStream>
-    static CharType Take(InputByteStream& is) {
+    static CharType Take(InputByteStream& is)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
         unsigned c = static_cast<uint8_t>(is.Take());
         c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
@@ -356,14 +472,16 @@ struct UTF16LE : UTF16<CharType> {
     }
 
     template <typename OutputByteStream>
-    static void PutBOM(OutputByteStream& os) {
+    static void PutBOM(OutputByteStream& os)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
         os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
         os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
     }
 
     template <typename OutputByteStream>
-    static void Put(OutputByteStream& os, CharType c) {
+    static void Put(OutputByteStream& os, CharType c)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
         os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
         os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
@@ -371,17 +489,20 @@ struct UTF16LE : UTF16<CharType> {
 };
 
 //! UTF-16 big endian encoding.
-template<typename CharType = wchar_t>
-struct UTF16BE : UTF16<CharType> {
+template <typename CharType = wchar_t>
+struct UTF16BE : UTF16<CharType>
+{
     template <typename InputByteStream>
-    static CharType TakeBOM(InputByteStream& is) {
+    static CharType TakeBOM(InputByteStream& is)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
         CharType c = Take(is);
         return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c;
     }
 
     template <typename InputByteStream>
-    static CharType Take(InputByteStream& is) {
+    static CharType Take(InputByteStream& is)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
         unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
         c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take()));
@@ -389,14 +510,16 @@ struct UTF16BE : UTF16<CharType> {
     }
 
     template <typename OutputByteStream>
-    static void PutBOM(OutputByteStream& os) {
+    static void PutBOM(OutputByteStream& os)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
         os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
         os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
     }
 
     template <typename OutputByteStream>
-    static void Put(OutputByteStream& os, CharType c) {
+    static void Put(OutputByteStream& os, CharType c)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
         os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
         os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
@@ -406,45 +529,53 @@ struct UTF16BE : UTF16<CharType> {
 ///////////////////////////////////////////////////////////////////////////////
 // UTF32
 
-//! UTF-32 encoding. 
+//! UTF-32 encoding.
 /*! http://en.wikipedia.org/wiki/UTF-32
-    \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead.
-    \note implements Encoding concept
+    \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use
+   char32_t instead. \note implements Encoding concept
 
-    \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness.
-    For streaming, use UTF32LE and UTF32BE, which handle endianness.
+    \note For in-memory access, no need to concern endianness. The code units and code points are
+   represented by CPU's endianness. For streaming, use UTF32LE and UTF32BE, which handle endianness.
 */
-template<typename CharType = unsigned>
-struct UTF32 {
+template <typename CharType = unsigned>
+struct UTF32
+{
     typedef CharType Ch;
     RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4);
 
-    enum { supportUnicode = 1 };
+    enum
+    {
+        supportUnicode = 1
+    };
 
-    template<typename OutputStream>
-    static void Encode(OutputStream& os, unsigned codepoint) {
+    template <typename OutputStream>
+    static void Encode(OutputStream& os, unsigned codepoint)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
         RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
         os.Put(codepoint);
     }
 
-    template<typename OutputStream>
-    static void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
+    template <typename OutputStream>
+    static void EncodeUnsafe(OutputStream& os, unsigned codepoint)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
         RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
         PutUnsafe(os, codepoint);
     }
 
     template <typename InputStream>
-    static bool Decode(InputStream& is, unsigned* codepoint) {
+    static bool Decode(InputStream& is, unsigned* codepoint)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
-        Ch c = is.Take();
+        Ch c       = is.Take();
         *codepoint = c;
         return c <= 0x10FFFF;
     }
 
     template <typename InputStream, typename OutputStream>
-    static bool Validate(InputStream& is, OutputStream& os) {
+    static bool Validate(InputStream& is, OutputStream& os)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
         Ch c;
         os.Put(c = is.Take());
@@ -453,17 +584,20 @@ struct UTF32 {
 };
 
 //! UTF-32 little endian enocoding.
-template<typename CharType = unsigned>
-struct UTF32LE : UTF32<CharType> {
+template <typename CharType = unsigned>
+struct UTF32LE : UTF32<CharType>
+{
     template <typename InputByteStream>
-    static CharType TakeBOM(InputByteStream& is) {
+    static CharType TakeBOM(InputByteStream& is)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
         CharType c = Take(is);
         return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c;
     }
 
     template <typename InputByteStream>
-    static CharType Take(InputByteStream& is) {
+    static CharType Take(InputByteStream& is)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
         unsigned c = static_cast<uint8_t>(is.Take());
         c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
@@ -473,7 +607,8 @@ struct UTF32LE : UTF32<CharType> {
     }
 
     template <typename OutputByteStream>
-    static void PutBOM(OutputByteStream& os) {
+    static void PutBOM(OutputByteStream& os)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
         os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
         os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
@@ -482,7 +617,8 @@ struct UTF32LE : UTF32<CharType> {
     }
 
     template <typename OutputByteStream>
-    static void Put(OutputByteStream& os, CharType c) {
+    static void Put(OutputByteStream& os, CharType c)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
         os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
         os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
@@ -492,17 +628,20 @@ struct UTF32LE : UTF32<CharType> {
 };
 
 //! UTF-32 big endian encoding.
-template<typename CharType = unsigned>
-struct UTF32BE : UTF32<CharType> {
+template <typename CharType = unsigned>
+struct UTF32BE : UTF32<CharType>
+{
     template <typename InputByteStream>
-    static CharType TakeBOM(InputByteStream& is) {
+    static CharType TakeBOM(InputByteStream& is)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
         CharType c = Take(is);
-        return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c; 
+        return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c;
     }
 
     template <typename InputByteStream>
-    static CharType Take(InputByteStream& is) {
+    static CharType Take(InputByteStream& is)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
         unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24;
         c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16;
@@ -512,7 +651,8 @@ struct UTF32BE : UTF32<CharType> {
     }
 
     template <typename OutputByteStream>
-    static void PutBOM(OutputByteStream& os) {
+    static void PutBOM(OutputByteStream& os)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
         os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
         os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
@@ -521,7 +661,8 @@ struct UTF32BE : UTF32<CharType> {
     }
 
     template <typename OutputByteStream>
-    static void Put(OutputByteStream& os, CharType c) {
+    static void Put(OutputByteStream& os, CharType c)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
         os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
         os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
@@ -538,59 +679,71 @@ struct UTF32BE : UTF32<CharType> {
     \tparam CharType Code unit for storing 7-bit ASCII data. Default is char.
     \note implements Encoding concept
 */
-template<typename CharType = char>
-struct ASCII {
+template <typename CharType = char>
+struct ASCII
+{
     typedef CharType Ch;
 
-    enum { supportUnicode = 0 };
+    enum
+    {
+        supportUnicode = 0
+    };
 
-    template<typename OutputStream>
-    static void Encode(OutputStream& os, unsigned codepoint) {
+    template <typename OutputStream>
+    static void Encode(OutputStream& os, unsigned codepoint)
+    {
         RAPIDJSON_ASSERT(codepoint <= 0x7F);
         os.Put(static_cast<Ch>(codepoint & 0xFF));
     }
 
-    template<typename OutputStream>
-    static void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
+    template <typename OutputStream>
+    static void EncodeUnsafe(OutputStream& os, unsigned codepoint)
+    {
         RAPIDJSON_ASSERT(codepoint <= 0x7F);
         PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
     }
 
     template <typename InputStream>
-    static bool Decode(InputStream& is, unsigned* codepoint) {
-        uint8_t c = static_cast<uint8_t>(is.Take());
+    static bool Decode(InputStream& is, unsigned* codepoint)
+    {
+        uint8_t c  = static_cast<uint8_t>(is.Take());
         *codepoint = c;
         return c <= 0X7F;
     }
 
     template <typename InputStream, typename OutputStream>
-    static bool Validate(InputStream& is, OutputStream& os) {
+    static bool Validate(InputStream& is, OutputStream& os)
+    {
         uint8_t c = static_cast<uint8_t>(is.Take());
         os.Put(static_cast<typename OutputStream::Ch>(c));
         return c <= 0x7F;
     }
 
     template <typename InputByteStream>
-    static CharType TakeBOM(InputByteStream& is) {
+    static CharType TakeBOM(InputByteStream& is)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
         uint8_t c = static_cast<uint8_t>(Take(is));
         return static_cast<Ch>(c);
     }
 
     template <typename InputByteStream>
-    static Ch Take(InputByteStream& is) {
+    static Ch Take(InputByteStream& is)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
         return static_cast<Ch>(is.Take());
     }
 
     template <typename OutputByteStream>
-    static void PutBOM(OutputByteStream& os) {
+    static void PutBOM(OutputByteStream& os)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
         (void)os;
     }
 
     template <typename OutputByteStream>
-    static void Put(OutputByteStream& os, Ch c) {
+    static void Put(OutputByteStream& os, Ch c)
+    {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
         os.Put(static_cast<typename OutputByteStream::Ch>(c));
     }
@@ -600,50 +753,61 @@ struct ASCII {
 // AutoUTF
 
 //! Runtime-specified UTF encoding type of a stream.
-enum UTFType {
-    kUTF8 = 0,      //!< UTF-8.
-    kUTF16LE = 1,   //!< UTF-16 little endian.
-    kUTF16BE = 2,   //!< UTF-16 big endian.
-    kUTF32LE = 3,   //!< UTF-32 little endian.
-    kUTF32BE = 4    //!< UTF-32 big endian.
+enum UTFType
+{
+    kUTF8    = 0, //!< UTF-8.
+    kUTF16LE = 1, //!< UTF-16 little endian.
+    kUTF16BE = 2, //!< UTF-16 big endian.
+    kUTF32LE = 3, //!< UTF-32 little endian.
+    kUTF32BE = 4  //!< UTF-32 big endian.
 };
 
 //! Dynamically select encoding according to stream's runtime-specified UTF encoding type.
-/*! \note This class can be used with AutoUTFInputtStream and AutoUTFOutputStream, which provides GetType().
-*/
-template<typename CharType>
-struct AutoUTF {
+/*! \note This class can be used with AutoUTFInputtStream and AutoUTFOutputStream, which provides
+ * GetType().
+ */
+template <typename CharType>
+struct AutoUTF
+{
     typedef CharType Ch;
 
-    enum { supportUnicode = 1 };
+    enum
+    {
+        supportUnicode = 1
+    };
 
-#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
+#define RAPIDJSON_ENCODINGS_FUNC(x) \
+    UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
 
-    template<typename OutputStream>
-    static RAPIDJSON_FORCEINLINE void Encode(OutputStream& os, unsigned codepoint) {
+    template <typename OutputStream>
+    static RAPIDJSON_FORCEINLINE void Encode(OutputStream& os, unsigned codepoint)
+    {
         typedef void (*EncodeFunc)(OutputStream&, unsigned);
-        static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) };
+        static const EncodeFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(Encode)};
         (*f[os.GetType()])(os, codepoint);
     }
 
-    template<typename OutputStream>
-    static RAPIDJSON_FORCEINLINE void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
+    template <typename OutputStream>
+    static RAPIDJSON_FORCEINLINE void EncodeUnsafe(OutputStream& os, unsigned codepoint)
+    {
         typedef void (*EncodeFunc)(OutputStream&, unsigned);
-        static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe) };
+        static const EncodeFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe)};
         (*f[os.GetType()])(os, codepoint);
     }
 
     template <typename InputStream>
-    static RAPIDJSON_FORCEINLINE bool Decode(InputStream& is, unsigned* codepoint) {
+    static RAPIDJSON_FORCEINLINE bool Decode(InputStream& is, unsigned* codepoint)
+    {
         typedef bool (*DecodeFunc)(InputStream&, unsigned*);
-        static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) };
+        static const DecodeFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(Decode)};
         return (*f[is.GetType()])(is, codepoint);
     }
 
     template <typename InputStream, typename OutputStream>
-    static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) {
+    static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os)
+    {
         typedef bool (*ValidateFunc)(InputStream&, OutputStream&);
-        static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) };
+        static const ValidateFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(Validate)};
         return (*f[is.GetType()])(is, os);
     }
 
@@ -654,56 +818,67 @@ struct AutoUTF {
 // Transcoder
 
 //! Encoding conversion.
-template<typename SourceEncoding, typename TargetEncoding>
-struct Transcoder {
-    //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream.
-    template<typename InputStream, typename OutputStream>
-    static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os) {
+template <typename SourceEncoding, typename TargetEncoding>
+struct Transcoder
+{
+    //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to
+    //! the output stream.
+    template <typename InputStream, typename OutputStream>
+    static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os)
+    {
         unsigned codepoint;
-        if (!SourceEncoding::Decode(is, &codepoint))
+        if(!SourceEncoding::Decode(is, &codepoint))
             return false;
         TargetEncoding::Encode(os, codepoint);
         return true;
     }
 
-    template<typename InputStream, typename OutputStream>
-    static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os) {
+    template <typename InputStream, typename OutputStream>
+    static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os)
+    {
         unsigned codepoint;
-        if (!SourceEncoding::Decode(is, &codepoint))
+        if(!SourceEncoding::Decode(is, &codepoint))
             return false;
         TargetEncoding::EncodeUnsafe(os, codepoint);
         return true;
     }
 
     //! Validate one Unicode codepoint from an encoded stream.
-    template<typename InputStream, typename OutputStream>
-    static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) {
-        return Transcode(is, os);   // Since source/target encoding is different, must transcode.
+    template <typename InputStream, typename OutputStream>
+    static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os)
+    {
+        return Transcode(is, os); // Since source/target encoding is different, must transcode.
     }
 };
 
 // Forward declaration.
-template<typename Stream>
+template <typename Stream>
 inline void PutUnsafe(Stream& stream, typename Stream::Ch c);
 
 //! Specialization of Transcoder with same source and target encoding.
-template<typename Encoding>
-struct Transcoder<Encoding, Encoding> {
-    template<typename InputStream, typename OutputStream>
-    static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os) {
-        os.Put(is.Take());  // Just copy one code unit. This semantic is different from primary template class.
+template <typename Encoding>
+struct Transcoder<Encoding, Encoding>
+{
+    template <typename InputStream, typename OutputStream>
+    static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os)
+    {
+        os.Put(is.Take()); // Just copy one code unit. This semantic is different from primary
+                           // template class.
         return true;
     }
-    
-    template<typename InputStream, typename OutputStream>
-    static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os) {
-        PutUnsafe(os, is.Take());  // Just copy one code unit. This semantic is different from primary template class.
+
+    template <typename InputStream, typename OutputStream>
+    static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os)
+    {
+        PutUnsafe(os, is.Take()); // Just copy one code unit. This semantic is different from
+                                  // primary template class.
         return true;
     }
-    
-    template<typename InputStream, typename OutputStream>
-    static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) {
-        return Encoding::Validate(is, os);  // source/target encoding are the same
+
+    template <typename InputStream, typename OutputStream>
+    static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os)
+    {
+        return Encoding::Validate(is, os); // source/target encoding are the same
     }
 };
 
diff --git a/include/rapidjson/error/en.h b/include/rapidjson/error/en.h
index c87b04eb13..9ea11e40ec 100644
--- a/include/rapidjson/error/en.h
+++ b/include/rapidjson/error/en.h
@@ -19,8 +19,8 @@
 
 #ifdef __clang__
 RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(switch-enum)
-RAPIDJSON_DIAG_OFF(covered-switch-default)
+RAPIDJSON_DIAG_OFF(switch - enum)
+RAPIDJSON_DIAG_OFF(covered - switch - default)
 #endif
 
 RAPIDJSON_NAMESPACE_BEGIN
@@ -33,35 +33,51 @@ RAPIDJSON_NAMESPACE_BEGIN
     \note User can make a copy of this function for localization.
         Using switch-case is safer for future modification of error codes.
 */
-inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErrorCode) {
-    switch (parseErrorCode) {
-        case kParseErrorNone:                           return RAPIDJSON_ERROR_STRING("No error.");
+inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErrorCode)
+{
+    switch(parseErrorCode)
+    {
+    case kParseErrorNone: return RAPIDJSON_ERROR_STRING("No error.");
 
-        case kParseErrorDocumentEmpty:                  return RAPIDJSON_ERROR_STRING("The document is empty.");
-        case kParseErrorDocumentRootNotSingular:        return RAPIDJSON_ERROR_STRING("The document root must not be followed by other values.");
+    case kParseErrorDocumentEmpty: return RAPIDJSON_ERROR_STRING("The document is empty.");
+    case kParseErrorDocumentRootNotSingular:
+        return RAPIDJSON_ERROR_STRING("The document root must not be followed by other values.");
 
-        case kParseErrorValueInvalid:                   return RAPIDJSON_ERROR_STRING("Invalid value.");
+    case kParseErrorValueInvalid: return RAPIDJSON_ERROR_STRING("Invalid value.");
 
-        case kParseErrorObjectMissName:                 return RAPIDJSON_ERROR_STRING("Missing a name for object member.");
-        case kParseErrorObjectMissColon:                return RAPIDJSON_ERROR_STRING("Missing a colon after a name of object member.");
-        case kParseErrorObjectMissCommaOrCurlyBracket:  return RAPIDJSON_ERROR_STRING("Missing a comma or '}' after an object member.");
+    case kParseErrorObjectMissName:
+        return RAPIDJSON_ERROR_STRING("Missing a name for object member.");
+    case kParseErrorObjectMissColon:
+        return RAPIDJSON_ERROR_STRING("Missing a colon after a name of object member.");
+    case kParseErrorObjectMissCommaOrCurlyBracket:
+        return RAPIDJSON_ERROR_STRING("Missing a comma or '}' after an object member.");
 
-        case kParseErrorArrayMissCommaOrSquareBracket:  return RAPIDJSON_ERROR_STRING("Missing a comma or ']' after an array element.");
+    case kParseErrorArrayMissCommaOrSquareBracket:
+        return RAPIDJSON_ERROR_STRING("Missing a comma or ']' after an array element.");
 
-        case kParseErrorStringUnicodeEscapeInvalidHex:  return RAPIDJSON_ERROR_STRING("Incorrect hex digit after \\u escape in string.");
-        case kParseErrorStringUnicodeSurrogateInvalid:  return RAPIDJSON_ERROR_STRING("The surrogate pair in string is invalid.");
-        case kParseErrorStringEscapeInvalid:            return RAPIDJSON_ERROR_STRING("Invalid escape character in string.");
-        case kParseErrorStringMissQuotationMark:        return RAPIDJSON_ERROR_STRING("Missing a closing quotation mark in string.");
-        case kParseErrorStringInvalidEncoding:          return RAPIDJSON_ERROR_STRING("Invalid encoding in string.");
+    case kParseErrorStringUnicodeEscapeInvalidHex:
+        return RAPIDJSON_ERROR_STRING("Incorrect hex digit after \\u escape in string.");
+    case kParseErrorStringUnicodeSurrogateInvalid:
+        return RAPIDJSON_ERROR_STRING("The surrogate pair in string is invalid.");
+    case kParseErrorStringEscapeInvalid:
+        return RAPIDJSON_ERROR_STRING("Invalid escape character in string.");
+    case kParseErrorStringMissQuotationMark:
+        return RAPIDJSON_ERROR_STRING("Missing a closing quotation mark in string.");
+    case kParseErrorStringInvalidEncoding:
+        return RAPIDJSON_ERROR_STRING("Invalid encoding in string.");
 
-        case kParseErrorNumberTooBig:                   return RAPIDJSON_ERROR_STRING("Number too big to be stored in double.");
-        case kParseErrorNumberMissFraction:             return RAPIDJSON_ERROR_STRING("Miss fraction part in number.");
-        case kParseErrorNumberMissExponent:             return RAPIDJSON_ERROR_STRING("Miss exponent in number.");
+    case kParseErrorNumberTooBig:
+        return RAPIDJSON_ERROR_STRING("Number too big to be stored in double.");
+    case kParseErrorNumberMissFraction:
+        return RAPIDJSON_ERROR_STRING("Miss fraction part in number.");
+    case kParseErrorNumberMissExponent: return RAPIDJSON_ERROR_STRING("Miss exponent in number.");
 
-        case kParseErrorTermination:                    return RAPIDJSON_ERROR_STRING("Terminate parsing due to Handler error.");
-        case kParseErrorUnspecificSyntaxError:          return RAPIDJSON_ERROR_STRING("Unspecific syntax error.");
+    case kParseErrorTermination:
+        return RAPIDJSON_ERROR_STRING("Terminate parsing due to Handler error.");
+    case kParseErrorUnspecificSyntaxError:
+        return RAPIDJSON_ERROR_STRING("Unspecific syntax error.");
 
-        default:                                        return RAPIDJSON_ERROR_STRING("Unknown error.");
+    default: return RAPIDJSON_ERROR_STRING("Unknown error.");
     }
 }
 
@@ -73,46 +89,102 @@ inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErro
     \note User can make a copy of this function for localization.
         Using switch-case is safer for future modification of error codes.
 */
-inline const RAPIDJSON_ERROR_CHARTYPE* GetValidateError_En(ValidateErrorCode validateErrorCode) {
-    switch (validateErrorCode) {
-        case kValidateErrors:                           return RAPIDJSON_ERROR_STRING("One or more validation errors have occurred");
-        case kValidateErrorNone:                        return RAPIDJSON_ERROR_STRING("No error.");
+inline const RAPIDJSON_ERROR_CHARTYPE* GetValidateError_En(ValidateErrorCode validateErrorCode)
+{
+    switch(validateErrorCode)
+    {
+    case kValidateErrors:
+        return RAPIDJSON_ERROR_STRING("One or more validation errors have occurred");
+    case kValidateErrorNone: return RAPIDJSON_ERROR_STRING("No error.");
 
-        case kValidateErrorMultipleOf:                  return RAPIDJSON_ERROR_STRING("Number '%actual' is not a multiple of the 'multipleOf' value '%expected'.");
-        case kValidateErrorMaximum:                     return RAPIDJSON_ERROR_STRING("Number '%actual' is greater than the 'maximum' value '%expected'.");
-        case kValidateErrorExclusiveMaximum:            return RAPIDJSON_ERROR_STRING("Number '%actual' is greater than or equal to the 'exclusiveMaximum' value '%expected'.");
-        case kValidateErrorMinimum:                     return RAPIDJSON_ERROR_STRING("Number '%actual' is less than the 'minimum' value '%expected'.");
-        case kValidateErrorExclusiveMinimum:            return RAPIDJSON_ERROR_STRING("Number '%actual' is less than or equal to the 'exclusiveMinimum' value '%expected'.");
+    case kValidateErrorMultipleOf:
+        return RAPIDJSON_ERROR_STRING(
+            "Number '%actual' is not a multiple of the 'multipleOf' value '%expected'.");
+    case kValidateErrorMaximum:
+        return RAPIDJSON_ERROR_STRING(
+            "Number '%actual' is greater than the 'maximum' value '%expected'.");
+    case kValidateErrorExclusiveMaximum:
+        return RAPIDJSON_ERROR_STRING("Number '%actual' is greater than or equal to the "
+                                      "'exclusiveMaximum' value '%expected'.");
+    case kValidateErrorMinimum:
+        return RAPIDJSON_ERROR_STRING(
+            "Number '%actual' is less than the 'minimum' value '%expected'.");
+    case kValidateErrorExclusiveMinimum:
+        return RAPIDJSON_ERROR_STRING(
+            "Number '%actual' is less than or equal to the 'exclusiveMinimum' value '%expected'.");
 
-        case kValidateErrorMaxLength:                   return RAPIDJSON_ERROR_STRING("String '%actual' is longer than the 'maxLength' value '%expected'.");
-        case kValidateErrorMinLength:                   return RAPIDJSON_ERROR_STRING("String '%actual' is shorter than the 'minLength' value '%expected'.");
-        case kValidateErrorPattern:                     return RAPIDJSON_ERROR_STRING("String '%actual' does not match the 'pattern' regular expression.");
+    case kValidateErrorMaxLength:
+        return RAPIDJSON_ERROR_STRING(
+            "String '%actual' is longer than the 'maxLength' value '%expected'.");
+    case kValidateErrorMinLength:
+        return RAPIDJSON_ERROR_STRING(
+            "String '%actual' is shorter than the 'minLength' value '%expected'.");
+    case kValidateErrorPattern:
+        return RAPIDJSON_ERROR_STRING(
+            "String '%actual' does not match the 'pattern' regular expression.");
 
-        case kValidateErrorMaxItems:                    return RAPIDJSON_ERROR_STRING("Array of length '%actual' is longer than the 'maxItems' value '%expected'.");
-        case kValidateErrorMinItems:                    return RAPIDJSON_ERROR_STRING("Array of length '%actual' is shorter than the 'minItems' value '%expected'.");
-        case kValidateErrorUniqueItems:                 return RAPIDJSON_ERROR_STRING("Array has duplicate items at indices '%duplicates' but 'uniqueItems' is true.");
-        case kValidateErrorAdditionalItems:             return RAPIDJSON_ERROR_STRING("Array has an additional item at index '%disallowed' that is not allowed by the schema.");
+    case kValidateErrorMaxItems:
+        return RAPIDJSON_ERROR_STRING(
+            "Array of length '%actual' is longer than the 'maxItems' value '%expected'.");
+    case kValidateErrorMinItems:
+        return RAPIDJSON_ERROR_STRING(
+            "Array of length '%actual' is shorter than the 'minItems' value '%expected'.");
+    case kValidateErrorUniqueItems:
+        return RAPIDJSON_ERROR_STRING(
+            "Array has duplicate items at indices '%duplicates' but 'uniqueItems' is true.");
+    case kValidateErrorAdditionalItems:
+        return RAPIDJSON_ERROR_STRING("Array has an additional item at index '%disallowed' that is "
+                                      "not allowed by the schema.");
 
-        case kValidateErrorMaxProperties:               return RAPIDJSON_ERROR_STRING("Object has '%actual' members which is more than 'maxProperties' value '%expected'.");
-        case kValidateErrorMinProperties:               return RAPIDJSON_ERROR_STRING("Object has '%actual' members which is less than 'minProperties' value '%expected'.");
-        case kValidateErrorRequired:                    return RAPIDJSON_ERROR_STRING("Object is missing the following members required by the schema: '%missing'.");
-        case kValidateErrorAdditionalProperties:        return RAPIDJSON_ERROR_STRING("Object has an additional member '%disallowed' that is not allowed by the schema.");
-        case kValidateErrorPatternProperties:           return RAPIDJSON_ERROR_STRING("Object has 'patternProperties' that are not allowed by the schema.");
-        case kValidateErrorDependencies:                return RAPIDJSON_ERROR_STRING("Object has missing property or schema dependencies, refer to following errors.");
+    case kValidateErrorMaxProperties:
+        return RAPIDJSON_ERROR_STRING(
+            "Object has '%actual' members which is more than 'maxProperties' value '%expected'.");
+    case kValidateErrorMinProperties:
+        return RAPIDJSON_ERROR_STRING(
+            "Object has '%actual' members which is less than 'minProperties' value '%expected'.");
+    case kValidateErrorRequired:
+        return RAPIDJSON_ERROR_STRING(
+            "Object is missing the following members required by the schema: '%missing'.");
+    case kValidateErrorAdditionalProperties:
+        return RAPIDJSON_ERROR_STRING(
+            "Object has an additional member '%disallowed' that is not allowed by the schema.");
+    case kValidateErrorPatternProperties:
+        return RAPIDJSON_ERROR_STRING(
+            "Object has 'patternProperties' that are not allowed by the schema.");
+    case kValidateErrorDependencies:
+        return RAPIDJSON_ERROR_STRING(
+            "Object has missing property or schema dependencies, refer to following errors.");
 
-        case kValidateErrorEnum:                        return RAPIDJSON_ERROR_STRING("Property has a value that is not one of its allowed enumerated values.");
-        case kValidateErrorType:                        return RAPIDJSON_ERROR_STRING("Property has a type '%actual' that is not in the following list: '%expected'.");
+    case kValidateErrorEnum:
+        return RAPIDJSON_ERROR_STRING(
+            "Property has a value that is not one of its allowed enumerated values.");
+    case kValidateErrorType:
+        return RAPIDJSON_ERROR_STRING(
+            "Property has a type '%actual' that is not in the following list: '%expected'.");
 
-        case kValidateErrorOneOf:                       return RAPIDJSON_ERROR_STRING("Property did not match any of the sub-schemas specified by 'oneOf', refer to following errors.");
-        case kValidateErrorOneOfMatch:                  return RAPIDJSON_ERROR_STRING("Property matched more than one of the sub-schemas specified by 'oneOf', indices '%matches'.");
-        case kValidateErrorAllOf:                       return RAPIDJSON_ERROR_STRING("Property did not match all of the sub-schemas specified by 'allOf', refer to following errors.");
-        case kValidateErrorAnyOf:                       return RAPIDJSON_ERROR_STRING("Property did not match any of the sub-schemas specified by 'anyOf', refer to following errors.");
-        case kValidateErrorNot:                         return RAPIDJSON_ERROR_STRING("Property matched the sub-schema specified by 'not'.");
+    case kValidateErrorOneOf:
+        return RAPIDJSON_ERROR_STRING("Property did not match any of the sub-schemas specified by "
+                                      "'oneOf', refer to following errors.");
+    case kValidateErrorOneOfMatch:
+        return RAPIDJSON_ERROR_STRING("Property matched more than one of the sub-schemas specified "
+                                      "by 'oneOf', indices '%matches'.");
+    case kValidateErrorAllOf:
+        return RAPIDJSON_ERROR_STRING("Property did not match all of the sub-schemas specified by "
+                                      "'allOf', refer to following errors.");
+    case kValidateErrorAnyOf:
+        return RAPIDJSON_ERROR_STRING("Property did not match any of the sub-schemas specified by "
+                                      "'anyOf', refer to following errors.");
+    case kValidateErrorNot:
+        return RAPIDJSON_ERROR_STRING("Property matched the sub-schema specified by 'not'.");
 
-        case kValidateErrorReadOnly:                    return RAPIDJSON_ERROR_STRING("Property is read-only but has been provided when validation is for writing.");
-        case kValidateErrorWriteOnly:                   return RAPIDJSON_ERROR_STRING("Property is write-only but has been provided when validation is for reading.");
+    case kValidateErrorReadOnly:
+        return RAPIDJSON_ERROR_STRING(
+            "Property is read-only but has been provided when validation is for writing.");
+    case kValidateErrorWriteOnly:
+        return RAPIDJSON_ERROR_STRING(
+            "Property is write-only but has been provided when validation is for reading.");
 
-        default:                                        return RAPIDJSON_ERROR_STRING("Unknown error.");
+    default: return RAPIDJSON_ERROR_STRING("Unknown error.");
     }
 }
 
@@ -124,27 +196,46 @@ inline const RAPIDJSON_ERROR_CHARTYPE* GetValidateError_En(ValidateErrorCode val
     \note User can make a copy of this function for localization.
         Using switch-case is safer for future modification of error codes.
 */
-  inline const RAPIDJSON_ERROR_CHARTYPE* GetSchemaError_En(SchemaErrorCode schemaErrorCode) {
-      switch (schemaErrorCode) {
-          case kSchemaErrorNone:                        return RAPIDJSON_ERROR_STRING("No error.");
+inline const RAPIDJSON_ERROR_CHARTYPE* GetSchemaError_En(SchemaErrorCode schemaErrorCode)
+{
+    switch(schemaErrorCode)
+    {
+    case kSchemaErrorNone: return RAPIDJSON_ERROR_STRING("No error.");
 
-          case kSchemaErrorStartUnknown:                return RAPIDJSON_ERROR_STRING("Pointer '%value' to start of schema does not resolve to a location in the document.");
-          case kSchemaErrorRefPlainName:                return RAPIDJSON_ERROR_STRING("$ref fragment '%value' must be a JSON pointer.");
-          case kSchemaErrorRefInvalid:                  return RAPIDJSON_ERROR_STRING("$ref must not be an empty string.");
-          case kSchemaErrorRefPointerInvalid:           return RAPIDJSON_ERROR_STRING("$ref fragment '%value' is not a valid JSON pointer at offset '%offset'.");
-          case kSchemaErrorRefUnknown:                  return RAPIDJSON_ERROR_STRING("$ref '%value' does not resolve to a location in the target document.");
-          case kSchemaErrorRefCyclical:                 return RAPIDJSON_ERROR_STRING("$ref '%value' is cyclical.");
-          case kSchemaErrorRefNoRemoteProvider:         return RAPIDJSON_ERROR_STRING("$ref is remote but there is no remote provider.");
-          case kSchemaErrorRefNoRemoteSchema:           return RAPIDJSON_ERROR_STRING("$ref '%value' is remote but the remote provider did not return a schema.");
-          case kSchemaErrorRegexInvalid:                return RAPIDJSON_ERROR_STRING("Invalid regular expression '%value' in 'pattern' or 'patternProperties'.");
-          case kSchemaErrorSpecUnknown:                 return RAPIDJSON_ERROR_STRING("JSON schema draft or OpenAPI version is not recognized.");
-          case kSchemaErrorSpecUnsupported:             return RAPIDJSON_ERROR_STRING("JSON schema draft or OpenAPI version is not supported.");
-          case kSchemaErrorSpecIllegal:                 return RAPIDJSON_ERROR_STRING("Both JSON schema draft and OpenAPI version found in document.");
-          case kSchemaErrorReadOnlyAndWriteOnly:        return RAPIDJSON_ERROR_STRING("Property must not be both 'readOnly' and 'writeOnly'.");
+    case kSchemaErrorStartUnknown:
+        return RAPIDJSON_ERROR_STRING(
+            "Pointer '%value' to start of schema does not resolve to a location in the document.");
+    case kSchemaErrorRefPlainName:
+        return RAPIDJSON_ERROR_STRING("$ref fragment '%value' must be a JSON pointer.");
+    case kSchemaErrorRefInvalid: return RAPIDJSON_ERROR_STRING("$ref must not be an empty string.");
+    case kSchemaErrorRefPointerInvalid:
+        return RAPIDJSON_ERROR_STRING(
+            "$ref fragment '%value' is not a valid JSON pointer at offset '%offset'.");
+    case kSchemaErrorRefUnknown:
+        return RAPIDJSON_ERROR_STRING(
+            "$ref '%value' does not resolve to a location in the target document.");
+    case kSchemaErrorRefCyclical: return RAPIDJSON_ERROR_STRING("$ref '%value' is cyclical.");
+    case kSchemaErrorRefNoRemoteProvider:
+        return RAPIDJSON_ERROR_STRING("$ref is remote but there is no remote provider.");
+    case kSchemaErrorRefNoRemoteSchema:
+        return RAPIDJSON_ERROR_STRING(
+            "$ref '%value' is remote but the remote provider did not return a schema.");
+    case kSchemaErrorRegexInvalid:
+        return RAPIDJSON_ERROR_STRING(
+            "Invalid regular expression '%value' in 'pattern' or 'patternProperties'.");
+    case kSchemaErrorSpecUnknown:
+        return RAPIDJSON_ERROR_STRING("JSON schema draft or OpenAPI version is not recognized.");
+    case kSchemaErrorSpecUnsupported:
+        return RAPIDJSON_ERROR_STRING("JSON schema draft or OpenAPI version is not supported.");
+    case kSchemaErrorSpecIllegal:
+        return RAPIDJSON_ERROR_STRING(
+            "Both JSON schema draft and OpenAPI version found in document.");
+    case kSchemaErrorReadOnlyAndWriteOnly:
+        return RAPIDJSON_ERROR_STRING("Property must not be both 'readOnly' and 'writeOnly'.");
 
-          default:                                      return RAPIDJSON_ERROR_STRING("Unknown error.");
+    default: return RAPIDJSON_ERROR_STRING("Unknown error.");
     }
-  }
+}
 
 //! Maps error code of pointer parse into error message.
 /*!
@@ -154,16 +245,22 @@ inline const RAPIDJSON_ERROR_CHARTYPE* GetValidateError_En(ValidateErrorCode val
     \note User can make a copy of this function for localization.
         Using switch-case is safer for future modification of error codes.
 */
-inline const RAPIDJSON_ERROR_CHARTYPE* GetPointerParseError_En(PointerParseErrorCode pointerParseErrorCode) {
-    switch (pointerParseErrorCode) {
-        case kPointerParseErrorNone:                       return RAPIDJSON_ERROR_STRING("No error.");
+inline const RAPIDJSON_ERROR_CHARTYPE*
+GetPointerParseError_En(PointerParseErrorCode pointerParseErrorCode)
+{
+    switch(pointerParseErrorCode)
+    {
+    case kPointerParseErrorNone: return RAPIDJSON_ERROR_STRING("No error.");
 
-        case kPointerParseErrorTokenMustBeginWithSolidus:  return RAPIDJSON_ERROR_STRING("A token must begin with a '/'.");
-        case kPointerParseErrorInvalidEscape:              return RAPIDJSON_ERROR_STRING("Invalid escape.");
-        case kPointerParseErrorInvalidPercentEncoding:     return RAPIDJSON_ERROR_STRING("Invalid percent encoding in URI fragment.");
-        case kPointerParseErrorCharacterMustPercentEncode: return RAPIDJSON_ERROR_STRING("A character must be percent encoded in a URI fragment.");
+    case kPointerParseErrorTokenMustBeginWithSolidus:
+        return RAPIDJSON_ERROR_STRING("A token must begin with a '/'.");
+    case kPointerParseErrorInvalidEscape: return RAPIDJSON_ERROR_STRING("Invalid escape.");
+    case kPointerParseErrorInvalidPercentEncoding:
+        return RAPIDJSON_ERROR_STRING("Invalid percent encoding in URI fragment.");
+    case kPointerParseErrorCharacterMustPercentEncode:
+        return RAPIDJSON_ERROR_STRING("A character must be percent encoded in a URI fragment.");
 
-        default:                                           return RAPIDJSON_ERROR_STRING("Unknown error.");
+    default: return RAPIDJSON_ERROR_STRING("Unknown error.");
     }
 }
 
diff --git a/include/rapidjson/error/error.h b/include/rapidjson/error/error.h
index cae345db36..12ca3e085c 100644
--- a/include/rapidjson/error/error.h
+++ b/include/rapidjson/error/error.h
@@ -61,32 +61,33 @@ RAPIDJSON_NAMESPACE_BEGIN
 /*! \ingroup RAPIDJSON_ERRORS
     \see GenericReader::Parse, GenericReader::GetParseErrorCode
 */
-enum ParseErrorCode {
-    kParseErrorNone = 0,                        //!< No error.
+enum ParseErrorCode
+{
+    kParseErrorNone = 0, //!< No error.
 
-    kParseErrorDocumentEmpty,                   //!< The document is empty.
-    kParseErrorDocumentRootNotSingular,         //!< The document root must not follow by other values.
+    kParseErrorDocumentEmpty,           //!< The document is empty.
+    kParseErrorDocumentRootNotSingular, //!< The document root must not follow by other values.
 
-    kParseErrorValueInvalid,                    //!< Invalid value.
+    kParseErrorValueInvalid, //!< Invalid value.
 
-    kParseErrorObjectMissName,                  //!< Missing a name for object member.
-    kParseErrorObjectMissColon,                 //!< Missing a colon after a name of object member.
-    kParseErrorObjectMissCommaOrCurlyBracket,   //!< Missing a comma or '}' after an object member.
+    kParseErrorObjectMissName,                //!< Missing a name for object member.
+    kParseErrorObjectMissColon,               //!< Missing a colon after a name of object member.
+    kParseErrorObjectMissCommaOrCurlyBracket, //!< Missing a comma or '}' after an object member.
 
-    kParseErrorArrayMissCommaOrSquareBracket,   //!< Missing a comma or ']' after an array element.
+    kParseErrorArrayMissCommaOrSquareBracket, //!< Missing a comma or ']' after an array element.
 
-    kParseErrorStringUnicodeEscapeInvalidHex,   //!< Incorrect hex digit after \\u escape in string.
-    kParseErrorStringUnicodeSurrogateInvalid,   //!< The surrogate pair in string is invalid.
-    kParseErrorStringEscapeInvalid,             //!< Invalid escape character in string.
-    kParseErrorStringMissQuotationMark,         //!< Missing a closing quotation mark in string.
-    kParseErrorStringInvalidEncoding,           //!< Invalid encoding in string.
+    kParseErrorStringUnicodeEscapeInvalidHex, //!< Incorrect hex digit after \\u escape in string.
+    kParseErrorStringUnicodeSurrogateInvalid, //!< The surrogate pair in string is invalid.
+    kParseErrorStringEscapeInvalid,           //!< Invalid escape character in string.
+    kParseErrorStringMissQuotationMark,       //!< Missing a closing quotation mark in string.
+    kParseErrorStringInvalidEncoding,         //!< Invalid encoding in string.
 
-    kParseErrorNumberTooBig,                    //!< Number too big to be stored in double.
-    kParseErrorNumberMissFraction,              //!< Miss fraction part in number.
-    kParseErrorNumberMissExponent,              //!< Miss exponent in number.
+    kParseErrorNumberTooBig,       //!< Number too big to be stored in double.
+    kParseErrorNumberMissFraction, //!< Miss fraction part in number.
+    kParseErrorNumberMissExponent, //!< Miss exponent in number.
 
-    kParseErrorTermination,                     //!< Parsing was terminated.
-    kParseErrorUnspecificSyntaxError            //!< Unspecific syntax error.
+    kParseErrorTermination,          //!< Parsing was terminated.
+    kParseErrorUnspecificSyntaxError //!< Unspecific syntax error.
 };
 
 //! Result of parsing (wraps ParseErrorCode)
@@ -103,10 +104,12 @@ enum ParseErrorCode {
     \endcode
     \see GenericReader::Parse, GenericDocument::Parse
 */
-struct ParseResult {
+struct ParseResult
+{
     //!! Unspecified boolean type
     typedef bool (ParseResult::*BooleanType)() const;
-public:
+
+    public:
     //! Default constructor, no error.
     ParseResult() : code_(kParseErrorNone), offset_(0) {}
     //! Constructor to set an error.
@@ -124,18 +127,25 @@ public:
 
     bool operator==(const ParseResult& that) const { return code_ == that.code_; }
     bool operator==(ParseErrorCode code) const { return code_ == code; }
-    friend bool operator==(ParseErrorCode code, const ParseResult & err) { return code == err.code_; }
+    friend bool operator==(ParseErrorCode code, const ParseResult& err)
+    {
+        return code == err.code_;
+    }
 
     bool operator!=(const ParseResult& that) const { return !(*this == that); }
     bool operator!=(ParseErrorCode code) const { return !(*this == code); }
-    friend bool operator!=(ParseErrorCode code, const ParseResult & err) { return err != code; }
+    friend bool operator!=(ParseErrorCode code, const ParseResult& err) { return err != code; }
 
     //! Reset error code.
     void Clear() { Set(kParseErrorNone); }
     //! Update error code and offset.
-    void Set(ParseErrorCode code, size_t offset = 0) { code_ = code; offset_ = offset; }
+    void Set(ParseErrorCode code, size_t offset = 0)
+    {
+        code_   = code;
+        offset_ = offset;
+    }
 
-private:
+    private:
     ParseErrorCode code_;
     size_t offset_;
 };
@@ -159,43 +169,49 @@ typedef const RAPIDJSON_ERROR_CHARTYPE* (*GetParseErrorFunc)(ParseErrorCode);
 /*! \ingroup RAPIDJSON_ERRORS
     \see GenericSchemaValidator
 */
-enum ValidateErrorCode {
-    kValidateErrors    = -1,                   //!< Top level error code when kValidateContinueOnErrorsFlag set.
-    kValidateErrorNone = 0,                    //!< No error.
+enum ValidateErrorCode
+{
+    kValidateErrors    = -1, //!< Top level error code when kValidateContinueOnErrorsFlag set.
+    kValidateErrorNone = 0,  //!< No error.
 
-    kValidateErrorMultipleOf,                  //!< Number is not a multiple of the 'multipleOf' value.
-    kValidateErrorMaximum,                     //!< Number is greater than the 'maximum' value.
-    kValidateErrorExclusiveMaximum,            //!< Number is greater than or equal to the 'maximum' value.
-    kValidateErrorMinimum,                     //!< Number is less than the 'minimum' value.
-    kValidateErrorExclusiveMinimum,            //!< Number is less than or equal to the 'minimum' value.
+    kValidateErrorMultipleOf,       //!< Number is not a multiple of the 'multipleOf' value.
+    kValidateErrorMaximum,          //!< Number is greater than the 'maximum' value.
+    kValidateErrorExclusiveMaximum, //!< Number is greater than or equal to the 'maximum' value.
+    kValidateErrorMinimum,          //!< Number is less than the 'minimum' value.
+    kValidateErrorExclusiveMinimum, //!< Number is less than or equal to the 'minimum' value.
 
-    kValidateErrorMaxLength,                   //!< String is longer than the 'maxLength' value.
-    kValidateErrorMinLength,                   //!< String is longer than the 'maxLength' value.
-    kValidateErrorPattern,                     //!< String does not match the 'pattern' regular expression.
+    kValidateErrorMaxLength, //!< String is longer than the 'maxLength' value.
+    kValidateErrorMinLength, //!< String is longer than the 'maxLength' value.
+    kValidateErrorPattern,   //!< String does not match the 'pattern' regular expression.
 
-    kValidateErrorMaxItems,                    //!< Array is longer than the 'maxItems' value.
-    kValidateErrorMinItems,                    //!< Array is shorter than the 'minItems' value.
-    kValidateErrorUniqueItems,                 //!< Array has duplicate items but 'uniqueItems' is true.
-    kValidateErrorAdditionalItems,             //!< Array has additional items that are not allowed by the schema.
+    kValidateErrorMaxItems,        //!< Array is longer than the 'maxItems' value.
+    kValidateErrorMinItems,        //!< Array is shorter than the 'minItems' value.
+    kValidateErrorUniqueItems,     //!< Array has duplicate items but 'uniqueItems' is true.
+    kValidateErrorAdditionalItems, //!< Array has additional items that are not allowed by the
+                                   //!< schema.
 
-    kValidateErrorMaxProperties,               //!< Object has more members than 'maxProperties' value.
-    kValidateErrorMinProperties,               //!< Object has less members than 'minProperties' value.
-    kValidateErrorRequired,                    //!< Object is missing one or more members required by the schema.
-    kValidateErrorAdditionalProperties,        //!< Object has additional members that are not allowed by the schema.
-    kValidateErrorPatternProperties,           //!< See other errors.
-    kValidateErrorDependencies,                //!< Object has missing property or schema dependencies.
+    kValidateErrorMaxProperties, //!< Object has more members than 'maxProperties' value.
+    kValidateErrorMinProperties, //!< Object has less members than 'minProperties' value.
+    kValidateErrorRequired,      //!< Object is missing one or more members required by the schema.
+    kValidateErrorAdditionalProperties, //!< Object has additional members that are not allowed by
+                                        //!< the schema.
+    kValidateErrorPatternProperties,    //!< See other errors.
+    kValidateErrorDependencies,         //!< Object has missing property or schema dependencies.
 
-    kValidateErrorEnum,                        //!< Property has a value that is not one of its allowed enumerated values.
-    kValidateErrorType,                        //!< Property has a type that is not allowed by the schema.
+    kValidateErrorEnum, //!< Property has a value that is not one of its allowed enumerated values.
+    kValidateErrorType, //!< Property has a type that is not allowed by the schema.
 
-    kValidateErrorOneOf,                       //!< Property did not match any of the sub-schemas specified by 'oneOf'.
-    kValidateErrorOneOfMatch,                  //!< Property matched more than one of the sub-schemas specified by 'oneOf'.
-    kValidateErrorAllOf,                       //!< Property did not match all of the sub-schemas specified by 'allOf'.
-    kValidateErrorAnyOf,                       //!< Property did not match any of the sub-schemas specified by 'anyOf'.
-    kValidateErrorNot,                         //!< Property matched the sub-schema specified by 'not'.
+    kValidateErrorOneOf, //!< Property did not match any of the sub-schemas specified by 'oneOf'.
+    kValidateErrorOneOfMatch, //!< Property matched more than one of the sub-schemas specified by
+                              //!< 'oneOf'.
+    kValidateErrorAllOf, //!< Property did not match all of the sub-schemas specified by 'allOf'.
+    kValidateErrorAnyOf, //!< Property did not match any of the sub-schemas specified by 'anyOf'.
+    kValidateErrorNot,   //!< Property matched the sub-schema specified by 'not'.
 
-    kValidateErrorReadOnly,                    //!< Property is read-only but has been provided when validation is for writing
-    kValidateErrorWriteOnly                    //!< Property is write-only but has been provided when validation is for reading
+    kValidateErrorReadOnly, //!< Property is read-only but has been provided when validation is for
+                            //!< writing
+    kValidateErrorWriteOnly //!< Property is write-only but has been provided when validation is for
+                            //!< reading
 };
 
 //! Function pointer type of GetValidateError().
@@ -217,22 +233,25 @@ typedef const RAPIDJSON_ERROR_CHARTYPE* (*GetValidateErrorFunc)(ValidateErrorCod
 /*! \ingroup RAPIDJSON_ERRORS
     \see GenericSchemaValidator
 */
-enum SchemaErrorCode {
-    kSchemaErrorNone = 0,                      //!< No error.
+enum SchemaErrorCode
+{
+    kSchemaErrorNone = 0, //!< No error.
 
-    kSchemaErrorStartUnknown,                  //!< Pointer to start of schema does not resolve to a location in the document
-    kSchemaErrorRefPlainName,                  //!< $ref fragment must be a JSON pointer
-    kSchemaErrorRefInvalid,                    //!< $ref must not be an empty string
-    kSchemaErrorRefPointerInvalid,             //!< $ref fragment is not a valid JSON pointer at offset
-    kSchemaErrorRefUnknown,                    //!< $ref does not resolve to a location in the target document
-    kSchemaErrorRefCyclical,                   //!< $ref is cyclical
-    kSchemaErrorRefNoRemoteProvider,           //!< $ref is remote but there is no remote provider
-    kSchemaErrorRefNoRemoteSchema,             //!< $ref is remote but the remote provider did not return a schema
-    kSchemaErrorRegexInvalid,                  //!< Invalid regular expression in 'pattern' or 'patternProperties'
-    kSchemaErrorSpecUnknown,                   //!< JSON schema draft or OpenAPI version is not recognized
-    kSchemaErrorSpecUnsupported,               //!< JSON schema draft or OpenAPI version is not supported
-    kSchemaErrorSpecIllegal,                   //!< Both JSON schema draft and OpenAPI version found in document
-    kSchemaErrorReadOnlyAndWriteOnly           //!< Property must not be both 'readOnly' and 'writeOnly'
+    kSchemaErrorStartUnknown, //!< Pointer to start of schema does not resolve to a location in the
+                              //!< document
+    kSchemaErrorRefPlainName, //!< $ref fragment must be a JSON pointer
+    kSchemaErrorRefInvalid,   //!< $ref must not be an empty string
+    kSchemaErrorRefPointerInvalid,   //!< $ref fragment is not a valid JSON pointer at offset
+    kSchemaErrorRefUnknown,          //!< $ref does not resolve to a location in the target document
+    kSchemaErrorRefCyclical,         //!< $ref is cyclical
+    kSchemaErrorRefNoRemoteProvider, //!< $ref is remote but there is no remote provider
+    kSchemaErrorRefNoRemoteSchema,   //!< $ref is remote but the remote provider did not return a
+                                     //!< schema
+    kSchemaErrorRegexInvalid,    //!< Invalid regular expression in 'pattern' or 'patternProperties'
+    kSchemaErrorSpecUnknown,     //!< JSON schema draft or OpenAPI version is not recognized
+    kSchemaErrorSpecUnsupported, //!< JSON schema draft or OpenAPI version is not supported
+    kSchemaErrorSpecIllegal,     //!< Both JSON schema draft and OpenAPI version found in document
+    kSchemaErrorReadOnlyAndWriteOnly //!< Property must not be both 'readOnly' and 'writeOnly'
 };
 
 //! Function pointer type of GetSchemaError().
@@ -254,13 +273,15 @@ typedef const RAPIDJSON_ERROR_CHARTYPE* (*GetSchemaErrorFunc)(SchemaErrorCode);
 /*! \ingroup RAPIDJSON_ERRORS
     \see GenericPointer::GenericPointer, GenericPointer::GetParseErrorCode
 */
-enum PointerParseErrorCode {
-    kPointerParseErrorNone = 0,                     //!< The parse is successful
+enum PointerParseErrorCode
+{
+    kPointerParseErrorNone = 0, //!< The parse is successful
 
-    kPointerParseErrorTokenMustBeginWithSolidus,    //!< A token must begin with a '/'
-    kPointerParseErrorInvalidEscape,                //!< Invalid escape
-    kPointerParseErrorInvalidPercentEncoding,       //!< Invalid percent encoding in URI fragment
-    kPointerParseErrorCharacterMustPercentEncode    //!< A character must percent encoded in URI fragment
+    kPointerParseErrorTokenMustBeginWithSolidus, //!< A token must begin with a '/'
+    kPointerParseErrorInvalidEscape,             //!< Invalid escape
+    kPointerParseErrorInvalidPercentEncoding,    //!< Invalid percent encoding in URI fragment
+    kPointerParseErrorCharacterMustPercentEncode //!< A character must percent encoded in URI
+                                                 //!< fragment
 };
 
 //! Function pointer type of GetPointerParseError().
@@ -275,7 +296,6 @@ enum PointerParseErrorCode {
 */
 typedef const RAPIDJSON_ERROR_CHARTYPE* (*GetPointerParseErrorFunc)(PointerParseErrorCode);
 
-
 RAPIDJSON_NAMESPACE_END
 
 #ifdef __clang__
diff --git a/include/rapidjson/filereadstream.h b/include/rapidjson/filereadstream.h
index f8bb43cb0c..8cdd792f44 100644
--- a/include/rapidjson/filereadstream.h
+++ b/include/rapidjson/filereadstream.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_FILEREADSTREAM_H_
@@ -21,8 +21,8 @@
 #ifdef __clang__
 RAPIDJSON_DIAG_PUSH
 RAPIDJSON_DIAG_OFF(padded)
-RAPIDJSON_DIAG_OFF(unreachable-code)
-RAPIDJSON_DIAG_OFF(missing-noreturn)
+RAPIDJSON_DIAG_OFF(unreachable - code)
+RAPIDJSON_DIAG_OFF(missing - noreturn)
 #endif
 
 RAPIDJSON_NAMESPACE_BEGIN
@@ -31,9 +31,10 @@ RAPIDJSON_NAMESPACE_BEGIN
 /*!
     \note implements Stream concept
 */
-class FileReadStream {
-public:
-    typedef char Ch;    //!< Character type (byte).
+class FileReadStream
+{
+    public:
+    typedef char Ch; //!< Character type (byte).
 
     //! Constructor.
     /*!
@@ -41,38 +42,61 @@ public:
         \param buffer user-supplied buffer.
         \param bufferSize size of buffer in bytes. Must >=4 bytes.
     */
-    FileReadStream(std::FILE* fp, char* buffer, size_t bufferSize) : fp_(fp), buffer_(buffer), bufferSize_(bufferSize), bufferLast_(0), current_(buffer_), readCount_(0), count_(0), eof_(false) { 
+    FileReadStream(std::FILE* fp, char* buffer, size_t bufferSize)
+        : fp_(fp),
+          buffer_(buffer),
+          bufferSize_(bufferSize),
+          bufferLast_(0),
+          current_(buffer_),
+          readCount_(0),
+          count_(0),
+          eof_(false)
+    {
         RAPIDJSON_ASSERT(fp_ != 0);
         RAPIDJSON_ASSERT(bufferSize >= 4);
         Read();
     }
 
     Ch Peek() const { return *current_; }
-    Ch Take() { Ch c = *current_; Read(); return c; }
+    Ch Take()
+    {
+        Ch c = *current_;
+        Read();
+        return c;
+    }
     size_t Tell() const { return count_ + static_cast<size_t>(current_ - buffer_); }
 
     // Not implemented
     void Put(Ch) { RAPIDJSON_ASSERT(false); }
-    void Flush() { RAPIDJSON_ASSERT(false); } 
-    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
-    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
-
-    // For encoding detection only.
-    const Ch* Peek4() const {
-        return (current_ + 4 - !eof_ <= bufferLast_) ? current_ : 0;
+    void Flush() { RAPIDJSON_ASSERT(false); }
+    Ch* PutBegin()
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    size_t PutEnd(Ch*)
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
     }
 
-private:
-    void Read() {
-        if (current_ < bufferLast_)
-            ++current_;
-        else if (!eof_) {
-            count_ += readCount_;
-            readCount_ = std::fread(buffer_, 1, bufferSize_, fp_);
-            bufferLast_ = buffer_ + readCount_ - 1;
-            current_ = buffer_;
+    // For encoding detection only.
+    const Ch* Peek4() const { return (current_ + 4 - !eof_ <= bufferLast_) ? current_ : 0; }
 
-            if (readCount_ < bufferSize_) {
+    private:
+    void Read()
+    {
+        if(current_ < bufferLast_)
+            ++current_;
+        else if(!eof_)
+        {
+            count_ += readCount_;
+            readCount_  = std::fread(buffer_, 1, bufferSize_, fp_);
+            bufferLast_ = buffer_ + readCount_ - 1;
+            current_    = buffer_;
+
+            if(readCount_ < bufferSize_)
+            {
                 buffer_[readCount_] = '\0';
                 ++bufferLast_;
                 eof_ = true;
@@ -81,12 +105,12 @@ private:
     }
 
     std::FILE* fp_;
-    Ch *buffer_;
+    Ch* buffer_;
     size_t bufferSize_;
-    Ch *bufferLast_;
-    Ch *current_;
+    Ch* bufferLast_;
+    Ch* current_;
     size_t readCount_;
-    size_t count_;  //!< Number of characters read
+    size_t count_; //!< Number of characters read
     bool eof_;
 };
 
diff --git a/include/rapidjson/filewritestream.h b/include/rapidjson/filewritestream.h
index 5d89588c21..fd805d38d0 100644
--- a/include/rapidjson/filewritestream.h
+++ b/include/rapidjson/filewritestream.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_FILEWRITESTREAM_H_
@@ -20,7 +20,7 @@
 
 #ifdef __clang__
 RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(unreachable-code)
+RAPIDJSON_DIAG_OFF(unreachable - code)
 #endif
 
 RAPIDJSON_NAMESPACE_BEGIN
@@ -29,24 +29,30 @@ RAPIDJSON_NAMESPACE_BEGIN
 /*!
     \note implements Stream concept
 */
-class FileWriteStream {
-public:
-    typedef char Ch;    //!< Character type. Only support char.
+class FileWriteStream
+{
+    public:
+    typedef char Ch; //!< Character type. Only support char.
 
-    FileWriteStream(std::FILE* fp, char* buffer, size_t bufferSize) : fp_(fp), buffer_(buffer), bufferEnd_(buffer + bufferSize), current_(buffer_) { 
+    FileWriteStream(std::FILE* fp, char* buffer, size_t bufferSize)
+        : fp_(fp), buffer_(buffer), bufferEnd_(buffer + bufferSize), current_(buffer_)
+    {
         RAPIDJSON_ASSERT(fp_ != 0);
     }
 
-    void Put(char c) { 
-        if (current_ >= bufferEnd_)
+    void Put(char c)
+    {
+        if(current_ >= bufferEnd_)
             Flush();
 
         *current_++ = c;
     }
 
-    void PutN(char c, size_t n) {
+    void PutN(char c, size_t n)
+    {
         size_t avail = static_cast<size_t>(bufferEnd_ - current_);
-        while (n > avail) {
+        while(n > avail)
+        {
             std::memset(current_, c, avail);
             current_ += avail;
             Flush();
@@ -54,16 +60,20 @@ public:
             avail = static_cast<size_t>(bufferEnd_ - current_);
         }
 
-        if (n > 0) {
+        if(n > 0)
+        {
             std::memset(current_, c, n);
             current_ += n;
         }
     }
 
-    void Flush() {
-        if (current_ != buffer_) {
+    void Flush()
+    {
+        if(current_ != buffer_)
+        {
             size_t result = std::fwrite(buffer_, 1, static_cast<size_t>(current_ - buffer_), fp_);
-            if (result < static_cast<size_t>(current_ - buffer_)) {
+            if(result < static_cast<size_t>(current_ - buffer_))
+            {
                 // failure deliberately ignored at this time
                 // added to avoid warn_unused_result build errors
             }
@@ -72,26 +82,47 @@ public:
     }
 
     // Not implemented
-    char Peek() const { RAPIDJSON_ASSERT(false); return 0; }
-    char Take() { RAPIDJSON_ASSERT(false); return 0; }
-    size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
-    char* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
-    size_t PutEnd(char*) { RAPIDJSON_ASSERT(false); return 0; }
+    char Peek() const
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    char Take()
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    size_t Tell() const
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    char* PutBegin()
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    size_t PutEnd(char*)
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
 
-private:
+    private:
     // Prohibit copy constructor & assignment operator.
     FileWriteStream(const FileWriteStream&);
     FileWriteStream& operator=(const FileWriteStream&);
 
     std::FILE* fp_;
-    char *buffer_;
-    char *bufferEnd_;
-    char *current_;
+    char* buffer_;
+    char* bufferEnd_;
+    char* current_;
 };
 
 //! Implement specialized version of PutN() with memset() for better performance.
-template<>
-inline void PutN(FileWriteStream& stream, char c, size_t n) {
+template <>
+inline void PutN(FileWriteStream& stream, char c, size_t n)
+{
     stream.PutN(c, n);
 }
 
diff --git a/include/rapidjson/fwd.h b/include/rapidjson/fwd.h
index d62f77f0ec..7ac8d64601 100644
--- a/include/rapidjson/fwd.h
+++ b/include/rapidjson/fwd.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_FWD_H_
@@ -21,17 +21,26 @@ RAPIDJSON_NAMESPACE_BEGIN
 
 // encodings.h
 
-template<typename CharType> struct UTF8;
-template<typename CharType> struct UTF16;
-template<typename CharType> struct UTF16BE;
-template<typename CharType> struct UTF16LE;
-template<typename CharType> struct UTF32;
-template<typename CharType> struct UTF32BE;
-template<typename CharType> struct UTF32LE;
-template<typename CharType> struct ASCII;
-template<typename CharType> struct AutoUTF;
+template <typename CharType>
+struct UTF8;
+template <typename CharType>
+struct UTF16;
+template <typename CharType>
+struct UTF16BE;
+template <typename CharType>
+struct UTF16LE;
+template <typename CharType>
+struct UTF32;
+template <typename CharType>
+struct UTF32BE;
+template <typename CharType>
+struct UTF32LE;
+template <typename CharType>
+struct ASCII;
+template <typename CharType>
+struct AutoUTF;
 
-template<typename SourceEncoding, typename TargetEncoding>
+template <typename SourceEncoding, typename TargetEncoding>
 struct Transcoder;
 
 // allocators.h
@@ -46,12 +55,12 @@ class MemoryPoolAllocator;
 template <typename Encoding>
 struct GenericStringStream;
 
-typedef GenericStringStream<UTF8<char> > StringStream;
+typedef GenericStringStream<UTF8<char>> StringStream;
 
 template <typename Encoding>
 struct GenericInsituStringStream;
 
-typedef GenericInsituStringStream<UTF8<char> > InsituStringStream;
+typedef GenericInsituStringStream<UTF8<char>> InsituStringStream;
 
 // stringbuffer.h
 
@@ -81,7 +90,7 @@ struct MemoryStream;
 
 // reader.h
 
-template<typename Encoding, typename Derived>
+template <typename Encoding, typename Derived>
 struct BaseReaderHandler;
 
 template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator>
@@ -91,29 +100,37 @@ typedef GenericReader<UTF8<char>, UTF8<char>, CrtAllocator> Reader;
 
 // writer.h
 
-template<typename OutputStream, typename SourceEncoding, typename TargetEncoding, typename StackAllocator, unsigned writeFlags>
+template <typename OutputStream,
+          typename SourceEncoding,
+          typename TargetEncoding,
+          typename StackAllocator,
+          unsigned writeFlags>
 class Writer;
 
 // prettywriter.h
 
-template<typename OutputStream, typename SourceEncoding, typename TargetEncoding, typename StackAllocator, unsigned writeFlags>
+template <typename OutputStream,
+          typename SourceEncoding,
+          typename TargetEncoding,
+          typename StackAllocator,
+          unsigned writeFlags>
 class PrettyWriter;
 
 // document.h
 
-template <typename Encoding, typename Allocator> 
+template <typename Encoding, typename Allocator>
 class GenericMember;
 
 template <bool Const, typename Encoding, typename Allocator>
 class GenericMemberIterator;
 
-template<typename CharType>
+template <typename CharType>
 struct GenericStringRef;
 
-template <typename Encoding, typename Allocator> 
+template <typename Encoding, typename Allocator>
 class GenericValue;
 
-typedef GenericValue<UTF8<char>, MemoryPoolAllocator<CrtAllocator> > Value;
+typedef GenericValue<UTF8<char>, MemoryPoolAllocator<CrtAllocator>> Value;
 
 template <typename Encoding, typename Allocator, typename StackAllocator>
 class GenericDocument;
@@ -138,13 +155,11 @@ class GenericSchemaDocument;
 typedef GenericSchemaDocument<Value, CrtAllocator> SchemaDocument;
 typedef IGenericRemoteSchemaDocumentProvider<SchemaDocument> IRemoteSchemaDocumentProvider;
 
-template <
-    typename SchemaDocumentType,
-    typename OutputHandler,
-    typename StateAllocator>
+template <typename SchemaDocumentType, typename OutputHandler, typename StateAllocator>
 class GenericSchemaValidator;
 
-typedef GenericSchemaValidator<SchemaDocument, BaseReaderHandler<UTF8<char>, void>, CrtAllocator> SchemaValidator;
+typedef GenericSchemaValidator<SchemaDocument, BaseReaderHandler<UTF8<char>, void>, CrtAllocator>
+    SchemaValidator;
 
 RAPIDJSON_NAMESPACE_END
 
diff --git a/include/rapidjson/internal/biginteger.h b/include/rapidjson/internal/biginteger.h
index 4930043dc7..fdf95284af 100644
--- a/include/rapidjson/internal/biginteger.h
+++ b/include/rapidjson/internal/biginteger.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_BIGINTEGER_H_
@@ -22,132 +22,153 @@
 #if !defined(_ARM64EC_)
 #pragma intrinsic(_umul128)
 #else
-#pragma comment(lib,"softintrin")
+#pragma comment(lib, "softintrin")
 #endif
 #endif
 
 RAPIDJSON_NAMESPACE_BEGIN
 namespace internal {
 
-class BigInteger {
-public:
+class BigInteger
+{
+    public:
     typedef uint64_t Type;
 
-    BigInteger(const BigInteger& rhs) : count_(rhs.count_) {
+    BigInteger(const BigInteger& rhs) : count_(rhs.count_)
+    {
         std::memcpy(digits_, rhs.digits_, count_ * sizeof(Type));
     }
 
-    explicit BigInteger(uint64_t u) : count_(1) {
-        digits_[0] = u;
-    }
+    explicit BigInteger(uint64_t u) : count_(1) { digits_[0] = u; }
 
-    template<typename Ch>
-    BigInteger(const Ch* decimals, size_t length) : count_(1) {
+    template <typename Ch>
+    BigInteger(const Ch* decimals, size_t length) : count_(1)
+    {
         RAPIDJSON_ASSERT(length > 0);
-        digits_[0] = 0;
-        size_t i = 0;
-        const size_t kMaxDigitPerIteration = 19;  // 2^64 = 18446744073709551616 > 10^19
-        while (length >= kMaxDigitPerIteration) {
+        digits_[0]                         = 0;
+        size_t i                           = 0;
+        const size_t kMaxDigitPerIteration = 19; // 2^64 = 18446744073709551616 > 10^19
+        while(length >= kMaxDigitPerIteration)
+        {
             AppendDecimal64(decimals + i, decimals + i + kMaxDigitPerIteration);
             length -= kMaxDigitPerIteration;
             i += kMaxDigitPerIteration;
         }
 
-        if (length > 0)
+        if(length > 0)
             AppendDecimal64(decimals + i, decimals + i + length);
     }
-    
-    BigInteger& operator=(const BigInteger &rhs)
+
+    BigInteger& operator=(const BigInteger& rhs)
     {
-        if (this != &rhs) {
+        if(this != &rhs)
+        {
             count_ = rhs.count_;
             std::memcpy(digits_, rhs.digits_, count_ * sizeof(Type));
         }
         return *this;
     }
-    
-    BigInteger& operator=(uint64_t u) {
-        digits_[0] = u;            
-        count_ = 1;
+
+    BigInteger& operator=(uint64_t u)
+    {
+        digits_[0] = u;
+        count_     = 1;
         return *this;
     }
 
-    BigInteger& operator+=(uint64_t u) {
+    BigInteger& operator+=(uint64_t u)
+    {
         Type backup = digits_[0];
         digits_[0] += u;
-        for (size_t i = 0; i < count_ - 1; i++) {
-            if (digits_[i] >= backup)
+        for(size_t i = 0; i < count_ - 1; i++)
+        {
+            if(digits_[i] >= backup)
                 return *this; // no carry
             backup = digits_[i + 1];
             digits_[i + 1] += 1;
         }
 
         // Last carry
-        if (digits_[count_ - 1] < backup)
+        if(digits_[count_ - 1] < backup)
             PushBack(1);
 
         return *this;
     }
 
-    BigInteger& operator*=(uint64_t u) {
-        if (u == 0) return *this = 0;
-        if (u == 1) return *this;
-        if (*this == 1) return *this = u;
+    BigInteger& operator*=(uint64_t u)
+    {
+        if(u == 0)
+            return *this = 0;
+        if(u == 1)
+            return *this;
+        if(*this == 1)
+            return *this = u;
 
         uint64_t k = 0;
-        for (size_t i = 0; i < count_; i++) {
+        for(size_t i = 0; i < count_; i++)
+        {
             uint64_t hi;
             digits_[i] = MulAdd64(digits_[i], u, k, &hi);
-            k = hi;
+            k          = hi;
         }
-        
-        if (k > 0)
+
+        if(k > 0)
             PushBack(k);
 
         return *this;
     }
 
-    BigInteger& operator*=(uint32_t u) {
-        if (u == 0) return *this = 0;
-        if (u == 1) return *this;
-        if (*this == 1) return *this = u;
+    BigInteger& operator*=(uint32_t u)
+    {
+        if(u == 0)
+            return *this = 0;
+        if(u == 1)
+            return *this;
+        if(*this == 1)
+            return *this = u;
 
         uint64_t k = 0;
-        for (size_t i = 0; i < count_; i++) {
-            const uint64_t c = digits_[i] >> 32;
-            const uint64_t d = digits_[i] & 0xFFFFFFFF;
+        for(size_t i = 0; i < count_; i++)
+        {
+            const uint64_t c  = digits_[i] >> 32;
+            const uint64_t d  = digits_[i] & 0xFFFFFFFF;
             const uint64_t uc = u * c;
             const uint64_t ud = u * d;
             const uint64_t p0 = ud + k;
             const uint64_t p1 = uc + (p0 >> 32);
-            digits_[i] = (p0 & 0xFFFFFFFF) | (p1 << 32);
-            k = p1 >> 32;
+            digits_[i]        = (p0 & 0xFFFFFFFF) | (p1 << 32);
+            k                 = p1 >> 32;
         }
-        
-        if (k > 0)
+
+        if(k > 0)
             PushBack(k);
 
         return *this;
     }
 
-    BigInteger& operator<<=(size_t shift) {
-        if (IsZero() || shift == 0) return *this;
+    BigInteger& operator<<=(size_t shift)
+    {
+        if(IsZero() || shift == 0)
+            return *this;
 
-        size_t offset = shift / kTypeBit;
+        size_t offset     = shift / kTypeBit;
         size_t interShift = shift % kTypeBit;
         RAPIDJSON_ASSERT(count_ + offset <= kCapacity);
 
-        if (interShift == 0) {
+        if(interShift == 0)
+        {
             std::memmove(digits_ + offset, digits_, count_ * sizeof(Type));
             count_ += offset;
         }
-        else {
+        else
+        {
             digits_[count_] = 0;
-            for (size_t i = count_; i > 0; i--)
-                digits_[i + offset] = (digits_[i] << interShift) | (digits_[i - 1] >> (kTypeBit - interShift));
+            for(size_t i = count_; i > 0; i--)
+                digits_[i + offset] =
+                    (digits_[i] << interShift) | (digits_[i - 1] >> (kTypeBit - interShift));
             digits_[offset] = digits_[0] << interShift;
             count_ += offset;
-            if (digits_[count_])
+            if(digits_[count_])
                 count_++;
         }
 
@@ -156,96 +177,121 @@ public:
         return *this;
     }
 
-    bool operator==(const BigInteger& rhs) const {
-        return count_ == rhs.count_ && std::memcmp(digits_, rhs.digits_, count_ * sizeof(Type)) == 0;
+    bool operator==(const BigInteger& rhs) const
+    {
+        return count_ == rhs.count_ &&
+               std::memcmp(digits_, rhs.digits_, count_ * sizeof(Type)) == 0;
     }
 
-    bool operator==(const Type rhs) const {
-        return count_ == 1 && digits_[0] == rhs;
-    }
+    bool operator==(const Type rhs) const { return count_ == 1 && digits_[0] == rhs; }
 
-    BigInteger& MultiplyPow5(unsigned exp) {
-        static const uint32_t kPow5[12] = {
-            5,
-            5 * 5,
-            5 * 5 * 5,
-            5 * 5 * 5 * 5,
-            5 * 5 * 5 * 5 * 5,
-            5 * 5 * 5 * 5 * 5 * 5,
-            5 * 5 * 5 * 5 * 5 * 5 * 5,
-            5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
-            5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
-            5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
-            5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
-            5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5
-        };
-        if (exp == 0) return *this;
-        for (; exp >= 27; exp -= 27) *this *= RAPIDJSON_UINT64_C2(0X6765C793, 0XFA10079D); // 5^27
-        for (; exp >= 13; exp -= 13) *this *= static_cast<uint32_t>(1220703125u); // 5^13
-        if (exp > 0)                 *this *= kPow5[exp - 1];
+    BigInteger& MultiplyPow5(unsigned exp)
+    {
+        static const uint32_t kPow5[12] = {5,
+                                           5 * 5,
+                                           5 * 5 * 5,
+                                           5 * 5 * 5 * 5,
+                                           5 * 5 * 5 * 5 * 5,
+                                           5 * 5 * 5 * 5 * 5 * 5,
+                                           5 * 5 * 5 * 5 * 5 * 5 * 5,
+                                           5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
+                                           5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
+                                           5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
+                                           5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
+                                           5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5};
+        if(exp == 0)
+            return *this;
+        for(; exp >= 27; exp -= 27)
+            *this *= RAPIDJSON_UINT64_C2(0X6765C793, 0XFA10079D); // 5^27
+        for(; exp >= 13; exp -= 13)
+            *this *= static_cast<uint32_t>(1220703125u); // 5^13
+        if(exp > 0)
+            *this *= kPow5[exp - 1];
         return *this;
     }
 
     // Compute absolute difference of this and rhs.
     // Assume this != rhs
-    bool Difference(const BigInteger& rhs, BigInteger* out) const {
+    bool Difference(const BigInteger& rhs, BigInteger* out) const
+    {
         int cmp = Compare(rhs);
         RAPIDJSON_ASSERT(cmp != 0);
-        const BigInteger *a, *b;  // Makes a > b
+        const BigInteger *a, *b; // Makes a > b
         bool ret;
-        if (cmp < 0) { a = &rhs; b = this; ret = true; }
-        else         { a = this; b = &rhs; ret = false; }
+        if(cmp < 0)
+        {
+            a   = &rhs;
+            b   = this;
+            ret = true;
+        }
+        else
+        {
+            a   = this;
+            b   = &rhs;
+            ret = false;
+        }
 
         Type borrow = 0;
-        for (size_t i = 0; i < a->count_; i++) {
+        for(size_t i = 0; i < a->count_; i++)
+        {
             Type d = a->digits_[i] - borrow;
-            if (i < b->count_)
+            if(i < b->count_)
                 d -= b->digits_[i];
-            borrow = (d > a->digits_[i]) ? 1 : 0;
+            borrow          = (d > a->digits_[i]) ? 1 : 0;
             out->digits_[i] = d;
-            if (d != 0)
+            if(d != 0)
                 out->count_ = i + 1;
         }
 
         return ret;
     }
 
-    int Compare(const BigInteger& rhs) const {
-        if (count_ != rhs.count_)
+    int Compare(const BigInteger& rhs) const
+    {
+        if(count_ != rhs.count_)
             return count_ < rhs.count_ ? -1 : 1;
 
-        for (size_t i = count_; i-- > 0;)
-            if (digits_[i] != rhs.digits_[i])
+        for(size_t i = count_; i-- > 0;)
+            if(digits_[i] != rhs.digits_[i])
                 return digits_[i] < rhs.digits_[i] ? -1 : 1;
 
         return 0;
     }
 
     size_t GetCount() const { return count_; }
-    Type GetDigit(size_t index) const { RAPIDJSON_ASSERT(index < count_); return digits_[index]; }
+    Type GetDigit(size_t index) const
+    {
+        RAPIDJSON_ASSERT(index < count_);
+        return digits_[index];
+    }
     bool IsZero() const { return count_ == 1 && digits_[0] == 0; }
 
-private:
-    template<typename Ch>
-    void AppendDecimal64(const Ch* begin, const Ch* end) {
+    private:
+    template <typename Ch>
+    void AppendDecimal64(const Ch* begin, const Ch* end)
+    {
         uint64_t u = ParseUint64(begin, end);
-        if (IsZero())
+        if(IsZero())
             *this = u;
-        else {
+        else
+        {
             unsigned exp = static_cast<unsigned>(end - begin);
-            (MultiplyPow5(exp) <<= exp) += u;   // *this = *this * 10^exp + u
+            (MultiplyPow5(exp) <<= exp) += u; // *this = *this * 10^exp + u
         }
     }
 
-    void PushBack(Type digit) {
+    void PushBack(Type digit)
+    {
         RAPIDJSON_ASSERT(count_ < kCapacity);
         digits_[count_++] = digit;
     }
 
-    template<typename Ch>
-    static uint64_t ParseUint64(const Ch* begin, const Ch* end) {
+    template <typename Ch>
+    static uint64_t ParseUint64(const Ch* begin, const Ch* end)
+    {
         uint64_t r = 0;
-        for (const Ch* p = begin; p != end; ++p) {
+        for(const Ch* p = begin; p != end; ++p)
+        {
             RAPIDJSON_ASSERT(*p >= Ch('0') && *p <= Ch('9'));
             r = r * 10u + static_cast<unsigned>(*p - Ch('0'));
         }
@@ -253,13 +299,15 @@ private:
     }
 
     // Assume a * b + k < 2^128
-    static uint64_t MulAdd64(uint64_t a, uint64_t b, uint64_t k, uint64_t* outHigh) {
+    static uint64_t MulAdd64(uint64_t a, uint64_t b, uint64_t k, uint64_t* outHigh)
+    {
 #if defined(_MSC_VER) && defined(_M_AMD64)
         uint64_t low = _umul128(a, b, outHigh) + k;
-        if (low < k)
+        if(low < k)
             (*outHigh)++;
         return low;
-#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
+#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && \
+    defined(__x86_64__)
         __extension__ typedef unsigned __int128 uint128;
         uint128 p = static_cast<uint128>(a) * static_cast<uint128>(b);
         p += k;
@@ -270,22 +318,22 @@ private:
         uint64_t x0 = a0 * b0, x1 = a0 * b1, x2 = a1 * b0, x3 = a1 * b1;
         x1 += (x0 >> 32); // can't give carry
         x1 += x2;
-        if (x1 < x2)
+        if(x1 < x2)
             x3 += (static_cast<uint64_t>(1) << 32);
         uint64_t lo = (x1 << 32) + (x0 & 0xFFFFFFFF);
         uint64_t hi = x3 + (x1 >> 32);
 
         lo += k;
-        if (lo < k)
+        if(lo < k)
             hi++;
         *outHigh = hi;
         return lo;
 #endif
     }
 
-    static const size_t kBitCount = 3328;  // 64bit * 54 > 10^1000
+    static const size_t kBitCount = 3328; // 64bit * 54 > 10^1000
     static const size_t kCapacity = kBitCount / sizeof(Type);
-    static const size_t kTypeBit = sizeof(Type) * 8;
+    static const size_t kTypeBit  = sizeof(Type) * 8;
 
     Type digits_[kCapacity];
     size_t count_;
diff --git a/include/rapidjson/internal/clzll.h b/include/rapidjson/internal/clzll.h
index 8fc5118aa4..8c9aea4346 100644
--- a/include/rapidjson/internal/clzll.h
+++ b/include/rapidjson/internal/clzll.h
@@ -29,7 +29,8 @@
 RAPIDJSON_NAMESPACE_BEGIN
 namespace internal {
 
-inline uint32_t clzll(uint64_t x) {
+inline uint32_t clzll(uint64_t x)
+{
     // Passing 0 to __builtin_clzll is UB in GCC and results in an
     // infinite loop in the software implementation.
     RAPIDJSON_ASSERT(x != 0);
@@ -40,7 +41,7 @@ inline uint32_t clzll(uint64_t x) {
     _BitScanReverse64(&r, x);
 #else
     // Scan the high 32 bits.
-    if (_BitScanReverse(&r, static_cast<uint32_t>(x >> 32)))
+    if(_BitScanReverse(&r, static_cast<uint32_t>(x >> 32)))
         return 63 - (r + 32);
 
     // Scan the low 32 bits.
@@ -48,13 +49,14 @@ inline uint32_t clzll(uint64_t x) {
 #endif // _WIN64
 
     return 63 - r;
-#elif (defined(__GNUC__) && __GNUC__ >= 4) || RAPIDJSON_HAS_BUILTIN(__builtin_clzll)
+#elif(defined(__GNUC__) && __GNUC__ >= 4) || RAPIDJSON_HAS_BUILTIN(__builtin_clzll)
     // __builtin_clzll wrapper
     return static_cast<uint32_t>(__builtin_clzll(x));
 #else
     // naive version
     uint32_t r = 0;
-    while (!(x & (static_cast<uint64_t>(1) << 63))) {
+    while(!(x & (static_cast<uint64_t>(1) << 63)))
+    {
         x <<= 1;
         ++r;
     }
diff --git a/include/rapidjson/internal/diyfp.h b/include/rapidjson/internal/diyfp.h
index 1f60fb60ca..7dfe4aa550 100644
--- a/include/rapidjson/internal/diyfp.h
+++ b/include/rapidjson/internal/diyfp.h
@@ -28,7 +28,7 @@
 #if !defined(_ARM64EC_)
 #pragma intrinsic(_umul128)
 #else
-#pragma comment(lib,"softintrin")
+#pragma comment(lib, "softintrin")
 #endif
 #endif
 
@@ -45,72 +45,80 @@ RAPIDJSON_DIAG_PUSH
 RAPIDJSON_DIAG_OFF(padded)
 #endif
 
-struct DiyFp {
+struct DiyFp
+{
     DiyFp() : f(), e() {}
 
     DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
 
-    explicit DiyFp(double d) {
-        union {
+    explicit DiyFp(double d)
+    {
+        union
+        {
             double d;
             uint64_t u64;
-        } u = { d };
+        } u = {d};
 
-        int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
+        int biased_e         = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
         uint64_t significand = (u.u64 & kDpSignificandMask);
-        if (biased_e != 0) {
+        if(biased_e != 0)
+        {
             f = significand + kDpHiddenBit;
             e = biased_e - kDpExponentBias;
         }
-        else {
+        else
+        {
             f = significand;
             e = kDpMinExponent + 1;
         }
     }
 
-    DiyFp operator-(const DiyFp& rhs) const {
-        return DiyFp(f - rhs.f, e);
-    }
+    DiyFp operator-(const DiyFp& rhs) const { return DiyFp(f - rhs.f, e); }
 
-    DiyFp operator*(const DiyFp& rhs) const {
+    DiyFp operator*(const DiyFp& rhs) const
+    {
 #if defined(_MSC_VER) && defined(_M_AMD64)
         uint64_t h;
         uint64_t l = _umul128(f, rhs.f, &h);
-        if (l & (uint64_t(1) << 63)) // rounding
+        if(l & (uint64_t(1) << 63)) // rounding
             h++;
         return DiyFp(h, e + rhs.e + 64);
-#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
+#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && \
+    defined(__x86_64__)
         __extension__ typedef unsigned __int128 uint128;
-        uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
+        uint128 p  = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
         uint64_t h = static_cast<uint64_t>(p >> 64);
         uint64_t l = static_cast<uint64_t>(p);
-        if (l & (uint64_t(1) << 63)) // rounding
+        if(l & (uint64_t(1) << 63)) // rounding
             h++;
         return DiyFp(h, e + rhs.e + 64);
 #else
         const uint64_t M32 = 0xFFFFFFFF;
-        const uint64_t a = f >> 32;
-        const uint64_t b = f & M32;
-        const uint64_t c = rhs.f >> 32;
-        const uint64_t d = rhs.f & M32;
-        const uint64_t ac = a * c;
-        const uint64_t bc = b * c;
-        const uint64_t ad = a * d;
-        const uint64_t bd = b * d;
-        uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
-        tmp += 1U << 31;  /// mult_round
+        const uint64_t a   = f >> 32;
+        const uint64_t b   = f & M32;
+        const uint64_t c   = rhs.f >> 32;
+        const uint64_t d   = rhs.f & M32;
+        const uint64_t ac  = a * c;
+        const uint64_t bc  = b * c;
+        const uint64_t ad  = a * d;
+        const uint64_t bd  = b * d;
+        uint64_t tmp       = (bd >> 32) + (ad & M32) + (bc & M32);
+        tmp += 1U << 31; /// mult_round
         return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
 #endif
     }
 
-    DiyFp Normalize() const {
+    DiyFp Normalize() const
+    {
         int s = static_cast<int>(clzll(f));
         return DiyFp(f << s, e - s);
     }
 
-    DiyFp NormalizeBoundary() const {
+    DiyFp NormalizeBoundary() const
+    {
         DiyFp res = *this;
-        while (!(res.f & (kDpHiddenBit << 1))) {
+        while(!(res.f & (kDpHiddenBit << 1)))
+        {
             res.f <<= 1;
             res.e--;
         }
@@ -119,50 +127,57 @@ struct DiyFp {
         return res;
     }
 
-    void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
+    void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const
+    {
         DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
         DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
         mi.f <<= mi.e - pl.e;
-        mi.e = pl.e;
-        *plus = pl;
+        mi.e   = pl.e;
+        *plus  = pl;
         *minus = mi;
     }
 
-    double ToDouble() const {
-        union {
+    double ToDouble() const
+    {
+        union
+        {
             double d;
             uint64_t u64;
-        }u;
+        } u;
         RAPIDJSON_ASSERT(f <= kDpHiddenBit + kDpSignificandMask);
-        if (e < kDpDenormalExponent) {
+        if(e < kDpDenormalExponent)
+        {
             // Underflow.
             return 0.0;
         }
-        if (e >= kDpMaxExponent) {
+        if(e >= kDpMaxExponent)
+        {
             // Overflow.
             return std::numeric_limits<double>::infinity();
         }
-        const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 :
-            static_cast<uint64_t>(e + kDpExponentBias);
-        u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
+        const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0)
+                                ? 0
+                                : static_cast<uint64_t>(e + kDpExponentBias);
+        u.u64             = (f & kDpSignificandMask) | (be << kDpSignificandSize);
         return u.d;
     }
 
-    static const int kDiySignificandSize = 64;
-    static const int kDpSignificandSize = 52;
-    static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
-    static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
-    static const int kDpMinExponent = -kDpExponentBias;
-    static const int kDpDenormalExponent = -kDpExponentBias + 1;
-    static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
+    static const int kDiySignificandSize     = 64;
+    static const int kDpSignificandSize      = 52;
+    static const int kDpExponentBias         = 0x3FF + kDpSignificandSize;
+    static const int kDpMaxExponent          = 0x7FF - kDpExponentBias;
+    static const int kDpMinExponent          = -kDpExponentBias;
+    static const int kDpDenormalExponent     = -kDpExponentBias + 1;
+    static const uint64_t kDpExponentMask    = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
     static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
-    static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
+    static const uint64_t kDpHiddenBit       = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
 
     uint64_t f;
     int e;
 };
 
-inline DiyFp GetCachedPowerByIndex(size_t index) {
+inline DiyFp GetCachedPowerByIndex(size_t index)
+{
     // 10^-348, 10^-340, ..., 10^340
     static const uint64_t kCachedPowers_F[] = {
         RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
@@ -208,41 +223,40 @@ inline DiyFp GetCachedPowerByIndex(size_t index) {
         RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
         RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
         RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
-        RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
-    };
+        RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)};
     static const int16_t kCachedPowers_E[] = {
-        -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007,  -980,
-        -954,  -927,  -901,  -874,  -847,  -821,  -794,  -768,  -741,  -715,
-        -688,  -661,  -635,  -608,  -582,  -555,  -529,  -502,  -475,  -449,
-        -422,  -396,  -369,  -343,  -316,  -289,  -263,  -236,  -210,  -183,
-        -157,  -130,  -103,   -77,   -50,   -24,     3,    30,    56,    83,
-        109,   136,   162,   189,   216,   242,   269,   295,   322,   348,
-        375,   402,   428,   455,   481,   508,   534,   561,   588,   614,
-        641,   667,   694,   720,   747,   774,   800,   827,   853,   880,
-        907,   933,   960,   986,  1013,  1039,  1066
-    };
+        -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954, -927, -901,
+        -874,  -847,  -821,  -794,  -768,  -741,  -715,  -688,  -661,  -635, -608, -582, -555,
+        -529,  -502,  -475,  -449,  -422,  -396,  -369,  -343,  -316,  -289, -263, -236, -210,
+        -183,  -157,  -130,  -103,  -77,   -50,   -24,   3,     30,    56,   83,   109,  136,
+        162,   189,   216,   242,   269,   295,   322,   348,   375,   402,  428,  455,  481,
+        508,   534,   561,   588,   614,   641,   667,   694,   720,   747,  774,  800,  827,
+        853,   880,   907,   933,   960,   986,   1013,  1039,  1066};
     RAPIDJSON_ASSERT(index < 87);
     return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
 }
 
-inline DiyFp GetCachedPower(int e, int* K) {
+inline DiyFp GetCachedPower(int e, int* K)
+{
 
-    //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
-    double dk = (-61 - e) * 0.30102999566398114 + 347;  // dk must be positive, so can do ceiling in positive
+    // int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
+    double dk =
+        (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
     int k = static_cast<int>(dk);
-    if (dk - k > 0.0)
+    if(dk - k > 0.0)
         k++;
 
     unsigned index = static_cast<unsigned>((k >> 3) + 1);
-    *K = -(-348 + static_cast<int>(index << 3));    // decimal exponent no need lookup table
+    *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
 
     return GetCachedPowerByIndex(index);
 }
 
-inline DiyFp GetCachedPower10(int exp, int *outExp) {
+inline DiyFp GetCachedPower10(int exp, int* outExp)
+{
     RAPIDJSON_ASSERT(exp >= -348);
     unsigned index = static_cast<unsigned>(exp + 348) / 8u;
-    *outExp = -348 + static_cast<int>(index) * 8;
+    *outExp        = -348 + static_cast<int>(index) * 8;
     return GetCachedPowerByIndex(index);
 }
 
diff --git a/include/rapidjson/internal/dtoa.h b/include/rapidjson/internal/dtoa.h
index cd456721a7..1058b6fecd 100644
--- a/include/rapidjson/internal/dtoa.h
+++ b/include/rapidjson/internal/dtoa.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 // This is a C++ header-only implementation of Grisu2 algorithm from the publication:
@@ -29,66 +29,126 @@ namespace internal {
 #ifdef __GNUC__
 RAPIDJSON_DIAG_PUSH
 RAPIDJSON_DIAG_OFF(effc++)
-RAPIDJSON_DIAG_OFF(array-bounds) // some gcc versions generate wrong warnings https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124
+RAPIDJSON_DIAG_OFF(array - bounds) // some gcc versions generate wrong warnings
+                                   // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124
 #endif
 
-inline void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w) {
-    while (rest < wp_w && delta - rest >= ten_kappa &&
-           (rest + ten_kappa < wp_w ||  /// closer
-            wp_w - rest > rest + ten_kappa - wp_w)) {
+inline void
+GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w)
+{
+    while(rest < wp_w && delta - rest >= ten_kappa &&
+          (rest + ten_kappa < wp_w || /// closer
+           wp_w - rest > rest + ten_kappa - wp_w))
+    {
         buffer[len - 1]--;
         rest += ten_kappa;
     }
 }
 
-inline int CountDecimalDigit32(uint32_t n) {
+inline int CountDecimalDigit32(uint32_t n)
+{
     // Simple pure C++ implementation was faster than __builtin_clz version in this situation.
-    if (n < 10) return 1;
-    if (n < 100) return 2;
-    if (n < 1000) return 3;
-    if (n < 10000) return 4;
-    if (n < 100000) return 5;
-    if (n < 1000000) return 6;
-    if (n < 10000000) return 7;
-    if (n < 100000000) return 8;
+    if(n < 10)
+        return 1;
+    if(n < 100)
+        return 2;
+    if(n < 1000)
+        return 3;
+    if(n < 10000)
+        return 4;
+    if(n < 100000)
+        return 5;
+    if(n < 1000000)
+        return 6;
+    if(n < 10000000)
+        return 7;
+    if(n < 100000000)
+        return 8;
     // Will not reach 10 digits in DigitGen()
-    //if (n < 1000000000) return 9;
-    //return 10;
+    // if (n < 1000000000) return 9;
+    // return 10;
     return 9;
 }
 
-inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K) {
-    static const uint64_t kPow10[] = { 1ULL, 10ULL, 100ULL, 1000ULL, 10000ULL, 100000ULL, 1000000ULL, 10000000ULL, 100000000ULL,
-                                       1000000000ULL, 10000000000ULL, 100000000000ULL, 1000000000000ULL,
-                                       10000000000000ULL, 100000000000000ULL, 1000000000000000ULL,
-                                       10000000000000000ULL, 100000000000000000ULL, 1000000000000000000ULL,
-                                       10000000000000000000ULL };
+inline void
+DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K)
+{
+    static const uint64_t kPow10[] = {1ULL,
+                                      10ULL,
+                                      100ULL,
+                                      1000ULL,
+                                      10000ULL,
+                                      100000ULL,
+                                      1000000ULL,
+                                      10000000ULL,
+                                      100000000ULL,
+                                      1000000000ULL,
+                                      10000000000ULL,
+                                      100000000000ULL,
+                                      1000000000000ULL,
+                                      10000000000000ULL,
+                                      100000000000000ULL,
+                                      1000000000000000ULL,
+                                      10000000000000000ULL,
+                                      100000000000000000ULL,
+                                      1000000000000000000ULL,
+                                      10000000000000000000ULL};
     const DiyFp one(uint64_t(1) << -Mp.e, Mp.e);
     const DiyFp wp_w = Mp - W;
-    uint32_t p1 = static_cast<uint32_t>(Mp.f >> -one.e);
-    uint64_t p2 = Mp.f & (one.f - 1);
-    int kappa = CountDecimalDigit32(p1); // kappa in [0, 9]
-    *len = 0;
+    uint32_t p1      = static_cast<uint32_t>(Mp.f >> -one.e);
+    uint64_t p2      = Mp.f & (one.f - 1);
+    int kappa        = CountDecimalDigit32(p1); // kappa in [0, 9]
+    *len             = 0;
 
-    while (kappa > 0) {
+    while(kappa > 0)
+    {
         uint32_t d = 0;
-        switch (kappa) {
-            case  9: d = p1 /  100000000; p1 %=  100000000; break;
-            case  8: d = p1 /   10000000; p1 %=   10000000; break;
-            case  7: d = p1 /    1000000; p1 %=    1000000; break;
-            case  6: d = p1 /     100000; p1 %=     100000; break;
-            case  5: d = p1 /      10000; p1 %=      10000; break;
-            case  4: d = p1 /       1000; p1 %=       1000; break;
-            case  3: d = p1 /        100; p1 %=        100; break;
-            case  2: d = p1 /         10; p1 %=         10; break;
-            case  1: d = p1;              p1 =           0; break;
-            default:;
+        switch(kappa)
+        {
+        case 9:
+            d = p1 / 100000000;
+            p1 %= 100000000;
+            break;
+        case 8:
+            d = p1 / 10000000;
+            p1 %= 10000000;
+            break;
+        case 7:
+            d = p1 / 1000000;
+            p1 %= 1000000;
+            break;
+        case 6:
+            d = p1 / 100000;
+            p1 %= 100000;
+            break;
+        case 5:
+            d = p1 / 10000;
+            p1 %= 10000;
+            break;
+        case 4:
+            d = p1 / 1000;
+            p1 %= 1000;
+            break;
+        case 3:
+            d = p1 / 100;
+            p1 %= 100;
+            break;
+        case 2:
+            d = p1 / 10;
+            p1 %= 10;
+            break;
+        case 1:
+            d  = p1;
+            p1 = 0;
+            break;
+        default:;
         }
-        if (d || *len)
+        if(d || *len)
             buffer[(*len)++] = static_cast<char>('0' + static_cast<char>(d));
         kappa--;
         uint64_t tmp = (static_cast<uint64_t>(p1) << -one.e) + p2;
-        if (tmp <= delta) {
+        if(tmp <= delta)
+        {
             *K += kappa;
             GrisuRound(buffer, *len, delta, tmp, kPow10[kappa] << -one.e, wp_w.f);
             return;
@@ -96,15 +156,17 @@ inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buff
     }
 
     // kappa = 0
-    for (;;) {
+    for(;;)
+    {
         p2 *= 10;
         delta *= 10;
         char d = static_cast<char>(p2 >> -one.e);
-        if (d || *len)
+        if(d || *len)
             buffer[(*len)++] = static_cast<char>('0' + d);
         p2 &= one.f - 1;
         kappa--;
-        if (p2 < delta) {
+        if(p2 < delta)
+        {
             *K += kappa;
             int index = -kappa;
             GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * (index < 20 ? kPow10[index] : 0));
@@ -113,37 +175,42 @@ inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buff
     }
 }
 
-inline void Grisu2(double value, char* buffer, int* length, int* K) {
+inline void Grisu2(double value, char* buffer, int* length, int* K)
+{
     const DiyFp v(value);
     DiyFp w_m, w_p;
     v.NormalizedBoundaries(&w_m, &w_p);
 
     const DiyFp c_mk = GetCachedPower(w_p.e, K);
-    const DiyFp W = v.Normalize() * c_mk;
-    DiyFp Wp = w_p * c_mk;
-    DiyFp Wm = w_m * c_mk;
+    const DiyFp W    = v.Normalize() * c_mk;
+    DiyFp Wp         = w_p * c_mk;
+    DiyFp Wm         = w_m * c_mk;
     Wm.f++;
     Wp.f--;
     DigitGen(W, Wp, Wp.f - Wm.f, buffer, length, K);
 }
 
-inline char* WriteExponent(int K, char* buffer) {
-    if (K < 0) {
+inline char* WriteExponent(int K, char* buffer)
+{
+    if(K < 0)
+    {
         *buffer++ = '-';
-        K = -K;
+        K         = -K;
     }
 
-    if (K >= 100) {
+    if(K >= 100)
+    {
         *buffer++ = static_cast<char>('0' + static_cast<char>(K / 100));
         K %= 100;
         const char* d = GetDigitsLut() + K * 2;
-        *buffer++ = d[0];
-        *buffer++ = d[1];
+        *buffer++     = d[0];
+        *buffer++     = d[1];
     }
-    else if (K >= 10) {
+    else if(K >= 10)
+    {
         const char* d = GetDigitsLut() + K * 2;
-        *buffer++ = d[0];
-        *buffer++ = d[1];
+        *buffer++     = d[0];
+        *buffer++     = d[1];
     }
     else
         *buffer++ = static_cast<char>('0' + static_cast<char>(K));
@@ -151,87 +218,100 @@ inline char* WriteExponent(int K, char* buffer) {
     return buffer;
 }
 
-inline char* Prettify(char* buffer, int length, int k, int maxDecimalPlaces) {
-    const int kk = length + k;  // 10^(kk-1) <= v < 10^kk
+inline char* Prettify(char* buffer, int length, int k, int maxDecimalPlaces)
+{
+    const int kk = length + k; // 10^(kk-1) <= v < 10^kk
 
-    if (0 <= k && kk <= 21) {
+    if(0 <= k && kk <= 21)
+    {
         // 1234e7 -> 12340000000
-        for (int i = length; i < kk; i++)
+        for(int i = length; i < kk; i++)
             buffer[i] = '0';
-        buffer[kk] = '.';
+        buffer[kk]     = '.';
         buffer[kk + 1] = '0';
         return &buffer[kk + 2];
     }
-    else if (0 < kk && kk <= 21) {
+    else if(0 < kk && kk <= 21)
+    {
         // 1234e-2 -> 12.34
         std::memmove(&buffer[kk + 1], &buffer[kk], static_cast<size_t>(length - kk));
         buffer[kk] = '.';
-        if (0 > k + maxDecimalPlaces) {
+        if(0 > k + maxDecimalPlaces)
+        {
             // When maxDecimalPlaces = 2, 1.2345 -> 1.23, 1.102 -> 1.1
             // Remove extra trailing zeros (at least one) after truncation.
-            for (int i = kk + maxDecimalPlaces; i > kk + 1; i--)
-                if (buffer[i] != '0')
+            for(int i = kk + maxDecimalPlaces; i > kk + 1; i--)
+                if(buffer[i] != '0')
                     return &buffer[i + 1];
             return &buffer[kk + 2]; // Reserve one zero
         }
         else
             return &buffer[length + 1];
     }
-    else if (-6 < kk && kk <= 0) {
+    else if(-6 < kk && kk <= 0)
+    {
         // 1234e-6 -> 0.001234
         const int offset = 2 - kk;
         std::memmove(&buffer[offset], &buffer[0], static_cast<size_t>(length));
         buffer[0] = '0';
         buffer[1] = '.';
-        for (int i = 2; i < offset; i++)
+        for(int i = 2; i < offset; i++)
             buffer[i] = '0';
-        if (length - kk > maxDecimalPlaces) {
+        if(length - kk > maxDecimalPlaces)
+        {
             // When maxDecimalPlaces = 2, 0.123 -> 0.12, 0.102 -> 0.1
             // Remove extra trailing zeros (at least one) after truncation.
-            for (int i = maxDecimalPlaces + 1; i > 2; i--)
-                if (buffer[i] != '0')
+            for(int i = maxDecimalPlaces + 1; i > 2; i--)
+                if(buffer[i] != '0')
                     return &buffer[i + 1];
             return &buffer[3]; // Reserve one zero
         }
         else
             return &buffer[length + offset];
     }
-    else if (kk < -maxDecimalPlaces) {
+    else if(kk < -maxDecimalPlaces)
+    {
         // Truncate to zero
         buffer[0] = '0';
         buffer[1] = '.';
         buffer[2] = '0';
         return &buffer[3];
     }
-    else if (length == 1) {
+    else if(length == 1)
+    {
         // 1e30
         buffer[1] = 'e';
         return WriteExponent(kk - 1, &buffer[2]);
     }
-    else {
+    else
+    {
         // 1234e30 -> 1.234e33
         std::memmove(&buffer[2], &buffer[1], static_cast<size_t>(length - 1));
-        buffer[1] = '.';
+        buffer[1]          = '.';
         buffer[length + 1] = 'e';
         return WriteExponent(kk - 1, &buffer[0 + length + 2]);
     }
 }
 
-inline char* dtoa(double value, char* buffer, int maxDecimalPlaces = 324) {
+inline char* dtoa(double value, char* buffer, int maxDecimalPlaces = 324)
+{
     RAPIDJSON_ASSERT(maxDecimalPlaces >= 1);
     Double d(value);
-    if (d.IsZero()) {
-        if (d.Sign())
-            *buffer++ = '-';     // -0.0, Issue #289
+    if(d.IsZero())
+    {
+        if(d.Sign())
+            *buffer++ = '-'; // -0.0, Issue #289
         buffer[0] = '0';
         buffer[1] = '.';
         buffer[2] = '0';
         return &buffer[3];
     }
-    else {
-        if (value < 0) {
+    else
+    {
+        if(value < 0)
+        {
             *buffer++ = '-';
-            value = -value;
+            value     = -value;
         }
         int length, K;
         Grisu2(value, buffer, &length, &K);
diff --git a/include/rapidjson/internal/ieee754.h b/include/rapidjson/internal/ieee754.h
index 68c9e96649..f237b4277a 100644
--- a/include/rapidjson/internal/ieee754.h
+++ b/include/rapidjson/internal/ieee754.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_IEEE754_
@@ -20,8 +20,9 @@
 RAPIDJSON_NAMESPACE_BEGIN
 namespace internal {
 
-class Double {
-public:
+class Double
+{
+    public:
     Double() {}
     Double(double d) : d_(d) {}
     Double(uint64_t u) : u_(u) {}
@@ -29,14 +30,18 @@ public:
     double Value() const { return d_; }
     uint64_t Uint64Value() const { return u_; }
 
-    double NextPositiveDouble() const {
+    double NextPositiveDouble() const
+    {
         RAPIDJSON_ASSERT(!Sign());
         return Double(u_ + 1).Value();
     }
 
     bool Sign() const { return (u_ & kSignMask) != 0; }
     uint64_t Significand() const { return u_ & kSignificandMask; }
-    int Exponent() const { return static_cast<int>(((u_ & kExponentMask) >> kSignificandSize) - kExponentBias); }
+    int Exponent() const
+    {
+        return static_cast<int>(((u_ & kExponentMask) >> kSignificandSize) - kExponentBias);
+    }
 
     bool IsNan() const { return (u_ & kExponentMask) == kExponentMask && Significand() != 0; }
     bool IsInf() const { return (u_ & kExponentMask) == kExponentMask && Significand() == 0; }
@@ -44,29 +49,37 @@ public:
     bool IsNormal() const { return (u_ & kExponentMask) != 0 || Significand() == 0; }
     bool IsZero() const { return (u_ & (kExponentMask | kSignificandMask)) == 0; }
 
-    uint64_t IntegerSignificand() const { return IsNormal() ? Significand() | kHiddenBit : Significand(); }
-    int IntegerExponent() const { return (IsNormal() ? Exponent() : kDenormalExponent) - kSignificandSize; }
+    uint64_t IntegerSignificand() const
+    {
+        return IsNormal() ? Significand() | kHiddenBit : Significand();
+    }
+    int IntegerExponent() const
+    {
+        return (IsNormal() ? Exponent() : kDenormalExponent) - kSignificandSize;
+    }
     uint64_t ToBias() const { return (u_ & kSignMask) ? ~u_ + 1 : u_ | kSignMask; }
 
-    static int EffectiveSignificandSize(int order) {
-        if (order >= -1021)
+    static int EffectiveSignificandSize(int order)
+    {
+        if(order >= -1021)
             return 53;
-        else if (order <= -1074)
+        else if(order <= -1074)
             return 0;
         else
             return order + 1074;
     }
 
-private:
-    static const int kSignificandSize = 52;
-    static const int kExponentBias = 0x3FF;
-    static const int kDenormalExponent = 1 - kExponentBias;
-    static const uint64_t kSignMask = RAPIDJSON_UINT64_C2(0x80000000, 0x00000000);
-    static const uint64_t kExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
+    private:
+    static const int kSignificandSize      = 52;
+    static const int kExponentBias         = 0x3FF;
+    static const int kDenormalExponent     = 1 - kExponentBias;
+    static const uint64_t kSignMask        = RAPIDJSON_UINT64_C2(0x80000000, 0x00000000);
+    static const uint64_t kExponentMask    = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
     static const uint64_t kSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
-    static const uint64_t kHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
+    static const uint64_t kHiddenBit       = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
 
-    union {
+    union
+    {
         double d_;
         uint64_t u_;
     };
diff --git a/include/rapidjson/internal/itoa.h b/include/rapidjson/internal/itoa.h
index 9fe8c932ff..7084175a57 100644
--- a/include/rapidjson/internal/itoa.h
+++ b/include/rapidjson/internal/itoa.h
@@ -20,40 +20,45 @@
 RAPIDJSON_NAMESPACE_BEGIN
 namespace internal {
 
-inline const char* GetDigitsLut() {
+inline const char* GetDigitsLut()
+{
     static const char cDigitsLut[200] = {
-        '0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9',
-        '1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9',
-        '2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9',
-        '3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9',
-        '4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9',
-        '5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9',
-        '6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9',
-        '7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9',
-        '8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9',
-        '9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9'
-    };
+        '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0',
+        '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6',
+        '1', '7', '1', '8', '1', '9', '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2',
+        '5', '2', '6', '2', '7', '2', '8', '2', '9', '3', '0', '3', '1', '3', '2', '3', '3',
+        '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '4', '0', '4', '1', '4',
+        '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', '5', '0',
+        '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5',
+        '9', '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7',
+        '6', '8', '6', '9', '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7',
+        '6', '7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8', '2', '8', '3', '8', '4',
+        '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', '9', '0', '9', '1', '9', '2', '9',
+        '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'};
     return cDigitsLut;
 }
 
-inline char* u32toa(uint32_t value, char* buffer) {
+inline char* u32toa(uint32_t value, char* buffer)
+{
     RAPIDJSON_ASSERT(buffer != 0);
 
     const char* cDigitsLut = GetDigitsLut();
 
-    if (value < 10000) {
+    if(value < 10000)
+    {
         const uint32_t d1 = (value / 100) << 1;
         const uint32_t d2 = (value % 100) << 1;
 
-        if (value >= 1000)
+        if(value >= 1000)
             *buffer++ = cDigitsLut[d1];
-        if (value >= 100)
+        if(value >= 100)
             *buffer++ = cDigitsLut[d1 + 1];
-        if (value >= 10)
+        if(value >= 10)
             *buffer++ = cDigitsLut[d2];
         *buffer++ = cDigitsLut[d2 + 1];
     }
-    else if (value < 100000000) {
+    else if(value < 100000000)
+    {
         // value = bbbbcccc
         const uint32_t b = value / 10000;
         const uint32_t c = value % 10000;
@@ -64,11 +69,11 @@ inline char* u32toa(uint32_t value, char* buffer) {
         const uint32_t d3 = (c / 100) << 1;
         const uint32_t d4 = (c % 100) << 1;
 
-        if (value >= 10000000)
+        if(value >= 10000000)
             *buffer++ = cDigitsLut[d1];
-        if (value >= 1000000)
+        if(value >= 1000000)
             *buffer++ = cDigitsLut[d1 + 1];
-        if (value >= 100000)
+        if(value >= 100000)
             *buffer++ = cDigitsLut[d2];
         *buffer++ = cDigitsLut[d2 + 1];
 
@@ -77,16 +82,18 @@ inline char* u32toa(uint32_t value, char* buffer) {
         *buffer++ = cDigitsLut[d4];
         *buffer++ = cDigitsLut[d4 + 1];
     }
-    else {
+    else
+    {
         // value = aabbbbcccc in decimal
 
         const uint32_t a = value / 100000000; // 1 to 42
         value %= 100000000;
 
-        if (a >= 10) {
+        if(a >= 10)
+        {
             const unsigned i = a << 1;
-            *buffer++ = cDigitsLut[i];
-            *buffer++ = cDigitsLut[i + 1];
+            *buffer++        = cDigitsLut[i];
+            *buffer++        = cDigitsLut[i + 1];
         }
         else
             *buffer++ = static_cast<char>('0' + static_cast<char>(a));
@@ -112,45 +119,51 @@ inline char* u32toa(uint32_t value, char* buffer) {
     return buffer;
 }
 
-inline char* i32toa(int32_t value, char* buffer) {
+inline char* i32toa(int32_t value, char* buffer)
+{
     RAPIDJSON_ASSERT(buffer != 0);
     uint32_t u = static_cast<uint32_t>(value);
-    if (value < 0) {
+    if(value < 0)
+    {
         *buffer++ = '-';
-        u = ~u + 1;
+        u         = ~u + 1;
     }
 
     return u32toa(u, buffer);
 }
 
-inline char* u64toa(uint64_t value, char* buffer) {
+inline char* u64toa(uint64_t value, char* buffer)
+{
     RAPIDJSON_ASSERT(buffer != 0);
     const char* cDigitsLut = GetDigitsLut();
-    const uint64_t  kTen8 = 100000000;
-    const uint64_t  kTen9 = kTen8 * 10;
-    const uint64_t kTen10 = kTen8 * 100;
-    const uint64_t kTen11 = kTen8 * 1000;
-    const uint64_t kTen12 = kTen8 * 10000;
-    const uint64_t kTen13 = kTen8 * 100000;
-    const uint64_t kTen14 = kTen8 * 1000000;
-    const uint64_t kTen15 = kTen8 * 10000000;
-    const uint64_t kTen16 = kTen8 * kTen8;
+    const uint64_t kTen8   = 100000000;
+    const uint64_t kTen9   = kTen8 * 10;
+    const uint64_t kTen10  = kTen8 * 100;
+    const uint64_t kTen11  = kTen8 * 1000;
+    const uint64_t kTen12  = kTen8 * 10000;
+    const uint64_t kTen13  = kTen8 * 100000;
+    const uint64_t kTen14  = kTen8 * 1000000;
+    const uint64_t kTen15  = kTen8 * 10000000;
+    const uint64_t kTen16  = kTen8 * kTen8;
 
-    if (value < kTen8) {
+    if(value < kTen8)
+    {
         uint32_t v = static_cast<uint32_t>(value);
-        if (v < 10000) {
+        if(v < 10000)
+        {
             const uint32_t d1 = (v / 100) << 1;
             const uint32_t d2 = (v % 100) << 1;
 
-            if (v >= 1000)
+            if(v >= 1000)
                 *buffer++ = cDigitsLut[d1];
-            if (v >= 100)
+            if(v >= 100)
                 *buffer++ = cDigitsLut[d1 + 1];
-            if (v >= 10)
+            if(v >= 10)
                 *buffer++ = cDigitsLut[d2];
             *buffer++ = cDigitsLut[d2 + 1];
         }
-        else {
+        else
+        {
             // value = bbbbcccc
             const uint32_t b = v / 10000;
             const uint32_t c = v % 10000;
@@ -161,11 +174,11 @@ inline char* u64toa(uint64_t value, char* buffer) {
             const uint32_t d3 = (c / 100) << 1;
             const uint32_t d4 = (c % 100) << 1;
 
-            if (value >= 10000000)
+            if(value >= 10000000)
                 *buffer++ = cDigitsLut[d1];
-            if (value >= 1000000)
+            if(value >= 1000000)
                 *buffer++ = cDigitsLut[d1 + 1];
-            if (value >= 100000)
+            if(value >= 100000)
                 *buffer++ = cDigitsLut[d2];
             *buffer++ = cDigitsLut[d2 + 1];
 
@@ -175,7 +188,8 @@ inline char* u64toa(uint64_t value, char* buffer) {
             *buffer++ = cDigitsLut[d4 + 1];
         }
     }
-    else if (value < kTen16) {
+    else if(value < kTen16)
+    {
         const uint32_t v0 = static_cast<uint32_t>(value / kTen8);
         const uint32_t v1 = static_cast<uint32_t>(value % kTen8);
 
@@ -197,19 +211,19 @@ inline char* u64toa(uint64_t value, char* buffer) {
         const uint32_t d7 = (c1 / 100) << 1;
         const uint32_t d8 = (c1 % 100) << 1;
 
-        if (value >= kTen15)
+        if(value >= kTen15)
             *buffer++ = cDigitsLut[d1];
-        if (value >= kTen14)
+        if(value >= kTen14)
             *buffer++ = cDigitsLut[d1 + 1];
-        if (value >= kTen13)
+        if(value >= kTen13)
             *buffer++ = cDigitsLut[d2];
-        if (value >= kTen12)
+        if(value >= kTen12)
             *buffer++ = cDigitsLut[d2 + 1];
-        if (value >= kTen11)
+        if(value >= kTen11)
             *buffer++ = cDigitsLut[d3];
-        if (value >= kTen10)
+        if(value >= kTen10)
             *buffer++ = cDigitsLut[d3 + 1];
-        if (value >= kTen9)
+        if(value >= kTen9)
             *buffer++ = cDigitsLut[d4];
 
         *buffer++ = cDigitsLut[d4 + 1];
@@ -222,31 +236,35 @@ inline char* u64toa(uint64_t value, char* buffer) {
         *buffer++ = cDigitsLut[d8];
         *buffer++ = cDigitsLut[d8 + 1];
     }
-    else {
+    else
+    {
         const uint32_t a = static_cast<uint32_t>(value / kTen16); // 1 to 1844
         value %= kTen16;
 
-        if (a < 10)
+        if(a < 10)
             *buffer++ = static_cast<char>('0' + static_cast<char>(a));
-        else if (a < 100) {
+        else if(a < 100)
+        {
             const uint32_t i = a << 1;
-            *buffer++ = cDigitsLut[i];
-            *buffer++ = cDigitsLut[i + 1];
+            *buffer++        = cDigitsLut[i];
+            *buffer++        = cDigitsLut[i + 1];
         }
-        else if (a < 1000) {
+        else if(a < 1000)
+        {
             *buffer++ = static_cast<char>('0' + static_cast<char>(a / 100));
 
             const uint32_t i = (a % 100) << 1;
-            *buffer++ = cDigitsLut[i];
-            *buffer++ = cDigitsLut[i + 1];
+            *buffer++        = cDigitsLut[i];
+            *buffer++        = cDigitsLut[i + 1];
         }
-        else {
+        else
+        {
             const uint32_t i = (a / 100) << 1;
             const uint32_t j = (a % 100) << 1;
-            *buffer++ = cDigitsLut[i];
-            *buffer++ = cDigitsLut[i + 1];
-            *buffer++ = cDigitsLut[j];
-            *buffer++ = cDigitsLut[j + 1];
+            *buffer++        = cDigitsLut[i];
+            *buffer++        = cDigitsLut[i + 1];
+            *buffer++        = cDigitsLut[j];
+            *buffer++        = cDigitsLut[j + 1];
         }
 
         const uint32_t v0 = static_cast<uint32_t>(value / kTen8);
@@ -291,12 +309,14 @@ inline char* u64toa(uint64_t value, char* buffer) {
     return buffer;
 }
 
-inline char* i64toa(int64_t value, char* buffer) {
+inline char* i64toa(int64_t value, char* buffer)
+{
     RAPIDJSON_ASSERT(buffer != 0);
     uint64_t u = static_cast<uint64_t>(value);
-    if (value < 0) {
+    if(value < 0)
+    {
         *buffer++ = '-';
-        u = ~u + 1;
+        u         = ~u + 1;
     }
 
     return u64toa(u, buffer);
diff --git a/include/rapidjson/internal/meta.h b/include/rapidjson/internal/meta.h
index 27092dc0d6..abdfaeba8e 100644
--- a/include/rapidjson/internal/meta.h
+++ b/include/rapidjson/internal/meta.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_INTERNAL_META_H_
@@ -36,140 +36,253 @@ RAPIDJSON_NAMESPACE_BEGIN
 namespace internal {
 
 // Helper to wrap/convert arbitrary types to void, useful for arbitrary type matching
-template <typename T> struct Void { typedef void Type; };
+template <typename T>
+struct Void
+{
+    typedef void Type;
+};
 
 ///////////////////////////////////////////////////////////////////////////////
 // BoolType, TrueType, FalseType
 //
-template <bool Cond> struct BoolType {
+template <bool Cond>
+struct BoolType
+{
     static const bool Value = Cond;
     typedef BoolType Type;
 };
 typedef BoolType<true> TrueType;
 typedef BoolType<false> FalseType;
 
-
 ///////////////////////////////////////////////////////////////////////////////
 // SelectIf, BoolExpr, NotExpr, AndExpr, OrExpr
 //
 
-template <bool C> struct SelectIfImpl { template <typename T1, typename T2> struct Apply { typedef T1 Type; }; };
-template <> struct SelectIfImpl<false> { template <typename T1, typename T2> struct Apply { typedef T2 Type; }; };
-template <bool C, typename T1, typename T2> struct SelectIfCond : SelectIfImpl<C>::template Apply<T1,T2> {};
-template <typename C, typename T1, typename T2> struct SelectIf : SelectIfCond<C::Value, T1, T2> {};
+template <bool C>
+struct SelectIfImpl
+{
+    template <typename T1, typename T2>
+    struct Apply
+    {
+        typedef T1 Type;
+    };
+};
+template <>
+struct SelectIfImpl<false>
+{
+    template <typename T1, typename T2>
+    struct Apply
+    {
+        typedef T2 Type;
+    };
+};
+template <bool C, typename T1, typename T2>
+struct SelectIfCond : SelectIfImpl<C>::template Apply<T1, T2>
+{
+};
+template <typename C, typename T1, typename T2>
+struct SelectIf : SelectIfCond<C::Value, T1, T2>
+{
+};
 
-template <bool Cond1, bool Cond2> struct AndExprCond : FalseType {};
-template <> struct AndExprCond<true, true> : TrueType {};
-template <bool Cond1, bool Cond2> struct OrExprCond : TrueType {};
-template <> struct OrExprCond<false, false> : FalseType {};
-
-template <typename C> struct BoolExpr : SelectIf<C,TrueType,FalseType>::Type {};
-template <typename C> struct NotExpr  : SelectIf<C,FalseType,TrueType>::Type {};
-template <typename C1, typename C2> struct AndExpr : AndExprCond<C1::Value, C2::Value>::Type {};
-template <typename C1, typename C2> struct OrExpr  : OrExprCond<C1::Value, C2::Value>::Type {};
+template <bool Cond1, bool Cond2>
+struct AndExprCond : FalseType
+{
+};
+template <>
+struct AndExprCond<true, true> : TrueType
+{
+};
+template <bool Cond1, bool Cond2>
+struct OrExprCond : TrueType
+{
+};
+template <>
+struct OrExprCond<false, false> : FalseType
+{
+};
 
+template <typename C>
+struct BoolExpr : SelectIf<C, TrueType, FalseType>::Type
+{
+};
+template <typename C>
+struct NotExpr : SelectIf<C, FalseType, TrueType>::Type
+{
+};
+template <typename C1, typename C2>
+struct AndExpr : AndExprCond<C1::Value, C2::Value>::Type
+{
+};
+template <typename C1, typename C2>
+struct OrExpr : OrExprCond<C1::Value, C2::Value>::Type
+{
+};
 
 ///////////////////////////////////////////////////////////////////////////////
 // AddConst, MaybeAddConst, RemoveConst
-template <typename T> struct AddConst { typedef const T Type; };
-template <bool Constify, typename T> struct MaybeAddConst : SelectIfCond<Constify, const T, T> {};
-template <typename T> struct RemoveConst { typedef T Type; };
-template <typename T> struct RemoveConst<const T> { typedef T Type; };
-
+template <typename T>
+struct AddConst
+{
+    typedef const T Type;
+};
+template <bool Constify, typename T>
+struct MaybeAddConst : SelectIfCond<Constify, const T, T>
+{
+};
+template <typename T>
+struct RemoveConst
+{
+    typedef T Type;
+};
+template <typename T>
+struct RemoveConst<const T>
+{
+    typedef T Type;
+};
 
 ///////////////////////////////////////////////////////////////////////////////
 // IsSame, IsConst, IsMoreConst, IsPointer
 //
-template <typename T, typename U> struct IsSame : FalseType {};
-template <typename T> struct IsSame<T, T> : TrueType {};
+template <typename T, typename U>
+struct IsSame : FalseType
+{
+};
+template <typename T>
+struct IsSame<T, T> : TrueType
+{
+};
 
-template <typename T> struct IsConst : FalseType {};
-template <typename T> struct IsConst<const T> : TrueType {};
+template <typename T>
+struct IsConst : FalseType
+{
+};
+template <typename T>
+struct IsConst<const T> : TrueType
+{
+};
 
 template <typename CT, typename T>
-struct IsMoreConst
-    : AndExpr<IsSame<typename RemoveConst<CT>::Type, typename RemoveConst<T>::Type>,
-              BoolType<IsConst<CT>::Value >= IsConst<T>::Value> >::Type {};
+struct IsMoreConst : AndExpr<IsSame<typename RemoveConst<CT>::Type, typename RemoveConst<T>::Type>,
+                             BoolType<IsConst<CT>::Value >= IsConst<T>::Value>>::Type
+{
+};
 
-template <typename T> struct IsPointer : FalseType {};
-template <typename T> struct IsPointer<T*> : TrueType {};
+template <typename T>
+struct IsPointer : FalseType
+{
+};
+template <typename T>
+struct IsPointer<T*> : TrueType
+{
+};
 
 ///////////////////////////////////////////////////////////////////////////////
 // IsBaseOf
 //
 #if RAPIDJSON_HAS_CXX11_TYPETRAITS
 
-template <typename B, typename D> struct IsBaseOf
-    : BoolType< ::std::is_base_of<B,D>::value> {};
+template <typename B, typename D>
+struct IsBaseOf : BoolType<::std::is_base_of<B, D>::value>
+{
+};
 
 #else // simplified version adopted from Boost
 
-template<typename B, typename D> struct IsBaseOfImpl {
+template <typename B, typename D>
+struct IsBaseOfImpl
+{
     RAPIDJSON_STATIC_ASSERT(sizeof(B) != 0);
     RAPIDJSON_STATIC_ASSERT(sizeof(D) != 0);
 
     typedef char (&Yes)[1];
-    typedef char (&No) [2];
+    typedef char (&No)[2];
 
     template <typename T>
     static Yes Check(const D*, T);
-    static No  Check(const B*, int);
+    static No Check(const B*, int);
 
-    struct Host {
+    struct Host
+    {
         operator const B*() const;
         operator const D*();
     };
 
-    enum { Value = (sizeof(Check(Host(), 0)) == sizeof(Yes)) };
+    enum
+    {
+        Value = (sizeof(Check(Host(), 0)) == sizeof(Yes))
+    };
 };
 
-template <typename B, typename D> struct IsBaseOf
-    : OrExpr<IsSame<B, D>, BoolExpr<IsBaseOfImpl<B, D> > >::Type {};
+template <typename B, typename D>
+struct IsBaseOf : OrExpr<IsSame<B, D>, BoolExpr<IsBaseOfImpl<B, D>>>::Type
+{
+};
 
 #endif // RAPIDJSON_HAS_CXX11_TYPETRAITS
 
-
 //////////////////////////////////////////////////////////////////////////
 // EnableIf / DisableIf
 //
-template <bool Condition, typename T = void> struct EnableIfCond  { typedef T Type; };
-template <typename T> struct EnableIfCond<false, T> { /* empty */ };
+template <bool Condition, typename T = void>
+struct EnableIfCond
+{
+    typedef T Type;
+};
+template <typename T>
+struct EnableIfCond<false, T>
+{ /* empty */
+};
 
-template <bool Condition, typename T = void> struct DisableIfCond { typedef T Type; };
-template <typename T> struct DisableIfCond<true, T> { /* empty */ };
+template <bool Condition, typename T = void>
+struct DisableIfCond
+{
+    typedef T Type;
+};
+template <typename T>
+struct DisableIfCond<true, T>
+{ /* empty */
+};
 
 template <typename Condition, typename T = void>
-struct EnableIf : EnableIfCond<Condition::Value, T> {};
+struct EnableIf : EnableIfCond<Condition::Value, T>
+{
+};
 
 template <typename Condition, typename T = void>
-struct DisableIf : DisableIfCond<Condition::Value, T> {};
+struct DisableIf : DisableIfCond<Condition::Value, T>
+{
+};
 
 // SFINAE helpers
-struct SfinaeTag {};
-template <typename T> struct RemoveSfinaeTag;
-template <typename T> struct RemoveSfinaeTag<SfinaeTag&(*)(T)> { typedef T Type; };
+struct SfinaeTag
+{
+};
+template <typename T>
+struct RemoveSfinaeTag;
+template <typename T>
+struct RemoveSfinaeTag<SfinaeTag& (*)(T)>
+{
+    typedef T Type;
+};
 
-#define RAPIDJSON_REMOVEFPTR_(type) \
-    typename ::RAPIDJSON_NAMESPACE::internal::RemoveSfinaeTag \
-        < ::RAPIDJSON_NAMESPACE::internal::SfinaeTag&(*) type>::Type
+#define RAPIDJSON_REMOVEFPTR_(type)                            \
+    typename ::RAPIDJSON_NAMESPACE::internal::RemoveSfinaeTag< \
+        ::RAPIDJSON_NAMESPACE::internal::SfinaeTag&(*)type>::Type
 
 #define RAPIDJSON_ENABLEIF(cond) \
-    typename ::RAPIDJSON_NAMESPACE::internal::EnableIf \
-        <RAPIDJSON_REMOVEFPTR_(cond)>::Type * = NULL
+    typename ::RAPIDJSON_NAMESPACE::internal::EnableIf<RAPIDJSON_REMOVEFPTR_(cond)>::Type* = NULL
 
 #define RAPIDJSON_DISABLEIF(cond) \
-    typename ::RAPIDJSON_NAMESPACE::internal::DisableIf \
-        <RAPIDJSON_REMOVEFPTR_(cond)>::Type * = NULL
+    typename ::RAPIDJSON_NAMESPACE::internal::DisableIf<RAPIDJSON_REMOVEFPTR_(cond)>::Type* = NULL
 
-#define RAPIDJSON_ENABLEIF_RETURN(cond,returntype) \
-    typename ::RAPIDJSON_NAMESPACE::internal::EnableIf \
-        <RAPIDJSON_REMOVEFPTR_(cond), \
-         RAPIDJSON_REMOVEFPTR_(returntype)>::Type
+#define RAPIDJSON_ENABLEIF_RETURN(cond, returntype)                                 \
+    typename ::RAPIDJSON_NAMESPACE::internal::EnableIf<RAPIDJSON_REMOVEFPTR_(cond), \
+                                                       RAPIDJSON_REMOVEFPTR_(returntype)>::Type
 
-#define RAPIDJSON_DISABLEIF_RETURN(cond,returntype) \
-    typename ::RAPIDJSON_NAMESPACE::internal::DisableIf \
-        <RAPIDJSON_REMOVEFPTR_(cond), \
-         RAPIDJSON_REMOVEFPTR_(returntype)>::Type
+#define RAPIDJSON_DISABLEIF_RETURN(cond, returntype)                                 \
+    typename ::RAPIDJSON_NAMESPACE::internal::DisableIf<RAPIDJSON_REMOVEFPTR_(cond), \
+                                                        RAPIDJSON_REMOVEFPTR_(returntype)>::Type
 
 } // namespace internal
 RAPIDJSON_NAMESPACE_END
diff --git a/include/rapidjson/internal/pow10.h b/include/rapidjson/internal/pow10.h
index eae1a43ed1..6ac6116836 100644
--- a/include/rapidjson/internal/pow10.h
+++ b/include/rapidjson/internal/pow10.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_POW10_
@@ -25,26 +25,39 @@ namespace internal {
     \param n non-negative exponent. Must <= 308.
     \return 10.0^n
 */
-inline double Pow10(int n) {
-    static const double e[] = { // 1e-0...1e308: 309 * 8 bytes = 2472 bytes
-        1e+0,  
-        1e+1,  1e+2,  1e+3,  1e+4,  1e+5,  1e+6,  1e+7,  1e+8,  1e+9,  1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20, 
-        1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40,
-        1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60,
-        1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80,
-        1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100,
-        1e+101,1e+102,1e+103,1e+104,1e+105,1e+106,1e+107,1e+108,1e+109,1e+110,1e+111,1e+112,1e+113,1e+114,1e+115,1e+116,1e+117,1e+118,1e+119,1e+120,
-        1e+121,1e+122,1e+123,1e+124,1e+125,1e+126,1e+127,1e+128,1e+129,1e+130,1e+131,1e+132,1e+133,1e+134,1e+135,1e+136,1e+137,1e+138,1e+139,1e+140,
-        1e+141,1e+142,1e+143,1e+144,1e+145,1e+146,1e+147,1e+148,1e+149,1e+150,1e+151,1e+152,1e+153,1e+154,1e+155,1e+156,1e+157,1e+158,1e+159,1e+160,
-        1e+161,1e+162,1e+163,1e+164,1e+165,1e+166,1e+167,1e+168,1e+169,1e+170,1e+171,1e+172,1e+173,1e+174,1e+175,1e+176,1e+177,1e+178,1e+179,1e+180,
-        1e+181,1e+182,1e+183,1e+184,1e+185,1e+186,1e+187,1e+188,1e+189,1e+190,1e+191,1e+192,1e+193,1e+194,1e+195,1e+196,1e+197,1e+198,1e+199,1e+200,
-        1e+201,1e+202,1e+203,1e+204,1e+205,1e+206,1e+207,1e+208,1e+209,1e+210,1e+211,1e+212,1e+213,1e+214,1e+215,1e+216,1e+217,1e+218,1e+219,1e+220,
-        1e+221,1e+222,1e+223,1e+224,1e+225,1e+226,1e+227,1e+228,1e+229,1e+230,1e+231,1e+232,1e+233,1e+234,1e+235,1e+236,1e+237,1e+238,1e+239,1e+240,
-        1e+241,1e+242,1e+243,1e+244,1e+245,1e+246,1e+247,1e+248,1e+249,1e+250,1e+251,1e+252,1e+253,1e+254,1e+255,1e+256,1e+257,1e+258,1e+259,1e+260,
-        1e+261,1e+262,1e+263,1e+264,1e+265,1e+266,1e+267,1e+268,1e+269,1e+270,1e+271,1e+272,1e+273,1e+274,1e+275,1e+276,1e+277,1e+278,1e+279,1e+280,
-        1e+281,1e+282,1e+283,1e+284,1e+285,1e+286,1e+287,1e+288,1e+289,1e+290,1e+291,1e+292,1e+293,1e+294,1e+295,1e+296,1e+297,1e+298,1e+299,1e+300,
-        1e+301,1e+302,1e+303,1e+304,1e+305,1e+306,1e+307,1e+308
-    };
+inline double Pow10(int n)
+{
+    static const double e[] = {
+        // 1e-0...1e308: 309 * 8 bytes = 2472 bytes
+        1e+0,   1e+1,   1e+2,   1e+3,   1e+4,   1e+5,   1e+6,   1e+7,   1e+8,   1e+9,   1e+10,
+        1e+11,  1e+12,  1e+13,  1e+14,  1e+15,  1e+16,  1e+17,  1e+18,  1e+19,  1e+20,  1e+21,
+        1e+22,  1e+23,  1e+24,  1e+25,  1e+26,  1e+27,  1e+28,  1e+29,  1e+30,  1e+31,  1e+32,
+        1e+33,  1e+34,  1e+35,  1e+36,  1e+37,  1e+38,  1e+39,  1e+40,  1e+41,  1e+42,  1e+43,
+        1e+44,  1e+45,  1e+46,  1e+47,  1e+48,  1e+49,  1e+50,  1e+51,  1e+52,  1e+53,  1e+54,
+        1e+55,  1e+56,  1e+57,  1e+58,  1e+59,  1e+60,  1e+61,  1e+62,  1e+63,  1e+64,  1e+65,
+        1e+66,  1e+67,  1e+68,  1e+69,  1e+70,  1e+71,  1e+72,  1e+73,  1e+74,  1e+75,  1e+76,
+        1e+77,  1e+78,  1e+79,  1e+80,  1e+81,  1e+82,  1e+83,  1e+84,  1e+85,  1e+86,  1e+87,
+        1e+88,  1e+89,  1e+90,  1e+91,  1e+92,  1e+93,  1e+94,  1e+95,  1e+96,  1e+97,  1e+98,
+        1e+99,  1e+100, 1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107, 1e+108, 1e+109,
+        1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116, 1e+117, 1e+118, 1e+119, 1e+120,
+        1e+121, 1e+122, 1e+123, 1e+124, 1e+125, 1e+126, 1e+127, 1e+128, 1e+129, 1e+130, 1e+131,
+        1e+132, 1e+133, 1e+134, 1e+135, 1e+136, 1e+137, 1e+138, 1e+139, 1e+140, 1e+141, 1e+142,
+        1e+143, 1e+144, 1e+145, 1e+146, 1e+147, 1e+148, 1e+149, 1e+150, 1e+151, 1e+152, 1e+153,
+        1e+154, 1e+155, 1e+156, 1e+157, 1e+158, 1e+159, 1e+160, 1e+161, 1e+162, 1e+163, 1e+164,
+        1e+165, 1e+166, 1e+167, 1e+168, 1e+169, 1e+170, 1e+171, 1e+172, 1e+173, 1e+174, 1e+175,
+        1e+176, 1e+177, 1e+178, 1e+179, 1e+180, 1e+181, 1e+182, 1e+183, 1e+184, 1e+185, 1e+186,
+        1e+187, 1e+188, 1e+189, 1e+190, 1e+191, 1e+192, 1e+193, 1e+194, 1e+195, 1e+196, 1e+197,
+        1e+198, 1e+199, 1e+200, 1e+201, 1e+202, 1e+203, 1e+204, 1e+205, 1e+206, 1e+207, 1e+208,
+        1e+209, 1e+210, 1e+211, 1e+212, 1e+213, 1e+214, 1e+215, 1e+216, 1e+217, 1e+218, 1e+219,
+        1e+220, 1e+221, 1e+222, 1e+223, 1e+224, 1e+225, 1e+226, 1e+227, 1e+228, 1e+229, 1e+230,
+        1e+231, 1e+232, 1e+233, 1e+234, 1e+235, 1e+236, 1e+237, 1e+238, 1e+239, 1e+240, 1e+241,
+        1e+242, 1e+243, 1e+244, 1e+245, 1e+246, 1e+247, 1e+248, 1e+249, 1e+250, 1e+251, 1e+252,
+        1e+253, 1e+254, 1e+255, 1e+256, 1e+257, 1e+258, 1e+259, 1e+260, 1e+261, 1e+262, 1e+263,
+        1e+264, 1e+265, 1e+266, 1e+267, 1e+268, 1e+269, 1e+270, 1e+271, 1e+272, 1e+273, 1e+274,
+        1e+275, 1e+276, 1e+277, 1e+278, 1e+279, 1e+280, 1e+281, 1e+282, 1e+283, 1e+284, 1e+285,
+        1e+286, 1e+287, 1e+288, 1e+289, 1e+290, 1e+291, 1e+292, 1e+293, 1e+294, 1e+295, 1e+296,
+        1e+297, 1e+298, 1e+299, 1e+300, 1e+301, 1e+302, 1e+303, 1e+304, 1e+305, 1e+306, 1e+307,
+        1e+308};
     RAPIDJSON_ASSERT(n >= 0 && n <= 308);
     return e[n];
 }
diff --git a/include/rapidjson/internal/regex.h b/include/rapidjson/internal/regex.h
index 7740dcd527..1a078f1ba0 100644
--- a/include/rapidjson/internal/regex.h
+++ b/include/rapidjson/internal/regex.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_INTERNAL_REGEX_H_
@@ -22,7 +22,7 @@
 #ifdef __clang__
 RAPIDJSON_DIAG_PUSH
 RAPIDJSON_DIAG_OFF(padded)
-RAPIDJSON_DIAG_OFF(switch-enum)
+RAPIDJSON_DIAG_OFF(switch - enum)
 #elif defined(_MSC_VER)
 RAPIDJSON_DIAG_PUSH
 RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
@@ -44,20 +44,23 @@ namespace internal {
 // DecodedStream
 
 template <typename SourceStream, typename Encoding>
-class DecodedStream {
-public:
+class DecodedStream
+{
+    public:
     DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); }
     unsigned Peek() { return codepoint_; }
-    unsigned Take() {
+    unsigned Take()
+    {
         unsigned c = codepoint_;
-        if (c) // No further decoding when '\0'
+        if(c) // No further decoding when '\0'
             Decode();
         return c;
     }
 
-private:
-    void Decode() {
-        if (!Encoding::Decode(ss_, &codepoint_))
+    private:
+    void Decode()
+    {
+        if(!Encoding::Decode(ss_, &codepoint_))
             codepoint_ = 0;
     }
 
@@ -68,7 +71,8 @@ private:
 ///////////////////////////////////////////////////////////////////////////////
 // GenericRegex
 
-static const SizeType kRegexInvalidState = ~SizeType(0);  //!< Represents an invalid index in GenericRegex::State::out, out1
+static const SizeType kRegexInvalidState =
+    ~SizeType(0); //!< Represents an invalid index in GenericRegex::State::out, out1
 static const SizeType kRegexInvalidRange = ~SizeType(0);
 
 template <typename Encoding, typename Allocator>
@@ -102,38 +106,42 @@ class GenericRegexSearch;
     - \c \\t Tab (U+0009)
     - \c \\v Vertical tab (U+000B)
 
-    \note This is a Thompson NFA engine, implemented with reference to 
-        Cox, Russ. "Regular Expression Matching Can Be Simple And Fast (but is slow in Java, Perl, PHP, Python, Ruby,...).", 
-        https://swtch.com/~rsc/regexp/regexp1.html 
+    \note This is a Thompson NFA engine, implemented with reference to
+        Cox, Russ. "Regular Expression Matching Can Be Simple And Fast (but is slow in Java, Perl,
+   PHP, Python, Ruby,...).", https://swtch.com/~rsc/regexp/regexp1.html
 */
 template <typename Encoding, typename Allocator = CrtAllocator>
-class GenericRegex {
-public:
+class GenericRegex
+{
+    public:
     typedef Encoding EncodingType;
     typedef typename Encoding::Ch Ch;
-    template <typename, typename> friend class GenericRegexSearch;
+    template <typename, typename>
+    friend class GenericRegexSearch;
 
-    GenericRegex(const Ch* source, Allocator* allocator = 0) : 
-        ownAllocator_(allocator ? 0 : RAPIDJSON_NEW(Allocator)()), allocator_(allocator ? allocator : ownAllocator_), 
-        states_(allocator_, 256), ranges_(allocator_, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(), 
-        anchorBegin_(), anchorEnd_()
+    GenericRegex(const Ch* source, Allocator* allocator = 0)
+        : ownAllocator_(allocator ? 0 : RAPIDJSON_NEW(Allocator)()),
+          allocator_(allocator ? allocator : ownAllocator_),
+          states_(allocator_, 256),
+          ranges_(allocator_, 256),
+          root_(kRegexInvalidState),
+          stateCount_(),
+          rangeCount_(),
+          anchorBegin_(),
+          anchorEnd_()
     {
         GenericStringStream<Encoding> ss(source);
         DecodedStream<GenericStringStream<Encoding>, Encoding> ds(ss);
         Parse(ds);
     }
 
-    ~GenericRegex()
+    ~GenericRegex() { RAPIDJSON_DELETE(ownAllocator_); }
+
+    bool IsValid() const { return root_ != kRegexInvalidState; }
+
+    private:
+    enum Operator
     {
-        RAPIDJSON_DELETE(ownAllocator_);
-    }
-
-    bool IsValid() const {
-        return root_ != kRegexInvalidState;
-    }
-
-private:
-    enum Operator {
         kZeroOrOne,
         kZeroOrMore,
         kOneOrMore,
@@ -142,172 +150,181 @@ private:
         kLeftParenthesis
     };
 
-    static const unsigned kAnyCharacterClass = 0xFFFFFFFF;   //!< For '.'
+    static const unsigned kAnyCharacterClass   = 0xFFFFFFFF; //!< For '.'
     static const unsigned kRangeCharacterClass = 0xFFFFFFFE;
-    static const unsigned kRangeNegationFlag = 0x80000000;
+    static const unsigned kRangeNegationFlag   = 0x80000000;
 
-    struct Range {
-        unsigned start; // 
+    struct Range
+    {
+        unsigned start; //
         unsigned end;
         SizeType next;
     };
 
-    struct State {
-        SizeType out;     //!< Equals to kInvalid for matching state
-        SizeType out1;    //!< Equals to non-kInvalid for split
+    struct State
+    {
+        SizeType out;  //!< Equals to kInvalid for matching state
+        SizeType out1; //!< Equals to non-kInvalid for split
         SizeType rangeStart;
         unsigned codepoint;
     };
 
-    struct Frag {
+    struct Frag
+    {
         Frag(SizeType s, SizeType o, SizeType m) : start(s), out(o), minIndex(m) {}
         SizeType start;
         SizeType out; //!< link-list of all output states
         SizeType minIndex;
     };
 
-    State& GetState(SizeType index) {
+    State& GetState(SizeType index)
+    {
         RAPIDJSON_ASSERT(index < stateCount_);
         return states_.template Bottom<State>()[index];
     }
 
-    const State& GetState(SizeType index) const {
+    const State& GetState(SizeType index) const
+    {
         RAPIDJSON_ASSERT(index < stateCount_);
         return states_.template Bottom<State>()[index];
     }
 
-    Range& GetRange(SizeType index) {
+    Range& GetRange(SizeType index)
+    {
         RAPIDJSON_ASSERT(index < rangeCount_);
         return ranges_.template Bottom<Range>()[index];
     }
 
-    const Range& GetRange(SizeType index) const {
+    const Range& GetRange(SizeType index) const
+    {
         RAPIDJSON_ASSERT(index < rangeCount_);
         return ranges_.template Bottom<Range>()[index];
     }
 
     template <typename InputStream>
-    void Parse(DecodedStream<InputStream, Encoding>& ds) {
-        Stack<Allocator> operandStack(allocator_, 256);    // Frag
-        Stack<Allocator> operatorStack(allocator_, 256);   // Operator
-        Stack<Allocator> atomCountStack(allocator_, 256);  // unsigned (Atom per parenthesis)
+    void Parse(DecodedStream<InputStream, Encoding>& ds)
+    {
+        Stack<Allocator> operandStack(allocator_, 256);   // Frag
+        Stack<Allocator> operatorStack(allocator_, 256);  // Operator
+        Stack<Allocator> atomCountStack(allocator_, 256); // unsigned (Atom per parenthesis)
 
         *atomCountStack.template Push<unsigned>() = 0;
 
         unsigned codepoint;
-        while (ds.Peek() != 0) {
-            switch (codepoint = ds.Take()) {
-                case '^':
-                    anchorBegin_ = true;
-                    break;
+        while(ds.Peek() != 0)
+        {
+            switch(codepoint = ds.Take())
+            {
+            case '^': anchorBegin_ = true; break;
 
-                case '$':
-                    anchorEnd_ = true;
-                    break;
+            case '$': anchorEnd_ = true; break;
 
-                case '|':
-                    while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation)
-                        if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
-                            return;
-                    *operatorStack.template Push<Operator>() = kAlternation;
-                    *atomCountStack.template Top<unsigned>() = 0;
-                    break;
-
-                case '(':
-                    *operatorStack.template Push<Operator>() = kLeftParenthesis;
-                    *atomCountStack.template Push<unsigned>() = 0;
-                    break;
-
-                case ')':
-                    while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis)
-                        if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
-                            return;
-                    if (operatorStack.Empty())
+            case '|':
+                while(!operatorStack.Empty() &&
+                      *operatorStack.template Top<Operator>() < kAlternation)
+                    if(!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
                         return;
-                    operatorStack.template Pop<Operator>(1);
-                    atomCountStack.template Pop<unsigned>(1);
-                    ImplicitConcatenation(atomCountStack, operatorStack);
-                    break;
+                *operatorStack.template Push<Operator>() = kAlternation;
+                *atomCountStack.template Top<unsigned>() = 0;
+                break;
 
-                case '?':
-                    if (!Eval(operandStack, kZeroOrOne))
+            case '(':
+                *operatorStack.template Push<Operator>()  = kLeftParenthesis;
+                *atomCountStack.template Push<unsigned>() = 0;
+                break;
+
+            case ')':
+                while(!operatorStack.Empty() &&
+                      *operatorStack.template Top<Operator>() != kLeftParenthesis)
+                    if(!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
                         return;
-                    break;
+                if(operatorStack.Empty())
+                    return;
+                operatorStack.template Pop<Operator>(1);
+                atomCountStack.template Pop<unsigned>(1);
+                ImplicitConcatenation(atomCountStack, operatorStack);
+                break;
 
-                case '*':
-                    if (!Eval(operandStack, kZeroOrMore))
+            case '?':
+                if(!Eval(operandStack, kZeroOrOne))
+                    return;
+                break;
+
+            case '*':
+                if(!Eval(operandStack, kZeroOrMore))
+                    return;
+                break;
+
+            case '+':
+                if(!Eval(operandStack, kOneOrMore))
+                    return;
+                break;
+
+            case '{': {
+                unsigned n, m;
+                if(!ParseUnsigned(ds, &n))
+                    return;
+
+                if(ds.Peek() == ',')
+                {
+                    ds.Take();
+                    if(ds.Peek() == '}')
+                        m = kInfinityQuantifier;
+                    else if(!ParseUnsigned(ds, &m) || m < n)
                         return;
-                    break;
+                }
+                else
+                    m = n;
 
-                case '+':
-                    if (!Eval(operandStack, kOneOrMore))
-                        return;
-                    break;
+                if(!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}')
+                    return;
+                ds.Take();
+            }
+            break;
 
-                case '{':
-                    {
-                        unsigned n, m;
-                        if (!ParseUnsigned(ds, &n))
-                            return;
+            case '.':
+                PushOperand(operandStack, kAnyCharacterClass);
+                ImplicitConcatenation(atomCountStack, operatorStack);
+                break;
 
-                        if (ds.Peek() == ',') {
-                            ds.Take();
-                            if (ds.Peek() == '}')
-                                m = kInfinityQuantifier;
-                            else if (!ParseUnsigned(ds, &m) || m < n)
-                                return;
-                        }
-                        else
-                            m = n;
+            case '[': {
+                SizeType range;
+                if(!ParseRange(ds, &range))
+                    return;
+                SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass);
+                GetState(s).rangeStart              = range;
+                *operandStack.template Push<Frag>() = Frag(s, s, s);
+            }
+                ImplicitConcatenation(atomCountStack, operatorStack);
+                break;
 
-                        if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}')
-                            return;
-                        ds.Take();
-                    }
-                    break;
+            case '\\': // Escape character
+                if(!CharacterEscape(ds, &codepoint))
+                    return; // Unsupported escape character
+                // fall through to default
+                RAPIDJSON_DELIBERATE_FALLTHROUGH;
 
-                case '.':
-                    PushOperand(operandStack, kAnyCharacterClass);
-                    ImplicitConcatenation(atomCountStack, operatorStack);
-                    break;
-
-                case '[':
-                    {
-                        SizeType range;
-                        if (!ParseRange(ds, &range))
-                            return;
-                        SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass);
-                        GetState(s).rangeStart = range;
-                        *operandStack.template Push<Frag>() = Frag(s, s, s);
-                    }
-                    ImplicitConcatenation(atomCountStack, operatorStack);
-                    break;
-
-                case '\\': // Escape character
-                    if (!CharacterEscape(ds, &codepoint))
-                        return; // Unsupported escape character
-                    // fall through to default
-                    RAPIDJSON_DELIBERATE_FALLTHROUGH;
-
-                default: // Pattern character
-                    PushOperand(operandStack, codepoint);
-                    ImplicitConcatenation(atomCountStack, operatorStack);
+            default: // Pattern character
+                PushOperand(operandStack, codepoint);
+                ImplicitConcatenation(atomCountStack, operatorStack);
             }
         }
 
-        while (!operatorStack.Empty())
-            if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
+        while(!operatorStack.Empty())
+            if(!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
                 return;
 
         // Link the operand to matching state.
-        if (operandStack.GetSize() == sizeof(Frag)) {
+        if(operandStack.GetSize() == sizeof(Frag))
+        {
             Frag* e = operandStack.template Pop<Frag>(1);
             Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
             root_ = e->start;
 
 #if RAPIDJSON_REGEX_VERBOSE
             printf("root: %d\n", root_);
-            for (SizeType i = 0; i < stateCount_ ; i++) {
+            for(SizeType i = 0; i < stateCount_; i++)
+            {
                 State& s = GetState(i);
                 printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
             }
@@ -316,162 +333,188 @@ private:
         }
     }
 
-    SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) {
-        State* s = states_.template Push<State>();
-        s->out = out;
-        s->out1 = out1;
-        s->codepoint = codepoint;
+    SizeType NewState(SizeType out, SizeType out1, unsigned codepoint)
+    {
+        State* s      = states_.template Push<State>();
+        s->out        = out;
+        s->out1       = out1;
+        s->codepoint  = codepoint;
         s->rangeStart = kRegexInvalidRange;
         return stateCount_++;
     }
 
-    void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint) {
+    void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint)
+    {
         SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
         *operandStack.template Push<Frag>() = Frag(s, s, s);
     }
 
-    void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack) {
-        if (*atomCountStack.template Top<unsigned>())
+    void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack)
+    {
+        if(*atomCountStack.template Top<unsigned>())
             *operatorStack.template Push<Operator>() = kConcatenation;
         (*atomCountStack.template Top<unsigned>())++;
     }
 
-    SizeType Append(SizeType l1, SizeType l2) {
+    SizeType Append(SizeType l1, SizeType l2)
+    {
         SizeType old = l1;
-        while (GetState(l1).out != kRegexInvalidState)
+        while(GetState(l1).out != kRegexInvalidState)
             l1 = GetState(l1).out;
         GetState(l1).out = l2;
         return old;
     }
 
-    void Patch(SizeType l, SizeType s) {
-        for (SizeType next; l != kRegexInvalidState; l = next) {
-            next = GetState(l).out;
+    void Patch(SizeType l, SizeType s)
+    {
+        for(SizeType next; l != kRegexInvalidState; l = next)
+        {
+            next            = GetState(l).out;
             GetState(l).out = s;
         }
     }
 
-    bool Eval(Stack<Allocator>& operandStack, Operator op) {
-        switch (op) {
-            case kConcatenation:
-                RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag) * 2);
-                {
-                    Frag e2 = *operandStack.template Pop<Frag>(1);
-                    Frag e1 = *operandStack.template Pop<Frag>(1);
-                    Patch(e1.out, e2.start);
-                    *operandStack.template Push<Frag>() = Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex));
-                }
+    bool Eval(Stack<Allocator>& operandStack, Operator op)
+    {
+        switch(op)
+        {
+        case kConcatenation:
+            RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag) * 2);
+            {
+                Frag e2 = *operandStack.template Pop<Frag>(1);
+                Frag e1 = *operandStack.template Pop<Frag>(1);
+                Patch(e1.out, e2.start);
+                *operandStack.template Push<Frag>() =
+                    Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex));
+            }
+            return true;
+
+        case kAlternation:
+            if(operandStack.GetSize() >= sizeof(Frag) * 2)
+            {
+                Frag e2    = *operandStack.template Pop<Frag>(1);
+                Frag e1    = *operandStack.template Pop<Frag>(1);
+                SizeType s = NewState(e1.start, e2.start, 0);
+                *operandStack.template Push<Frag>() =
+                    Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex));
                 return true;
+            }
+            return false;
 
-            case kAlternation:
-                if (operandStack.GetSize() >= sizeof(Frag) * 2) {
-                    Frag e2 = *operandStack.template Pop<Frag>(1);
-                    Frag e1 = *operandStack.template Pop<Frag>(1);
-                    SizeType s = NewState(e1.start, e2.start, 0);
-                    *operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex));
-                    return true;
-                }
-                return false;
+        case kZeroOrOne:
+            if(operandStack.GetSize() >= sizeof(Frag))
+            {
+                Frag e                              = *operandStack.template Pop<Frag>(1);
+                SizeType s                          = NewState(kRegexInvalidState, e.start, 0);
+                *operandStack.template Push<Frag>() = Frag(s, Append(e.out, s), e.minIndex);
+                return true;
+            }
+            return false;
 
-            case kZeroOrOne:
-                if (operandStack.GetSize() >= sizeof(Frag)) {
-                    Frag e = *operandStack.template Pop<Frag>(1);
-                    SizeType s = NewState(kRegexInvalidState, e.start, 0);
-                    *operandStack.template Push<Frag>() = Frag(s, Append(e.out, s), e.minIndex);
-                    return true;
-                }
-                return false;
+        case kZeroOrMore:
+            if(operandStack.GetSize() >= sizeof(Frag))
+            {
+                Frag e     = *operandStack.template Pop<Frag>(1);
+                SizeType s = NewState(kRegexInvalidState, e.start, 0);
+                Patch(e.out, s);
+                *operandStack.template Push<Frag>() = Frag(s, s, e.minIndex);
+                return true;
+            }
+            return false;
 
-            case kZeroOrMore:
-                if (operandStack.GetSize() >= sizeof(Frag)) {
-                    Frag e = *operandStack.template Pop<Frag>(1);
-                    SizeType s = NewState(kRegexInvalidState, e.start, 0);
-                    Patch(e.out, s);
-                    *operandStack.template Push<Frag>() = Frag(s, s, e.minIndex);
-                    return true;
-                }
-                return false;
+        case kOneOrMore:
+            if(operandStack.GetSize() >= sizeof(Frag))
+            {
+                Frag e     = *operandStack.template Pop<Frag>(1);
+                SizeType s = NewState(kRegexInvalidState, e.start, 0);
+                Patch(e.out, s);
+                *operandStack.template Push<Frag>() = Frag(e.start, s, e.minIndex);
+                return true;
+            }
+            return false;
 
-            case kOneOrMore:
-                if (operandStack.GetSize() >= sizeof(Frag)) {
-                    Frag e = *operandStack.template Pop<Frag>(1);
-                    SizeType s = NewState(kRegexInvalidState, e.start, 0);
-                    Patch(e.out, s);
-                    *operandStack.template Push<Frag>() = Frag(e.start, s, e.minIndex);
-                    return true;
-                }
-                return false;
-
-            default: 
-                // syntax error (e.g. unclosed kLeftParenthesis)
-                return false;
+        default:
+            // syntax error (e.g. unclosed kLeftParenthesis)
+            return false;
         }
     }
 
-    bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) {
+    bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m)
+    {
         RAPIDJSON_ASSERT(n <= m);
         RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag));
 
-        if (n == 0) {
-            if (m == 0)                             // a{0} not support
+        if(n == 0)
+        {
+            if(m == 0) // a{0} not support
                 return false;
-            else if (m == kInfinityQuantifier)
-                Eval(operandStack, kZeroOrMore);    // a{0,} -> a*
-            else {
-                Eval(operandStack, kZeroOrOne);         // a{0,5} -> a?
-                for (unsigned i = 0; i < m - 1; i++)
-                    CloneTopOperand(operandStack);      // a{0,5} -> a? a? a? a? a?
-                for (unsigned i = 0; i < m - 1; i++)
+            else if(m == kInfinityQuantifier)
+                Eval(operandStack, kZeroOrMore); // a{0,} -> a*
+            else
+            {
+                Eval(operandStack, kZeroOrOne); // a{0,5} -> a?
+                for(unsigned i = 0; i < m - 1; i++)
+                    CloneTopOperand(operandStack); // a{0,5} -> a? a? a? a? a?
+                for(unsigned i = 0; i < m - 1; i++)
                     Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a?
             }
             return true;
         }
 
-        for (unsigned i = 0; i < n - 1; i++)        // a{3} -> a a a
+        for(unsigned i = 0; i < n - 1; i++) // a{3} -> a a a
             CloneTopOperand(operandStack);
 
-        if (m == kInfinityQuantifier)
-            Eval(operandStack, kOneOrMore);         // a{3,} -> a a a+
-        else if (m > n) {
-            CloneTopOperand(operandStack);          // a{3,5} -> a a a a
-            Eval(operandStack, kZeroOrOne);         // a{3,5} -> a a a a?
-            for (unsigned i = n; i < m - 1; i++)
-                CloneTopOperand(operandStack);      // a{3,5} -> a a a a? a?
-            for (unsigned i = n; i < m; i++)
+        if(m == kInfinityQuantifier)
+            Eval(operandStack, kOneOrMore); // a{3,} -> a a a+
+        else if(m > n)
+        {
+            CloneTopOperand(operandStack);  // a{3,5} -> a a a a
+            Eval(operandStack, kZeroOrOne); // a{3,5} -> a a a a?
+            for(unsigned i = n; i < m - 1; i++)
+                CloneTopOperand(operandStack); // a{3,5} -> a a a a? a?
+            for(unsigned i = n; i < m; i++)
                 Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a?
         }
 
-        for (unsigned i = 0; i < n - 1; i++)
-            Eval(operandStack, kConcatenation);     // a{3} -> aaa, a{3,} -> aaa+, a{3.5} -> aaaa?a?
+        for(unsigned i = 0; i < n - 1; i++)
+            Eval(operandStack, kConcatenation); // a{3} -> aaa, a{3,} -> aaa+, a{3.5} -> aaaa?a?
 
         return true;
     }
 
     static SizeType Min(SizeType a, SizeType b) { return a < b ? a : b; }
 
-    void CloneTopOperand(Stack<Allocator>& operandStack) {
-        const Frag src = *operandStack.template Top<Frag>(); // Copy constructor to prevent invalidation
-        SizeType count = stateCount_ - src.minIndex; // Assumes top operand contains states in [src->minIndex, stateCount_)
+    void CloneTopOperand(Stack<Allocator>& operandStack)
+    {
+        const Frag src =
+            *operandStack.template Top<Frag>(); // Copy constructor to prevent invalidation
+        SizeType count =
+            stateCount_ -
+            src.minIndex; // Assumes top operand contains states in [src->minIndex, stateCount_)
         State* s = states_.template Push<State>(count);
         memcpy(s, &GetState(src.minIndex), count * sizeof(State));
-        for (SizeType j = 0; j < count; j++) {
-            if (s[j].out != kRegexInvalidState)
+        for(SizeType j = 0; j < count; j++)
+        {
+            if(s[j].out != kRegexInvalidState)
                 s[j].out += count;
-            if (s[j].out1 != kRegexInvalidState)
+            if(s[j].out1 != kRegexInvalidState)
                 s[j].out1 += count;
         }
-        *operandStack.template Push<Frag>() = Frag(src.start + count, src.out + count, src.minIndex + count);
+        *operandStack.template Push<Frag>() =
+            Frag(src.start + count, src.out + count, src.minIndex + count);
         stateCount_ += count;
     }
 
     template <typename InputStream>
-    bool ParseUnsigned(DecodedStream<InputStream, Encoding>& ds, unsigned* u) {
+    bool ParseUnsigned(DecodedStream<InputStream, Encoding>& ds, unsigned* u)
+    {
         unsigned r = 0;
-        if (ds.Peek() < '0' || ds.Peek() > '9')
+        if(ds.Peek() < '0' || ds.Peek() > '9')
             return false;
-        while (ds.Peek() >= '0' && ds.Peek() <= '9') {
-            if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295
-                return false; // overflow
+        while(ds.Peek() >= '0' && ds.Peek() <= '9')
+        {
+            if(r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295
+                return false;                     // overflow
             r = r * 10 + (ds.Take() - '0');
         }
         *u = r;
@@ -479,111 +522,120 @@ private:
     }
 
     template <typename InputStream>
-    bool ParseRange(DecodedStream<InputStream, Encoding>& ds, SizeType* range) {
-        bool isBegin = true;
-        bool negate = false;
-        int step = 0;
-        SizeType start = kRegexInvalidRange;
+    bool ParseRange(DecodedStream<InputStream, Encoding>& ds, SizeType* range)
+    {
+        bool isBegin     = true;
+        bool negate      = false;
+        int step         = 0;
+        SizeType start   = kRegexInvalidRange;
         SizeType current = kRegexInvalidRange;
         unsigned codepoint;
-        while ((codepoint = ds.Take()) != 0) {
-            if (isBegin) {
+        while((codepoint = ds.Take()) != 0)
+        {
+            if(isBegin)
+            {
                 isBegin = false;
-                if (codepoint == '^') {
+                if(codepoint == '^')
+                {
                     negate = true;
                     continue;
                 }
             }
 
-            switch (codepoint) {
+            switch(codepoint)
+            {
             case ']':
-                if (start == kRegexInvalidRange)
-                    return false;   // Error: nothing inside []
-                if (step == 2) { // Add trailing '-'
+                if(start == kRegexInvalidRange)
+                    return false; // Error: nothing inside []
+                if(step == 2)
+                { // Add trailing '-'
                     SizeType r = NewRange('-');
                     RAPIDJSON_ASSERT(current != kRegexInvalidRange);
                     GetRange(current).next = r;
                 }
-                if (negate)
+                if(negate)
                     GetRange(start).start |= kRangeNegationFlag;
                 *range = start;
                 return true;
 
             case '\\':
-                if (ds.Peek() == 'b') {
+                if(ds.Peek() == 'b')
+                {
                     ds.Take();
                     codepoint = 0x0008; // Escape backspace character
                 }
-                else if (!CharacterEscape(ds, &codepoint))
+                else if(!CharacterEscape(ds, &codepoint))
                     return false;
                 // fall through to default
                 RAPIDJSON_DELIBERATE_FALLTHROUGH;
 
             default:
-                switch (step) {
+                switch(step)
+                {
                 case 1:
-                    if (codepoint == '-') {
+                    if(codepoint == '-')
+                    {
                         step++;
                         break;
                     }
                     // fall through to step 0 for other characters
                     RAPIDJSON_DELIBERATE_FALLTHROUGH;
 
-                case 0:
-                    {
-                        SizeType r = NewRange(codepoint);
-                        if (current != kRegexInvalidRange)
-                            GetRange(current).next = r;
-                        if (start == kRegexInvalidRange)
-                            start = r;
-                        current = r;
-                    }
+                case 0: {
+                    SizeType r = NewRange(codepoint);
+                    if(current != kRegexInvalidRange)
+                        GetRange(current).next = r;
+                    if(start == kRegexInvalidRange)
+                        start = r;
+                    current = r;
+                }
                     step = 1;
                     break;
 
                 default:
                     RAPIDJSON_ASSERT(step == 2);
                     GetRange(current).end = codepoint;
-                    step = 0;
+                    step                  = 0;
                 }
             }
         }
         return false;
     }
-    
-    SizeType NewRange(unsigned codepoint) {
+
+    SizeType NewRange(unsigned codepoint)
+    {
         Range* r = ranges_.template Push<Range>();
         r->start = r->end = codepoint;
-        r->next = kRegexInvalidRange;
+        r->next           = kRegexInvalidRange;
         return rangeCount_++;
     }
 
     template <typename InputStream>
-    bool CharacterEscape(DecodedStream<InputStream, Encoding>& ds, unsigned* escapedCodepoint) {
+    bool CharacterEscape(DecodedStream<InputStream, Encoding>& ds, unsigned* escapedCodepoint)
+    {
         unsigned codepoint;
-        switch (codepoint = ds.Take()) {
-            case '^':
-            case '$':
-            case '|':
-            case '(':
-            case ')':
-            case '?':
-            case '*':
-            case '+':
-            case '.':
-            case '[':
-            case ']':
-            case '{':
-            case '}':
-            case '\\':
-                *escapedCodepoint = codepoint; return true;
-            case 'f': *escapedCodepoint = 0x000C; return true;
-            case 'n': *escapedCodepoint = 0x000A; return true;
-            case 'r': *escapedCodepoint = 0x000D; return true;
-            case 't': *escapedCodepoint = 0x0009; return true;
-            case 'v': *escapedCodepoint = 0x000B; return true;
-            default:
-                return false; // Unsupported escape character
+        switch(codepoint = ds.Take())
+        {
+        case '^':
+        case '$':
+        case '|':
+        case '(':
+        case ')':
+        case '?':
+        case '*':
+        case '+':
+        case '.':
+        case '[':
+        case ']':
+        case '{':
+        case '}':
+        case '\\': *escapedCodepoint = codepoint; return true;
+        case 'f': *escapedCodepoint = 0x000C; return true;
+        case 'n': *escapedCodepoint = 0x000A; return true;
+        case 'r': *escapedCodepoint = 0x000D; return true;
+        case 't': *escapedCodepoint = 0x0009; return true;
+        case 'v': *escapedCodepoint = 0x000B; return true;
+        default: return false; // Unsupported escape character
         }
     }
 
@@ -603,78 +655,93 @@ private:
 };
 
 template <typename RegexType, typename Allocator = CrtAllocator>
-class GenericRegexSearch {
-public:
+class GenericRegexSearch
+{
+    public:
     typedef typename RegexType::EncodingType Encoding;
     typedef typename Encoding::Ch Ch;
 
-    GenericRegexSearch(const RegexType& regex, Allocator* allocator = 0) : 
-        regex_(regex), allocator_(allocator), ownAllocator_(0),
-        state0_(allocator, 0), state1_(allocator, 0), stateSet_()
+    GenericRegexSearch(const RegexType& regex, Allocator* allocator = 0)
+        : regex_(regex),
+          allocator_(allocator),
+          ownAllocator_(0),
+          state0_(allocator, 0),
+          state1_(allocator, 0),
+          stateSet_()
     {
         RAPIDJSON_ASSERT(regex_.IsValid());
-        if (!allocator_)
+        if(!allocator_)
             ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
         stateSet_ = static_cast<uint32_t*>(allocator_->Malloc(GetStateSetSize()));
         state0_.template Reserve<SizeType>(regex_.stateCount_);
         state1_.template Reserve<SizeType>(regex_.stateCount_);
     }
 
-    ~GenericRegexSearch() {
+    ~GenericRegexSearch()
+    {
         Allocator::Free(stateSet_);
         RAPIDJSON_DELETE(ownAllocator_);
     }
 
     template <typename InputStream>
-    bool Match(InputStream& is) {
+    bool Match(InputStream& is)
+    {
         return SearchWithAnchoring(is, true, true);
     }
 
-    bool Match(const Ch* s) {
+    bool Match(const Ch* s)
+    {
         GenericStringStream<Encoding> is(s);
         return Match(is);
     }
 
     template <typename InputStream>
-    bool Search(InputStream& is) {
+    bool Search(InputStream& is)
+    {
         return SearchWithAnchoring(is, regex_.anchorBegin_, regex_.anchorEnd_);
     }
 
-    bool Search(const Ch* s) {
+    bool Search(const Ch* s)
+    {
         GenericStringStream<Encoding> is(s);
         return Search(is);
     }
 
-private:
+    private:
     typedef typename RegexType::State State;
     typedef typename RegexType::Range Range;
 
     template <typename InputStream>
-    bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) {
+    bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd)
+    {
         DecodedStream<InputStream, Encoding> ds(is);
 
         state0_.Clear();
-        Stack<Allocator> *current = &state0_, *next = &state1_;
+        Stack<Allocator>*current = &state0_, *next = &state1_;
         const size_t stateSetSize = GetStateSetSize();
         std::memset(stateSet_, 0, stateSetSize);
 
         bool matched = AddState(*current, regex_.root_);
         unsigned codepoint;
-        while (!current->Empty() && (codepoint = ds.Take()) != 0) {
+        while(!current->Empty() && (codepoint = ds.Take()) != 0)
+        {
             std::memset(stateSet_, 0, stateSetSize);
             next->Clear();
             matched = false;
-            for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
+            for(const SizeType* s = current->template Bottom<SizeType>();
+                s != current->template End<SizeType>();
+                ++s)
+            {
                 const State& sr = regex_.GetState(*s);
-                if (sr.codepoint == codepoint ||
-                    sr.codepoint == RegexType::kAnyCharacterClass || 
-                    (sr.codepoint == RegexType::kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint)))
+                if(sr.codepoint == codepoint || sr.codepoint == RegexType::kAnyCharacterClass ||
+                   (sr.codepoint == RegexType::kRangeCharacterClass &&
+                    MatchRange(sr.rangeStart, codepoint)))
                 {
                     matched = AddState(*next, sr.out) || matched;
-                    if (!anchorEnd && matched)
+                    if(!anchorEnd && matched)
                         return true;
                 }
-                if (!anchorBegin)
+                if(!anchorBegin)
                     AddState(*next, regex_.root_);
             }
             internal::Swap(current, next);
@@ -683,31 +750,35 @@ private:
         return matched;
     }
 
-    size_t GetStateSetSize() const {
-        return (regex_.stateCount_ + 31) / 32 * 4;
-    }
+    size_t GetStateSetSize() const { return (regex_.stateCount_ + 31) / 32 * 4; }
 
     // Return whether the added states is a match state
-    bool AddState(Stack<Allocator>& l, SizeType index) {
+    bool AddState(Stack<Allocator>& l, SizeType index)
+    {
         RAPIDJSON_ASSERT(index != kRegexInvalidState);
 
         const State& s = regex_.GetState(index);
-        if (s.out1 != kRegexInvalidState) { // Split
+        if(s.out1 != kRegexInvalidState)
+        { // Split
             bool matched = AddState(l, s.out);
             return AddState(l, s.out1) || matched;
         }
-        else if (!(stateSet_[index >> 5] & (1u << (index & 31)))) {
+        else if(!(stateSet_[index >> 5] & (1u << (index & 31))))
+        {
             stateSet_[index >> 5] |= (1u << (index & 31));
             *l.template PushUnsafe<SizeType>() = index;
         }
-        return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not validated due to reallocation.
+        return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not
+                                            // validated due to reallocation.
     }
 
-    bool MatchRange(SizeType rangeIndex, unsigned codepoint) const {
+    bool MatchRange(SizeType rangeIndex, unsigned codepoint) const
+    {
         bool yes = (regex_.GetRange(rangeIndex).start & RegexType::kRangeNegationFlag) == 0;
-        while (rangeIndex != kRegexInvalidRange) {
+        while(rangeIndex != kRegexInvalidRange)
+        {
             const Range& r = regex_.GetRange(rangeIndex);
-            if (codepoint >= (r.start & ~RegexType::kRangeNegationFlag) && codepoint <= r.end)
+            if(codepoint >= (r.start & ~RegexType::kRangeNegationFlag) && codepoint <= r.end)
                 return yes;
             rangeIndex = r.next;
         }
@@ -722,7 +793,7 @@ private:
     uint32_t* stateSet_;
 };
 
-typedef GenericRegex<UTF8<> > Regex;
+typedef GenericRegex<UTF8<>> Regex;
 typedef GenericRegexSearch<Regex> RegexSearch;
 
 } // namespace internal
diff --git a/include/rapidjson/internal/stack.h b/include/rapidjson/internal/stack.h
index 73abd706e9..fb8752a81a 100644
--- a/include/rapidjson/internal/stack.h
+++ b/include/rapidjson/internal/stack.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_INTERNAL_STACK_H_
@@ -21,7 +21,7 @@
 
 #if defined(__clang__)
 RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(c++98-compat)
+RAPIDJSON_DIAG_OFF(c++ 98 - compat)
 #endif
 
 RAPIDJSON_NAMESPACE_BEGIN
@@ -32,13 +32,21 @@ namespace internal {
 
 //! A type-unsafe stack for storing different types of data.
 /*! \tparam Allocator Allocator for allocating stack memory.
-*/
+ */
 template <typename Allocator>
-class Stack {
-public:
+class Stack
+{
+    public:
     // Optimization note: Do not allocate memory for stack_ in constructor.
     // Do it lazily when first Push() -> Expand() -> Resize().
-    Stack(Allocator* allocator, size_t stackCapacity) : allocator_(allocator), ownAllocator_(0), stack_(0), stackTop_(0), stackEnd_(0), initialCapacity_(stackCapacity) {
+    Stack(Allocator* allocator, size_t stackCapacity)
+        : allocator_(allocator),
+          ownAllocator_(0),
+          stack_(0),
+          stackTop_(0),
+          stackEnd_(0),
+          initialCapacity_(stackCapacity)
+    {
     }
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
@@ -50,44 +58,44 @@ public:
           stackEnd_(rhs.stackEnd_),
           initialCapacity_(rhs.initialCapacity_)
     {
-        rhs.allocator_ = 0;
-        rhs.ownAllocator_ = 0;
-        rhs.stack_ = 0;
-        rhs.stackTop_ = 0;
-        rhs.stackEnd_ = 0;
+        rhs.allocator_       = 0;
+        rhs.ownAllocator_    = 0;
+        rhs.stack_           = 0;
+        rhs.stackTop_        = 0;
+        rhs.stackEnd_        = 0;
         rhs.initialCapacity_ = 0;
     }
 #endif
 
-    ~Stack() {
-        Destroy();
-    }
+    ~Stack() { Destroy(); }
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
-    Stack& operator=(Stack&& rhs) {
-        if (&rhs != this)
+    Stack& operator=(Stack&& rhs)
+    {
+        if(&rhs != this)
         {
             Destroy();
 
-            allocator_ = rhs.allocator_;
-            ownAllocator_ = rhs.ownAllocator_;
-            stack_ = rhs.stack_;
-            stackTop_ = rhs.stackTop_;
-            stackEnd_ = rhs.stackEnd_;
+            allocator_       = rhs.allocator_;
+            ownAllocator_    = rhs.ownAllocator_;
+            stack_           = rhs.stack_;
+            stackTop_        = rhs.stackTop_;
+            stackEnd_        = rhs.stackEnd_;
             initialCapacity_ = rhs.initialCapacity_;
 
-            rhs.allocator_ = 0;
-            rhs.ownAllocator_ = 0;
-            rhs.stack_ = 0;
-            rhs.stackTop_ = 0;
-            rhs.stackEnd_ = 0;
+            rhs.allocator_       = 0;
+            rhs.ownAllocator_    = 0;
+            rhs.stack_           = 0;
+            rhs.stackTop_        = 0;
+            rhs.stackEnd_        = 0;
             rhs.initialCapacity_ = 0;
         }
         return *this;
     }
 #endif
 
-    void Swap(Stack& rhs) RAPIDJSON_NOEXCEPT {
+    void Swap(Stack& rhs) RAPIDJSON_NOEXCEPT
+    {
         internal::Swap(allocator_, rhs.allocator_);
         internal::Swap(ownAllocator_, rhs.ownAllocator_);
         internal::Swap(stack_, rhs.stack_);
@@ -98,11 +106,13 @@ public:
 
     void Clear() { stackTop_ = stack_; }
 
-    void ShrinkToFit() { 
-        if (Empty()) {
+    void ShrinkToFit()
+    {
+        if(Empty())
+        {
             // If the stack is empty, completely deallocate the memory.
             Allocator::Free(stack_); // NOLINT (+clang-analyzer-unix.Malloc)
-            stack_ = 0;
+            stack_    = 0;
             stackTop_ = 0;
             stackEnd_ = 0;
         }
@@ -112,21 +122,25 @@ public:
 
     // Optimization note: try to minimize the size of this function for force inline.
     // Expansion is run very infrequently, so it is moved to another (probably non-inline) function.
-    template<typename T>
-    RAPIDJSON_FORCEINLINE void Reserve(size_t count = 1) {
-         // Expand the stack if needed
-        if (RAPIDJSON_UNLIKELY(static_cast<std::ptrdiff_t>(sizeof(T) * count) > (stackEnd_ - stackTop_)))
+    template <typename T>
+    RAPIDJSON_FORCEINLINE void Reserve(size_t count = 1)
+    {
+        // Expand the stack if needed
+        if(RAPIDJSON_UNLIKELY(static_cast<std::ptrdiff_t>(sizeof(T) * count) >
+                              (stackEnd_ - stackTop_)))
             Expand<T>(count);
     }
 
-    template<typename T>
-    RAPIDJSON_FORCEINLINE T* Push(size_t count = 1) {
+    template <typename T>
+    RAPIDJSON_FORCEINLINE T* Push(size_t count = 1)
+    {
         Reserve<T>(count);
         return PushUnsafe<T>(count);
     }
 
-    template<typename T>
-    RAPIDJSON_FORCEINLINE T* PushUnsafe(size_t count = 1) {
+    template <typename T>
+    RAPIDJSON_FORCEINLINE T* PushUnsafe(size_t count = 1)
+    {
         RAPIDJSON_ASSERT(stackTop_);
         RAPIDJSON_ASSERT(static_cast<std::ptrdiff_t>(sizeof(T) * count) <= (stackEnd_ - stackTop_));
         T* ret = reinterpret_cast<T*>(stackTop_);
@@ -134,42 +148,56 @@ public:
         return ret;
     }
 
-    template<typename T>
-    T* Pop(size_t count) {
+    template <typename T>
+    T* Pop(size_t count)
+    {
         RAPIDJSON_ASSERT(GetSize() >= count * sizeof(T));
         stackTop_ -= count * sizeof(T);
         return reinterpret_cast<T*>(stackTop_);
     }
 
-    template<typename T>
-    T* Top() { 
+    template <typename T>
+    T* Top()
+    {
         RAPIDJSON_ASSERT(GetSize() >= sizeof(T));
         return reinterpret_cast<T*>(stackTop_ - sizeof(T));
     }
 
-    template<typename T>
-    const T* Top() const {
+    template <typename T>
+    const T* Top() const
+    {
         RAPIDJSON_ASSERT(GetSize() >= sizeof(T));
         return reinterpret_cast<T*>(stackTop_ - sizeof(T));
     }
 
-    template<typename T>
-    T* End() { return reinterpret_cast<T*>(stackTop_); }
-
-    template<typename T>
-    const T* End() const { return reinterpret_cast<T*>(stackTop_); }
-
-    template<typename T>
-    T* Bottom() { return reinterpret_cast<T*>(stack_); }
-
-    template<typename T>
-    const T* Bottom() const { return reinterpret_cast<T*>(stack_); }
-
-    bool HasAllocator() const {
-        return allocator_ != 0;
+    template <typename T>
+    T* End()
+    {
+        return reinterpret_cast<T*>(stackTop_);
     }
 
-    Allocator& GetAllocator() {
+    template <typename T>
+    const T* End() const
+    {
+        return reinterpret_cast<T*>(stackTop_);
+    }
+
+    template <typename T>
+    T* Bottom()
+    {
+        return reinterpret_cast<T*>(stack_);
+    }
+
+    template <typename T>
+    const T* Bottom() const
+    {
+        return reinterpret_cast<T*>(stack_);
+    }
+
+    bool HasAllocator() const { return allocator_ != 0; }
+
+    Allocator& GetAllocator()
+    {
         RAPIDJSON_ASSERT(allocator_);
         return *allocator_;
     }
@@ -178,34 +206,41 @@ public:
     size_t GetSize() const { return static_cast<size_t>(stackTop_ - stack_); }
     size_t GetCapacity() const { return static_cast<size_t>(stackEnd_ - stack_); }
 
-private:
-    template<typename T>
-    void Expand(size_t count) {
-        // Only expand the capacity if the current stack exists. Otherwise just create a stack with initial capacity.
+    private:
+    template <typename T>
+    void Expand(size_t count)
+    {
+        // Only expand the capacity if the current stack exists. Otherwise just create a stack with
+        // initial capacity.
         size_t newCapacity;
-        if (stack_ == 0) {
-            if (!allocator_)
+        if(stack_ == 0)
+        {
+            if(!allocator_)
                 ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
             newCapacity = initialCapacity_;
-        } else {
+        }
+        else
+        {
             newCapacity = GetCapacity();
             newCapacity += (newCapacity + 1) / 2;
         }
         size_t newSize = GetSize() + sizeof(T) * count;
-        if (newCapacity < newSize)
+        if(newCapacity < newSize)
             newCapacity = newSize;
 
         Resize(newCapacity);
     }
 
-    void Resize(size_t newCapacity) {
-        const size_t size = GetSize();  // Backup the current size
-        stack_ = static_cast<char*>(allocator_->Realloc(stack_, GetCapacity(), newCapacity));
+    void Resize(size_t newCapacity)
+    {
+        const size_t size = GetSize(); // Backup the current size
+        stack_    = static_cast<char*>(allocator_->Realloc(stack_, GetCapacity(), newCapacity));
         stackTop_ = stack_ + size;
         stackEnd_ = stack_ + newCapacity;
     }
 
-    void Destroy() {
+    void Destroy()
+    {
         Allocator::Free(stack_);
         RAPIDJSON_DELETE(ownAllocator_); // Only delete if it is owned by the stack
     }
@@ -216,9 +251,9 @@ private:
 
     Allocator* allocator_;
     Allocator* ownAllocator_;
-    char *stack_;
-    char *stackTop_;
-    char *stackEnd_;
+    char* stack_;
+    char* stackTop_;
+    char* stackEnd_;
     size_t initialCapacity_;
 };
 
diff --git a/include/rapidjson/internal/strfunc.h b/include/rapidjson/internal/strfunc.h
index b698a8f43f..caa85e560a 100644
--- a/include/rapidjson/internal/strfunc.h
+++ b/include/rapidjson/internal/strfunc.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_INTERNAL_STRFUNC_H_
@@ -24,24 +24,29 @@ namespace internal {
 //! Custom strlen() which works on different character types.
 /*! \tparam Ch Character type (e.g. char, wchar_t, short)
     \param s Null-terminated input string.
-    \return Number of characters in the string. 
-    \note This has the same semantics as strlen(), the return value is not number of Unicode codepoints.
+    \return Number of characters in the string.
+    \note This has the same semantics as strlen(), the return value is not number of Unicode
+   codepoints.
 */
 template <typename Ch>
-inline SizeType StrLen(const Ch* s) {
+inline SizeType StrLen(const Ch* s)
+{
     RAPIDJSON_ASSERT(s != 0);
     const Ch* p = s;
-    while (*p) ++p;
+    while(*p)
+        ++p;
     return SizeType(p - s);
 }
 
 template <>
-inline SizeType StrLen(const char* s) {
+inline SizeType StrLen(const char* s)
+{
     return SizeType(std::strlen(s));
 }
 
 template <>
-inline SizeType StrLen(const wchar_t* s) {
+inline SizeType StrLen(const wchar_t* s)
+{
     return SizeType(std::wcslen(s));
 }
 
@@ -51,25 +56,34 @@ inline SizeType StrLen(const wchar_t* s) {
     \param s2 Null-terminated input string.
     \return 0 if equal
 */
-template<typename Ch>
-inline int StrCmp(const Ch* s1, const Ch* s2) {
+template <typename Ch>
+inline int StrCmp(const Ch* s1, const Ch* s2)
+{
     RAPIDJSON_ASSERT(s1 != 0);
     RAPIDJSON_ASSERT(s2 != 0);
-    while(*s1 && (*s1 == *s2)) { s1++; s2++; }
-    return static_cast<unsigned>(*s1) < static_cast<unsigned>(*s2) ? -1 : static_cast<unsigned>(*s1) > static_cast<unsigned>(*s2);
+    while(*s1 && (*s1 == *s2))
+    {
+        s1++;
+        s2++;
+    }
+    return static_cast<unsigned>(*s1) < static_cast<unsigned>(*s2)
+               ? -1
+               : static_cast<unsigned>(*s1) > static_cast<unsigned>(*s2);
 }
 
 //! Returns number of code points in a encoded string.
-template<typename Encoding>
-bool CountStringCodePoint(const typename Encoding::Ch* s, SizeType length, SizeType* outCount) {
+template <typename Encoding>
+bool CountStringCodePoint(const typename Encoding::Ch* s, SizeType length, SizeType* outCount)
+{
     RAPIDJSON_ASSERT(s != 0);
     RAPIDJSON_ASSERT(outCount != 0);
     GenericStringStream<Encoding> is(s);
     const typename Encoding::Ch* end = s + length;
-    SizeType count = 0;
-    while (is.src_ < end) {
+    SizeType count                   = 0;
+    while(is.src_ < end)
+    {
         unsigned codepoint;
-        if (!Encoding::Decode(is, &codepoint))
+        if(!Encoding::Decode(is, &codepoint))
             return false;
         count++;
     }
diff --git a/include/rapidjson/internal/strtod.h b/include/rapidjson/internal/strtod.h
index 57c8418bd9..369299b79d 100644
--- a/include/rapidjson/internal/strtod.h
+++ b/include/rapidjson/internal/strtod.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_STRTOD_
@@ -25,17 +25,20 @@
 RAPIDJSON_NAMESPACE_BEGIN
 namespace internal {
 
-inline double FastPath(double significand, int exp) {
-    if (exp < -308)
+inline double FastPath(double significand, int exp)
+{
+    if(exp < -308)
         return 0.0;
-    else if (exp >= 0)
+    else if(exp >= 0)
         return significand * internal::Pow10(exp);
     else
         return significand / internal::Pow10(-exp);
 }
 
-inline double StrtodNormalPrecision(double d, int p) {
-    if (p < -308) {
+inline double StrtodNormalPrecision(double d, int p)
+{
+    if(p < -308)
+    {
         // Prevent expSum < -308, making Pow10(p) = 0
         d = FastPath(d, -308);
         d = FastPath(d, p + 308);
@@ -46,27 +49,33 @@ inline double StrtodNormalPrecision(double d, int p) {
 }
 
 template <typename T>
-inline T Min3(T a, T b, T c) {
+inline T Min3(T a, T b, T c)
+{
     T m = a;
-    if (m > b) m = b;
-    if (m > c) m = c;
+    if(m > b)
+        m = b;
+    if(m > c)
+        m = c;
     return m;
 }
 
-inline int CheckWithinHalfULP(double b, const BigInteger& d, int dExp) {
+inline int CheckWithinHalfULP(double b, const BigInteger& d, int dExp)
+{
     const Double db(b);
     const uint64_t bInt = db.IntegerSignificand();
-    const int bExp = db.IntegerExponent();
-    const int hExp = bExp - 1;
+    const int bExp      = db.IntegerExponent();
+    const int hExp      = bExp - 1;
 
     int dS_Exp2 = 0, dS_Exp5 = 0, bS_Exp2 = 0, bS_Exp5 = 0, hS_Exp2 = 0, hS_Exp5 = 0;
 
     // Adjust for decimal exponent
-    if (dExp >= 0) {
+    if(dExp >= 0)
+    {
         dS_Exp2 += dExp;
         dS_Exp5 += dExp;
     }
-    else {
+    else
+    {
         bS_Exp2 -= dExp;
         bS_Exp5 -= dExp;
         hS_Exp2 -= dExp;
@@ -74,17 +83,19 @@ inline int CheckWithinHalfULP(double b, const BigInteger& d, int dExp) {
     }
 
     // Adjust for binary exponent
-    if (bExp >= 0)
+    if(bExp >= 0)
         bS_Exp2 += bExp;
-    else {
+    else
+    {
         dS_Exp2 -= bExp;
         hS_Exp2 -= bExp;
     }
 
     // Adjust for half ulp exponent
-    if (hExp >= 0)
+    if(hExp >= 0)
         hS_Exp2 += hExp;
-    else {
+    else
+    {
         dS_Exp2 -= hExp;
         bS_Exp2 -= hExp;
     }
@@ -110,16 +121,19 @@ inline int CheckWithinHalfULP(double b, const BigInteger& d, int dExp) {
     return delta.Compare(hS);
 }
 
-inline bool StrtodFast(double d, int p, double* result) {
+inline bool StrtodFast(double d, int p, double* result)
+{
     // Use fast path for string-to-double conversion if possible
     // see http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
-    if (p > 22  && p < 22 + 16) {
+    if(p > 22 && p < 22 + 16)
+    {
         // Fast Path Cases In Disguise
         d *= internal::Pow10(p - 22);
         p = 22;
     }
 
-    if (p >= -22 && p <= 22 && d <= 9007199254740991.0) { // 2^53 - 1
+    if(p >= -22 && p <= 22 && d <= 9007199254740991.0)
+    { // 2^53 - 1
         *result = FastPath(d, p);
         return true;
     }
@@ -128,24 +142,26 @@ inline bool StrtodFast(double d, int p, double* result) {
 }
 
 // Compute an approximation and see if it is within 1/2 ULP
-template<typename Ch>
-inline bool StrtodDiyFp(const Ch* decimals, int dLen, int dExp, double* result) {
+template <typename Ch>
+inline bool StrtodDiyFp(const Ch* decimals, int dLen, int dExp, double* result)
+{
     uint64_t significand = 0;
-    int i = 0;   // 2^64 - 1 = 18446744073709551615, 1844674407370955161 = 0x1999999999999999    
-    for (; i < dLen; i++) {
-        if (significand  >  RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) ||
-            (significand == RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) && decimals[i] >= Ch('5')))
+    int i = 0; // 2^64 - 1 = 18446744073709551615, 1844674407370955161 = 0x1999999999999999
+    for(; i < dLen; i++)
+    {
+        if(significand > RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) ||
+           (significand == RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) && decimals[i] >= Ch('5')))
             break;
         significand = significand * 10u + static_cast<unsigned>(decimals[i] - Ch('0'));
     }
-    
-    if (i < dLen && decimals[i] >= Ch('5')) // Rounding
+
+    if(i < dLen && decimals[i] >= Ch('5')) // Rounding
         significand++;
 
-    int remaining = dLen - i;
+    int remaining       = dLen - i;
     const int kUlpShift = 3;
-    const int kUlp = 1 << kUlpShift;
-    int64_t error = (remaining == 0) ? 0 : kUlp / 2;
+    const int kUlp      = 1 << kUlpShift;
+    int64_t error       = (remaining == 0) ? 0 : kUlp / 2;
 
     DiyFp v(significand, 0);
     v = v.Normalize();
@@ -155,20 +171,21 @@ inline bool StrtodDiyFp(const Ch* decimals, int dLen, int dExp, double* result)
 
     int actualExp;
     DiyFp cachedPower = GetCachedPower10(dExp, &actualExp);
-    if (actualExp != dExp) {
+    if(actualExp != dExp)
+    {
         static const DiyFp kPow10[] = {
-            DiyFp(RAPIDJSON_UINT64_C2(0xa0000000, 0x00000000), -60),  // 10^1
-            DiyFp(RAPIDJSON_UINT64_C2(0xc8000000, 0x00000000), -57),  // 10^2
-            DiyFp(RAPIDJSON_UINT64_C2(0xfa000000, 0x00000000), -54),  // 10^3
-            DiyFp(RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), -50),  // 10^4
-            DiyFp(RAPIDJSON_UINT64_C2(0xc3500000, 0x00000000), -47),  // 10^5
-            DiyFp(RAPIDJSON_UINT64_C2(0xf4240000, 0x00000000), -44),  // 10^6
-            DiyFp(RAPIDJSON_UINT64_C2(0x98968000, 0x00000000), -40)   // 10^7
+            DiyFp(RAPIDJSON_UINT64_C2(0xa0000000, 0x00000000), -60), // 10^1
+            DiyFp(RAPIDJSON_UINT64_C2(0xc8000000, 0x00000000), -57), // 10^2
+            DiyFp(RAPIDJSON_UINT64_C2(0xfa000000, 0x00000000), -54), // 10^3
+            DiyFp(RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), -50), // 10^4
+            DiyFp(RAPIDJSON_UINT64_C2(0xc3500000, 0x00000000), -47), // 10^5
+            DiyFp(RAPIDJSON_UINT64_C2(0xf4240000, 0x00000000), -44), // 10^6
+            DiyFp(RAPIDJSON_UINT64_C2(0x98968000, 0x00000000), -40)  // 10^7
         };
         int adjustment = dExp - actualExp;
         RAPIDJSON_ASSERT(adjustment >= 1 && adjustment < 8);
         v = v * kPow10[adjustment - 1];
-        if (dLen + adjustment > 19) // has more digits than decimal digits in 64-bit
+        if(dLen + adjustment > 19) // has more digits than decimal digits in 64-bit
             error += kUlp / 2;
     }
 
@@ -177,25 +194,28 @@ inline bool StrtodDiyFp(const Ch* decimals, int dLen, int dExp, double* result)
     error += kUlp + (error == 0 ? 0 : 1);
 
     const int oldExp = v.e;
-    v = v.Normalize();
+    v                = v.Normalize();
     error <<= oldExp - v.e;
 
     const int effectiveSignificandSize = Double::EffectiveSignificandSize(64 + v.e);
-    int precisionSize = 64 - effectiveSignificandSize;
-    if (precisionSize + kUlpShift >= 64) {
+    int precisionSize                  = 64 - effectiveSignificandSize;
+    if(precisionSize + kUlpShift >= 64)
+    {
         int scaleExp = (precisionSize + kUlpShift) - 63;
         v.f >>= scaleExp;
-        v.e += scaleExp; 
+        v.e += scaleExp;
         error = (error >> scaleExp) + 1 + kUlp;
         precisionSize -= scaleExp;
     }
 
     DiyFp rounded(v.f >> precisionSize, v.e + precisionSize);
     const uint64_t precisionBits = (v.f & ((uint64_t(1) << precisionSize) - 1)) * kUlp;
-    const uint64_t halfWay = (uint64_t(1) << (precisionSize - 1)) * kUlp;
-    if (precisionBits >= halfWay + static_cast<unsigned>(error)) {
+    const uint64_t halfWay       = (uint64_t(1) << (precisionSize - 1)) * kUlp;
+    if(precisionBits >= halfWay + static_cast<unsigned>(error))
+    {
         rounded.f++;
-        if (rounded.f & (DiyFp::kDpHiddenBit << 1)) { // rounding overflows mantissa (issue #340)
+        if(rounded.f & (DiyFp::kDpHiddenBit << 1))
+        { // rounding overflows mantissa (issue #340)
             rounded.f >>= 1;
             rounded.e++;
         }
@@ -203,20 +223,23 @@ inline bool StrtodDiyFp(const Ch* decimals, int dLen, int dExp, double* result)
 
     *result = rounded.ToDouble();
 
-    return halfWay - static_cast<unsigned>(error) >= precisionBits || precisionBits >= halfWay + static_cast<unsigned>(error);
+    return halfWay - static_cast<unsigned>(error) >= precisionBits ||
+           precisionBits >= halfWay + static_cast<unsigned>(error);
 }
 
-template<typename Ch>
-inline double StrtodBigInteger(double approx, const Ch* decimals, int dLen, int dExp) {
+template <typename Ch>
+inline double StrtodBigInteger(double approx, const Ch* decimals, int dLen, int dExp)
+{
     RAPIDJSON_ASSERT(dLen >= 0);
     const BigInteger dInt(decimals, static_cast<unsigned>(dLen));
     Double a(approx);
     int cmp = CheckWithinHalfULP(a.Value(), dInt, dExp);
-    if (cmp < 0)
-        return a.Value();  // within half ULP
-    else if (cmp == 0) {
+    if(cmp < 0)
+        return a.Value(); // within half ULP
+    else if(cmp == 0)
+    {
         // Round towards even
-        if (a.Significand() & 1)
+        if(a.Significand() & 1)
             return a.NextPositiveDouble();
         else
             return a.Value();
@@ -225,13 +248,15 @@ inline double StrtodBigInteger(double approx, const Ch* decimals, int dLen, int
         return a.NextPositiveDouble();
 }
 
-template<typename Ch>
-inline double StrtodFullPrecision(double d, int p, const Ch* decimals, size_t length, size_t decimalPosition, int exp) {
+template <typename Ch>
+inline double StrtodFullPrecision(
+    double d, int p, const Ch* decimals, size_t length, size_t decimalPosition, int exp)
+{
     RAPIDJSON_ASSERT(d >= 0.0);
     RAPIDJSON_ASSERT(length >= 1);
 
     double result = 0.0;
-    if (StrtodFast(d, p, &result))
+    if(StrtodFast(d, p, &result))
         return result;
 
     RAPIDJSON_ASSERT(length <= INT_MAX);
@@ -248,39 +273,43 @@ inline double StrtodFullPrecision(double d, int p, const Ch* decimals, size_t le
     RAPIDJSON_ASSERT(dExp <= INT_MAX - dLen);
 
     // Trim leading zeros
-    while (dLen > 0 && *decimals == '0') {
+    while(dLen > 0 && *decimals == '0')
+    {
         dLen--;
         decimals++;
     }
 
     // Trim trailing zeros
-    while (dLen > 0 && decimals[dLen - 1] == '0') {
+    while(dLen > 0 && decimals[dLen - 1] == '0')
+    {
         dLen--;
         dExp++;
     }
 
-    if (dLen == 0) { // Buffer only contains zeros.
+    if(dLen == 0)
+    { // Buffer only contains zeros.
         return 0.0;
     }
 
     // Trim right-most digits
     const int kMaxDecimalDigit = 767 + 1;
-    if (dLen > kMaxDecimalDigit) {
+    if(dLen > kMaxDecimalDigit)
+    {
         dExp += dLen - kMaxDecimalDigit;
         dLen = kMaxDecimalDigit;
     }
 
     // If too small, underflow to zero.
     // Any x <= 10^-324 is interpreted as zero.
-    if (dLen + dExp <= -324)
+    if(dLen + dExp <= -324)
         return 0.0;
 
     // If too large, overflow to infinity.
     // Any x >= 10^309 is interpreted as +infinity.
-    if (dLen + dExp > 309)
+    if(dLen + dExp > 309)
         return std::numeric_limits<double>::infinity();
 
-    if (StrtodDiyFp(decimals, dLen, dExp, &result))
+    if(StrtodDiyFp(decimals, dLen, dExp, &result))
         return result;
 
     // Use approximation from StrtodDiyFp and make adjustment with BigInteger comparison
diff --git a/include/rapidjson/internal/swap.h b/include/rapidjson/internal/swap.h
index 2cf92f93a1..6afaef177c 100644
--- a/include/rapidjson/internal/swap.h
+++ b/include/rapidjson/internal/swap.h
@@ -19,7 +19,7 @@
 
 #if defined(__clang__)
 RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(c++98-compat)
+RAPIDJSON_DIAG_OFF(c++ 98 - compat)
 #endif
 
 RAPIDJSON_NAMESPACE_BEGIN
@@ -30,10 +30,11 @@ namespace internal {
     \note This has the same semantics as std::swap().
 */
 template <typename T>
-inline void Swap(T& a, T& b) RAPIDJSON_NOEXCEPT {
+inline void Swap(T& a, T& b) RAPIDJSON_NOEXCEPT
+{
     T tmp = a;
-        a = b;
-        b = tmp;
+    a     = b;
+    b     = tmp;
 }
 
 } // namespace internal
diff --git a/include/rapidjson/istreamwrapper.h b/include/rapidjson/istreamwrapper.h
index 01437ec012..ad07e5ca3c 100644
--- a/include/rapidjson/istreamwrapper.h
+++ b/include/rapidjson/istreamwrapper.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_ISTREAMWRAPPER_H_
@@ -44,17 +44,27 @@ RAPIDJSON_NAMESPACE_BEGIN
 
     \tparam StreamType Class derived from \c std::basic_istream.
 */
-   
+
 template <typename StreamType>
-class BasicIStreamWrapper {
-public:
+class BasicIStreamWrapper
+{
+    public:
     typedef typename StreamType::char_type Ch;
 
     //! Constructor.
     /*!
         \param stream stream opened for read.
     */
-    BasicIStreamWrapper(StreamType &stream) : stream_(stream), buffer_(peekBuffer_), bufferSize_(4), bufferLast_(0), current_(buffer_), readCount_(0), count_(0), eof_(false) { 
+    BasicIStreamWrapper(StreamType& stream)
+        : stream_(stream),
+          buffer_(peekBuffer_),
+          bufferSize_(4),
+          bufferLast_(0),
+          current_(buffer_),
+          readCount_(0),
+          count_(0),
+          eof_(false)
+    {
         Read();
     }
 
@@ -64,55 +74,78 @@ public:
         \param buffer user-supplied buffer.
         \param bufferSize size of buffer in bytes. Must >=4 bytes.
     */
-    BasicIStreamWrapper(StreamType &stream, char* buffer, size_t bufferSize) : stream_(stream), buffer_(buffer), bufferSize_(bufferSize), bufferLast_(0), current_(buffer_), readCount_(0), count_(0), eof_(false) { 
+    BasicIStreamWrapper(StreamType& stream, char* buffer, size_t bufferSize)
+        : stream_(stream),
+          buffer_(buffer),
+          bufferSize_(bufferSize),
+          bufferLast_(0),
+          current_(buffer_),
+          readCount_(0),
+          count_(0),
+          eof_(false)
+    {
         RAPIDJSON_ASSERT(bufferSize >= 4);
         Read();
     }
 
     Ch Peek() const { return *current_; }
-    Ch Take() { Ch c = *current_; Read(); return c; }
+    Ch Take()
+    {
+        Ch c = *current_;
+        Read();
+        return c;
+    }
     size_t Tell() const { return count_ + static_cast<size_t>(current_ - buffer_); }
 
     // Not implemented
     void Put(Ch) { RAPIDJSON_ASSERT(false); }
-    void Flush() { RAPIDJSON_ASSERT(false); } 
-    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
-    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
-
-    // For encoding detection only.
-    const Ch* Peek4() const {
-        return (current_ + 4 - !eof_ <= bufferLast_) ? current_ : 0;
+    void Flush() { RAPIDJSON_ASSERT(false); }
+    Ch* PutBegin()
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    size_t PutEnd(Ch*)
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
     }
 
-private:
+    // For encoding detection only.
+    const Ch* Peek4() const { return (current_ + 4 - !eof_ <= bufferLast_) ? current_ : 0; }
+
+    private:
     BasicIStreamWrapper();
     BasicIStreamWrapper(const BasicIStreamWrapper&);
     BasicIStreamWrapper& operator=(const BasicIStreamWrapper&);
 
-    void Read() {
-        if (current_ < bufferLast_)
+    void Read()
+    {
+        if(current_ < bufferLast_)
             ++current_;
-        else if (!eof_) {
+        else if(!eof_)
+        {
             count_ += readCount_;
-            readCount_ = bufferSize_;
+            readCount_  = bufferSize_;
             bufferLast_ = buffer_ + readCount_ - 1;
-            current_ = buffer_;
+            current_    = buffer_;
 
-            if (!stream_.read(buffer_, static_cast<std::streamsize>(bufferSize_))) {
-                readCount_ = static_cast<size_t>(stream_.gcount());
+            if(!stream_.read(buffer_, static_cast<std::streamsize>(bufferSize_)))
+            {
+                readCount_                            = static_cast<size_t>(stream_.gcount());
                 *(bufferLast_ = buffer_ + readCount_) = '\0';
-                eof_ = true;
+                eof_                                  = true;
             }
         }
     }
 
-    StreamType &stream_;
+    StreamType& stream_;
     Ch peekBuffer_[4], *buffer_;
     size_t bufferSize_;
-    Ch *bufferLast_;
-    Ch *current_;
+    Ch* bufferLast_;
+    Ch* current_;
     size_t readCount_;
-    size_t count_;  //!< Number of characters read
+    size_t count_; //!< Number of characters read
     bool eof_;
 };
 
diff --git a/include/rapidjson/memorybuffer.h b/include/rapidjson/memorybuffer.h
index ffbc41ed1f..3855619252 100644
--- a/include/rapidjson/memorybuffer.h
+++ b/include/rapidjson/memorybuffer.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_MEMORYBUFFER_H_
@@ -27,17 +27,22 @@ RAPIDJSON_NAMESPACE_BEGIN
     It is similar to FileWriteBuffer but the destination is an in-memory buffer instead of a file.
 
     Differences between MemoryBuffer and StringBuffer:
-    1. StringBuffer has Encoding but MemoryBuffer is only a byte buffer. 
-    2. StringBuffer::GetString() returns a null-terminated string. MemoryBuffer::GetBuffer() returns a buffer without terminator.
+    1. StringBuffer has Encoding but MemoryBuffer is only a byte buffer.
+    2. StringBuffer::GetString() returns a null-terminated string. MemoryBuffer::GetBuffer() returns
+   a buffer without terminator.
 
     \tparam Allocator type for allocating memory buffer.
     \note implements Stream concept
 */
 template <typename Allocator = CrtAllocator>
-struct GenericMemoryBuffer {
+struct GenericMemoryBuffer
+{
     typedef char Ch; // byte
 
-    GenericMemoryBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {}
+    GenericMemoryBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity)
+        : stack_(allocator, capacity)
+    {
+    }
 
     void Put(Ch c) { *stack_.template Push<Ch>() = c; }
     void Flush() {}
@@ -47,9 +52,7 @@ struct GenericMemoryBuffer {
     Ch* Push(size_t count) { return stack_.template Push<Ch>(count); }
     void Pop(size_t count) { stack_.template Pop<Ch>(count); }
 
-    const Ch* GetBuffer() const {
-        return stack_.template Bottom<Ch>();
-    }
+    const Ch* GetBuffer() const { return stack_.template Bottom<Ch>(); }
 
     size_t GetSize() const { return stack_.GetSize(); }
 
@@ -60,8 +63,9 @@ struct GenericMemoryBuffer {
 typedef GenericMemoryBuffer<> MemoryBuffer;
 
 //! Implement specialized version of PutN() with memset() for better performance.
-template<>
-inline void PutN(MemoryBuffer& memoryBuffer, char c, size_t n) {
+template <>
+inline void PutN(MemoryBuffer& memoryBuffer, char c, size_t n)
+{
     std::memset(memoryBuffer.stack_.Push<char>(n), c, n * sizeof(c));
 }
 
diff --git a/include/rapidjson/memorystream.h b/include/rapidjson/memorystream.h
index 77af6c999e..221b756d7f 100644
--- a/include/rapidjson/memorystream.h
+++ b/include/rapidjson/memorystream.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_MEMORYSTREAM_H_
@@ -19,8 +19,8 @@
 
 #ifdef __clang__
 RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(unreachable-code)
-RAPIDJSON_DIAG_OFF(missing-noreturn)
+RAPIDJSON_DIAG_OFF(unreachable - code)
+RAPIDJSON_DIAG_OFF(missing - noreturn)
 #endif
 
 RAPIDJSON_NAMESPACE_BEGIN
@@ -33,33 +33,43 @@ RAPIDJSON_NAMESPACE_BEGIN
 
     Differences between MemoryStream and StringStream:
     1. StringStream has encoding but MemoryStream is a byte stream.
-    2. MemoryStream needs size of the source buffer and the buffer don't need to be null terminated. StringStream assume null-terminated string as source.
-    3. MemoryStream supports Peek4() for encoding detection. StringStream is specified with an encoding so it should not have Peek4().
-    \note implements Stream concept
+    2. MemoryStream needs size of the source buffer and the buffer don't need to be null terminated.
+   StringStream assume null-terminated string as source.
+    3. MemoryStream supports Peek4() for encoding detection. StringStream is specified with an
+   encoding so it should not have Peek4(). \note implements Stream concept
 */
-struct MemoryStream {
+struct MemoryStream
+{
     typedef char Ch; // byte
 
-    MemoryStream(const Ch *src, size_t size) : src_(src), begin_(src), end_(src + size), size_(size) {}
+    MemoryStream(const Ch* src, size_t size) : src_(src), begin_(src), end_(src + size), size_(size)
+    {
+    }
 
     Ch Peek() const { return RAPIDJSON_UNLIKELY(src_ == end_) ? '\0' : *src_; }
     Ch Take() { return RAPIDJSON_UNLIKELY(src_ == end_) ? '\0' : *src_++; }
     size_t Tell() const { return static_cast<size_t>(src_ - begin_); }
 
-    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+    Ch* PutBegin()
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
     void Put(Ch) { RAPIDJSON_ASSERT(false); }
     void Flush() { RAPIDJSON_ASSERT(false); }
-    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
-
-    // For encoding detection only.
-    const Ch* Peek4() const {
-        return Tell() + 4 <= size_ ? src_ : 0;
+    size_t PutEnd(Ch*)
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
     }
 
-    const Ch* src_;     //!< Current read position.
-    const Ch* begin_;   //!< Original head of the string.
-    const Ch* end_;     //!< End of stream.
-    size_t size_;       //!< Size of the stream.
+    // For encoding detection only.
+    const Ch* Peek4() const { return Tell() + 4 <= size_ ? src_ : 0; }
+
+    const Ch* src_;   //!< Current read position.
+    const Ch* begin_; //!< Original head of the string.
+    const Ch* end_;   //!< End of stream.
+    size_t size_;     //!< Size of the stream.
 };
 
 RAPIDJSON_NAMESPACE_END
diff --git a/include/rapidjson/msinttypes/inttypes.h b/include/rapidjson/msinttypes/inttypes.h
index 18111286bf..4c3efbdd75 100644
--- a/include/rapidjson/msinttypes/inttypes.h
+++ b/include/rapidjson/msinttypes/inttypes.h
@@ -1,37 +1,37 @@
 // ISO C9x  compliant inttypes.h for Microsoft Visual Studio
-// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
-// 
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
+//
 //  Copyright (c) 2006-2013 Alexander Chemeris
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
-// 
+//
 //   1. Redistributions of source code must retain the above copyright notice,
 //      this list of conditions and the following disclaimer.
-// 
+//
 //   2. Redistributions in binary form must reproduce the above copyright
 //      notice, this list of conditions and the following disclaimer in the
 //      documentation and/or other materials provided with the distribution.
-// 
+//
 //   3. Neither the name of the product nor the names of its contributors may
 //      be used to endorse or promote products derived from this software
 //      without specific prior written permission.
-// 
+//
 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// 
+//
 ///////////////////////////////////////////////////////////////////////////////
 
-// The above software in this distribution may have been modified by 
-// THL A29 Limited ("Tencent Modifications"). 
+// The above software in this distribution may have been modified by
+// THL A29 Limited ("Tencent Modifications").
 // All Tencent Modifications are Copyright (C) 2015 THL A29 Limited.
 
 #ifndef _MSC_VER // [
@@ -54,9 +54,10 @@
 
 // 7.8 Format conversion of integer types
 
-typedef struct {
-   intmax_t quot;
-   intmax_t rem;
+typedef struct
+{
+    intmax_t quot;
+    intmax_t rem;
 } imaxdiv_t;
 
 // 7.8.1 Macros for format specifiers
@@ -64,212 +65,212 @@ typedef struct {
 #if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [   See footnote 185 at page 198
 
 // The fprintf macros for signed integers are:
-#define PRId8       "d"
-#define PRIi8       "i"
-#define PRIdLEAST8  "d"
-#define PRIiLEAST8  "i"
-#define PRIdFAST8   "d"
-#define PRIiFAST8   "i"
+#define PRId8 "d"
+#define PRIi8 "i"
+#define PRIdLEAST8 "d"
+#define PRIiLEAST8 "i"
+#define PRIdFAST8 "d"
+#define PRIiFAST8 "i"
 
-#define PRId16       "hd"
-#define PRIi16       "hi"
-#define PRIdLEAST16  "hd"
-#define PRIiLEAST16  "hi"
-#define PRIdFAST16   "hd"
-#define PRIiFAST16   "hi"
+#define PRId16 "hd"
+#define PRIi16 "hi"
+#define PRIdLEAST16 "hd"
+#define PRIiLEAST16 "hi"
+#define PRIdFAST16 "hd"
+#define PRIiFAST16 "hi"
 
-#define PRId32       "I32d"
-#define PRIi32       "I32i"
-#define PRIdLEAST32  "I32d"
-#define PRIiLEAST32  "I32i"
-#define PRIdFAST32   "I32d"
-#define PRIiFAST32   "I32i"
+#define PRId32 "I32d"
+#define PRIi32 "I32i"
+#define PRIdLEAST32 "I32d"
+#define PRIiLEAST32 "I32i"
+#define PRIdFAST32 "I32d"
+#define PRIiFAST32 "I32i"
 
-#define PRId64       "I64d"
-#define PRIi64       "I64i"
-#define PRIdLEAST64  "I64d"
-#define PRIiLEAST64  "I64i"
-#define PRIdFAST64   "I64d"
-#define PRIiFAST64   "I64i"
+#define PRId64 "I64d"
+#define PRIi64 "I64i"
+#define PRIdLEAST64 "I64d"
+#define PRIiLEAST64 "I64i"
+#define PRIdFAST64 "I64d"
+#define PRIiFAST64 "I64i"
 
-#define PRIdMAX     "I64d"
-#define PRIiMAX     "I64i"
+#define PRIdMAX "I64d"
+#define PRIiMAX "I64i"
 
-#define PRIdPTR     "Id"
-#define PRIiPTR     "Ii"
+#define PRIdPTR "Id"
+#define PRIiPTR "Ii"
 
 // The fprintf macros for unsigned integers are:
-#define PRIo8       "o"
-#define PRIu8       "u"
-#define PRIx8       "x"
-#define PRIX8       "X"
-#define PRIoLEAST8  "o"
-#define PRIuLEAST8  "u"
-#define PRIxLEAST8  "x"
-#define PRIXLEAST8  "X"
-#define PRIoFAST8   "o"
-#define PRIuFAST8   "u"
-#define PRIxFAST8   "x"
-#define PRIXFAST8   "X"
+#define PRIo8 "o"
+#define PRIu8 "u"
+#define PRIx8 "x"
+#define PRIX8 "X"
+#define PRIoLEAST8 "o"
+#define PRIuLEAST8 "u"
+#define PRIxLEAST8 "x"
+#define PRIXLEAST8 "X"
+#define PRIoFAST8 "o"
+#define PRIuFAST8 "u"
+#define PRIxFAST8 "x"
+#define PRIXFAST8 "X"
 
-#define PRIo16       "ho"
-#define PRIu16       "hu"
-#define PRIx16       "hx"
-#define PRIX16       "hX"
-#define PRIoLEAST16  "ho"
-#define PRIuLEAST16  "hu"
-#define PRIxLEAST16  "hx"
-#define PRIXLEAST16  "hX"
-#define PRIoFAST16   "ho"
-#define PRIuFAST16   "hu"
-#define PRIxFAST16   "hx"
-#define PRIXFAST16   "hX"
+#define PRIo16 "ho"
+#define PRIu16 "hu"
+#define PRIx16 "hx"
+#define PRIX16 "hX"
+#define PRIoLEAST16 "ho"
+#define PRIuLEAST16 "hu"
+#define PRIxLEAST16 "hx"
+#define PRIXLEAST16 "hX"
+#define PRIoFAST16 "ho"
+#define PRIuFAST16 "hu"
+#define PRIxFAST16 "hx"
+#define PRIXFAST16 "hX"
 
-#define PRIo32       "I32o"
-#define PRIu32       "I32u"
-#define PRIx32       "I32x"
-#define PRIX32       "I32X"
-#define PRIoLEAST32  "I32o"
-#define PRIuLEAST32  "I32u"
-#define PRIxLEAST32  "I32x"
-#define PRIXLEAST32  "I32X"
-#define PRIoFAST32   "I32o"
-#define PRIuFAST32   "I32u"
-#define PRIxFAST32   "I32x"
-#define PRIXFAST32   "I32X"
+#define PRIo32 "I32o"
+#define PRIu32 "I32u"
+#define PRIx32 "I32x"
+#define PRIX32 "I32X"
+#define PRIoLEAST32 "I32o"
+#define PRIuLEAST32 "I32u"
+#define PRIxLEAST32 "I32x"
+#define PRIXLEAST32 "I32X"
+#define PRIoFAST32 "I32o"
+#define PRIuFAST32 "I32u"
+#define PRIxFAST32 "I32x"
+#define PRIXFAST32 "I32X"
 
-#define PRIo64       "I64o"
-#define PRIu64       "I64u"
-#define PRIx64       "I64x"
-#define PRIX64       "I64X"
-#define PRIoLEAST64  "I64o"
-#define PRIuLEAST64  "I64u"
-#define PRIxLEAST64  "I64x"
-#define PRIXLEAST64  "I64X"
-#define PRIoFAST64   "I64o"
-#define PRIuFAST64   "I64u"
-#define PRIxFAST64   "I64x"
-#define PRIXFAST64   "I64X"
+#define PRIo64 "I64o"
+#define PRIu64 "I64u"
+#define PRIx64 "I64x"
+#define PRIX64 "I64X"
+#define PRIoLEAST64 "I64o"
+#define PRIuLEAST64 "I64u"
+#define PRIxLEAST64 "I64x"
+#define PRIXLEAST64 "I64X"
+#define PRIoFAST64 "I64o"
+#define PRIuFAST64 "I64u"
+#define PRIxFAST64 "I64x"
+#define PRIXFAST64 "I64X"
 
-#define PRIoMAX     "I64o"
-#define PRIuMAX     "I64u"
-#define PRIxMAX     "I64x"
-#define PRIXMAX     "I64X"
+#define PRIoMAX "I64o"
+#define PRIuMAX "I64u"
+#define PRIxMAX "I64x"
+#define PRIXMAX "I64X"
 
-#define PRIoPTR     "Io"
-#define PRIuPTR     "Iu"
-#define PRIxPTR     "Ix"
-#define PRIXPTR     "IX"
+#define PRIoPTR "Io"
+#define PRIuPTR "Iu"
+#define PRIxPTR "Ix"
+#define PRIXPTR "IX"
 
 // The fscanf macros for signed integers are:
-#define SCNd8       "d"
-#define SCNi8       "i"
-#define SCNdLEAST8  "d"
-#define SCNiLEAST8  "i"
-#define SCNdFAST8   "d"
-#define SCNiFAST8   "i"
+#define SCNd8 "d"
+#define SCNi8 "i"
+#define SCNdLEAST8 "d"
+#define SCNiLEAST8 "i"
+#define SCNdFAST8 "d"
+#define SCNiFAST8 "i"
 
-#define SCNd16       "hd"
-#define SCNi16       "hi"
-#define SCNdLEAST16  "hd"
-#define SCNiLEAST16  "hi"
-#define SCNdFAST16   "hd"
-#define SCNiFAST16   "hi"
+#define SCNd16 "hd"
+#define SCNi16 "hi"
+#define SCNdLEAST16 "hd"
+#define SCNiLEAST16 "hi"
+#define SCNdFAST16 "hd"
+#define SCNiFAST16 "hi"
 
-#define SCNd32       "ld"
-#define SCNi32       "li"
-#define SCNdLEAST32  "ld"
-#define SCNiLEAST32  "li"
-#define SCNdFAST32   "ld"
-#define SCNiFAST32   "li"
+#define SCNd32 "ld"
+#define SCNi32 "li"
+#define SCNdLEAST32 "ld"
+#define SCNiLEAST32 "li"
+#define SCNdFAST32 "ld"
+#define SCNiFAST32 "li"
 
-#define SCNd64       "I64d"
-#define SCNi64       "I64i"
-#define SCNdLEAST64  "I64d"
-#define SCNiLEAST64  "I64i"
-#define SCNdFAST64   "I64d"
-#define SCNiFAST64   "I64i"
+#define SCNd64 "I64d"
+#define SCNi64 "I64i"
+#define SCNdLEAST64 "I64d"
+#define SCNiLEAST64 "I64i"
+#define SCNdFAST64 "I64d"
+#define SCNiFAST64 "I64i"
 
-#define SCNdMAX     "I64d"
-#define SCNiMAX     "I64i"
+#define SCNdMAX "I64d"
+#define SCNiMAX "I64i"
 
 #ifdef _WIN64 // [
-#  define SCNdPTR     "I64d"
-#  define SCNiPTR     "I64i"
-#else  // _WIN64 ][
-#  define SCNdPTR     "ld"
-#  define SCNiPTR     "li"
-#endif  // _WIN64 ]
+#define SCNdPTR "I64d"
+#define SCNiPTR "I64i"
+#else // _WIN64 ][
+#define SCNdPTR "ld"
+#define SCNiPTR "li"
+#endif // _WIN64 ]
 
 // The fscanf macros for unsigned integers are:
-#define SCNo8       "o"
-#define SCNu8       "u"
-#define SCNx8       "x"
-#define SCNX8       "X"
-#define SCNoLEAST8  "o"
-#define SCNuLEAST8  "u"
-#define SCNxLEAST8  "x"
-#define SCNXLEAST8  "X"
-#define SCNoFAST8   "o"
-#define SCNuFAST8   "u"
-#define SCNxFAST8   "x"
-#define SCNXFAST8   "X"
+#define SCNo8 "o"
+#define SCNu8 "u"
+#define SCNx8 "x"
+#define SCNX8 "X"
+#define SCNoLEAST8 "o"
+#define SCNuLEAST8 "u"
+#define SCNxLEAST8 "x"
+#define SCNXLEAST8 "X"
+#define SCNoFAST8 "o"
+#define SCNuFAST8 "u"
+#define SCNxFAST8 "x"
+#define SCNXFAST8 "X"
 
-#define SCNo16       "ho"
-#define SCNu16       "hu"
-#define SCNx16       "hx"
-#define SCNX16       "hX"
-#define SCNoLEAST16  "ho"
-#define SCNuLEAST16  "hu"
-#define SCNxLEAST16  "hx"
-#define SCNXLEAST16  "hX"
-#define SCNoFAST16   "ho"
-#define SCNuFAST16   "hu"
-#define SCNxFAST16   "hx"
-#define SCNXFAST16   "hX"
+#define SCNo16 "ho"
+#define SCNu16 "hu"
+#define SCNx16 "hx"
+#define SCNX16 "hX"
+#define SCNoLEAST16 "ho"
+#define SCNuLEAST16 "hu"
+#define SCNxLEAST16 "hx"
+#define SCNXLEAST16 "hX"
+#define SCNoFAST16 "ho"
+#define SCNuFAST16 "hu"
+#define SCNxFAST16 "hx"
+#define SCNXFAST16 "hX"
 
-#define SCNo32       "lo"
-#define SCNu32       "lu"
-#define SCNx32       "lx"
-#define SCNX32       "lX"
-#define SCNoLEAST32  "lo"
-#define SCNuLEAST32  "lu"
-#define SCNxLEAST32  "lx"
-#define SCNXLEAST32  "lX"
-#define SCNoFAST32   "lo"
-#define SCNuFAST32   "lu"
-#define SCNxFAST32   "lx"
-#define SCNXFAST32   "lX"
+#define SCNo32 "lo"
+#define SCNu32 "lu"
+#define SCNx32 "lx"
+#define SCNX32 "lX"
+#define SCNoLEAST32 "lo"
+#define SCNuLEAST32 "lu"
+#define SCNxLEAST32 "lx"
+#define SCNXLEAST32 "lX"
+#define SCNoFAST32 "lo"
+#define SCNuFAST32 "lu"
+#define SCNxFAST32 "lx"
+#define SCNXFAST32 "lX"
 
-#define SCNo64       "I64o"
-#define SCNu64       "I64u"
-#define SCNx64       "I64x"
-#define SCNX64       "I64X"
-#define SCNoLEAST64  "I64o"
-#define SCNuLEAST64  "I64u"
-#define SCNxLEAST64  "I64x"
-#define SCNXLEAST64  "I64X"
-#define SCNoFAST64   "I64o"
-#define SCNuFAST64   "I64u"
-#define SCNxFAST64   "I64x"
-#define SCNXFAST64   "I64X"
+#define SCNo64 "I64o"
+#define SCNu64 "I64u"
+#define SCNx64 "I64x"
+#define SCNX64 "I64X"
+#define SCNoLEAST64 "I64o"
+#define SCNuLEAST64 "I64u"
+#define SCNxLEAST64 "I64x"
+#define SCNXLEAST64 "I64X"
+#define SCNoFAST64 "I64o"
+#define SCNuFAST64 "I64u"
+#define SCNxFAST64 "I64x"
+#define SCNXFAST64 "I64X"
 
-#define SCNoMAX     "I64o"
-#define SCNuMAX     "I64u"
-#define SCNxMAX     "I64x"
-#define SCNXMAX     "I64X"
+#define SCNoMAX "I64o"
+#define SCNuMAX "I64u"
+#define SCNxMAX "I64x"
+#define SCNXMAX "I64X"
 
 #ifdef _WIN64 // [
-#  define SCNoPTR     "I64o"
-#  define SCNuPTR     "I64u"
-#  define SCNxPTR     "I64x"
-#  define SCNXPTR     "I64X"
-#else  // _WIN64 ][
-#  define SCNoPTR     "lo"
-#  define SCNuPTR     "lu"
-#  define SCNxPTR     "lx"
-#  define SCNXPTR     "lX"
-#endif  // _WIN64 ]
+#define SCNoPTR "I64o"
+#define SCNuPTR "I64u"
+#define SCNxPTR "I64x"
+#define SCNXPTR "I64X"
+#else // _WIN64 ][
+#define SCNoPTR "lo"
+#define SCNuPTR "lu"
+#define SCNxPTR "lx"
+#define SCNXPTR "lX"
+#endif // _WIN64 ]
 
 #endif // __STDC_FORMAT_MACROS ]
 
@@ -284,23 +285,24 @@ typedef struct {
 // in %MSVC.NET%\crt\src\div.c
 #ifdef STATIC_IMAXDIV // [
 static
-#else // STATIC_IMAXDIV ][
+#else                 // STATIC_IMAXDIV ][
 _inline
-#endif // STATIC_IMAXDIV ]
-imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
+#endif                // STATIC_IMAXDIV ]
+    imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
 {
-   imaxdiv_t result;
+    imaxdiv_t result;
 
-   result.quot = numer / denom;
-   result.rem = numer % denom;
+    result.quot = numer / denom;
+    result.rem  = numer % denom;
 
-   if (numer < 0 && result.rem > 0) {
-      // did division wrong; must fix up
-      ++result.quot;
-      result.rem -= denom;
-   }
+    if(numer < 0 && result.rem > 0)
+    {
+        // did division wrong; must fix up
+        ++result.quot;
+        result.rem -= denom;
+    }
 
-   return result;
+    return result;
 }
 
 // 7.8.2.3 The strtoimax and strtoumax functions
diff --git a/include/rapidjson/msinttypes/stdint.h b/include/rapidjson/msinttypes/stdint.h
index 3d4477b9a0..3e1ffc2408 100644
--- a/include/rapidjson/msinttypes/stdint.h
+++ b/include/rapidjson/msinttypes/stdint.h
@@ -1,37 +1,37 @@
 // ISO C9x  compliant stdint.h for Microsoft Visual Studio
-// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
-// 
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
+//
 //  Copyright (c) 2006-2013 Alexander Chemeris
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
-// 
+//
 //   1. Redistributions of source code must retain the above copyright notice,
 //      this list of conditions and the following disclaimer.
-// 
+//
 //   2. Redistributions in binary form must reproduce the above copyright
 //      notice, this list of conditions and the following disclaimer in the
 //      documentation and/or other materials provided with the distribution.
-// 
+//
 //   3. Neither the name of the product nor the names of its contributors may
 //      be used to endorse or promote products derived from this software
 //      without specific prior written permission.
-// 
+//
 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// 
+//
 ///////////////////////////////////////////////////////////////////////////////
 
-// The above software in this distribution may have been modified by 
-// THL A29 Limited ("Tencent Modifications"). 
+// The above software in this distribution may have been modified by
+// THL A29 Limited ("Tencent Modifications").
 // All Tencent Modifications are Copyright (C) 2015 THL A29 Limited.
 
 #ifndef _MSC_VER // [
@@ -45,7 +45,8 @@
 #pragma once
 #endif
 
-// miloyip: Originally Visual Studio 2010 uses its own stdint.h. However it generates warning with INT64_C(), so change to use this file for vs2010.
+// miloyip: Originally Visual Studio 2010 uses its own stdint.h. However it generates warning with
+// INT64_C(), so change to use this file for vs2010.
 #if _MSC_VER >= 1600 // [
 #include <stdint.h>
 
@@ -62,12 +63,12 @@
 
 // 7.18.4.1 Macros for minimum-width integer constants
 
-#define INT8_C(val)  val##i8
+#define INT8_C(val) val##i8
 #define INT16_C(val) val##i16
 #define INT32_C(val) val##i32
 #define INT64_C(val) val##i64
 
-#define UINT8_C(val)  val##ui8
+#define UINT8_C(val) val##ui8
 #define UINT16_C(val) val##ui16
 #define UINT32_C(val) val##ui32
 #define UINT64_C(val) val##ui64
@@ -76,10 +77,10 @@
 // These #ifndef's are needed to prevent collisions with <boost/cstdint.hpp>.
 // Check out Issue 9 for the details.
 #ifndef INTMAX_C //   [
-#  define INTMAX_C   INT64_C
-#endif // INTMAX_C    ]
+#define INTMAX_C INT64_C
+#endif            // INTMAX_C    ]
 #ifndef UINTMAX_C //  [
-#  define UINTMAX_C  UINT64_C
+#define UINTMAX_C UINT64_C
 #endif // UINTMAX_C   ]
 
 #endif // __STDC_CONSTANT_MACROS ]
@@ -95,20 +96,19 @@
 #if defined(__cplusplus) && !defined(_M_ARM)
 extern "C" {
 #endif
-#  include <wchar.h>
+#include <wchar.h>
 #if defined(__cplusplus) && !defined(_M_ARM)
 }
 #endif
 
 // Define _W64 macros to mark types changing their size, like intptr_t.
 #ifndef _W64
-#  if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
-#     define _W64 __w64
-#  else
-#     define _W64
-#  endif
+#if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
+#define _W64 __w64
+#else
+#define _W64
+#endif
 #endif
-
 
 // 7.18.1 Integer types
 
@@ -117,168 +117,166 @@ extern "C" {
 // Visual Studio 6 and Embedded Visual C++ 4 doesn't
 // realize that, e.g. char has the same size as __int8
 // so we give up on __intX for them.
-#if (_MSC_VER < 1300)
-   typedef signed char       int8_t;
-   typedef signed short      int16_t;
-   typedef signed int        int32_t;
-   typedef unsigned char     uint8_t;
-   typedef unsigned short    uint16_t;
-   typedef unsigned int      uint32_t;
+#if(_MSC_VER < 1300)
+typedef signed char int8_t;
+typedef signed short int16_t;
+typedef signed int int32_t;
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
 #else
-   typedef signed __int8     int8_t;
-   typedef signed __int16    int16_t;
-   typedef signed __int32    int32_t;
-   typedef unsigned __int8   uint8_t;
-   typedef unsigned __int16  uint16_t;
-   typedef unsigned __int32  uint32_t;
+typedef signed __int8 int8_t;
+typedef signed __int16 int16_t;
+typedef signed __int32 int32_t;
+typedef unsigned __int8 uint8_t;
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int32 uint32_t;
 #endif
-typedef signed __int64       int64_t;
-typedef unsigned __int64     uint64_t;
-
+typedef signed __int64 int64_t;
+typedef unsigned __int64 uint64_t;
 
 // 7.18.1.2 Minimum-width integer types
-typedef int8_t    int_least8_t;
-typedef int16_t   int_least16_t;
-typedef int32_t   int_least32_t;
-typedef int64_t   int_least64_t;
-typedef uint8_t   uint_least8_t;
-typedef uint16_t  uint_least16_t;
-typedef uint32_t  uint_least32_t;
-typedef uint64_t  uint_least64_t;
+typedef int8_t int_least8_t;
+typedef int16_t int_least16_t;
+typedef int32_t int_least32_t;
+typedef int64_t int_least64_t;
+typedef uint8_t uint_least8_t;
+typedef uint16_t uint_least16_t;
+typedef uint32_t uint_least32_t;
+typedef uint64_t uint_least64_t;
 
 // 7.18.1.3 Fastest minimum-width integer types
-typedef int8_t    int_fast8_t;
-typedef int16_t   int_fast16_t;
-typedef int32_t   int_fast32_t;
-typedef int64_t   int_fast64_t;
-typedef uint8_t   uint_fast8_t;
-typedef uint16_t  uint_fast16_t;
-typedef uint32_t  uint_fast32_t;
-typedef uint64_t  uint_fast64_t;
+typedef int8_t int_fast8_t;
+typedef int16_t int_fast16_t;
+typedef int32_t int_fast32_t;
+typedef int64_t int_fast64_t;
+typedef uint8_t uint_fast8_t;
+typedef uint16_t uint_fast16_t;
+typedef uint32_t uint_fast32_t;
+typedef uint64_t uint_fast64_t;
 
 // 7.18.1.4 Integer types capable of holding object pointers
 #ifdef _WIN64 // [
-   typedef signed __int64    intptr_t;
-   typedef unsigned __int64  uintptr_t;
-#else // _WIN64 ][
-   typedef _W64 signed int   intptr_t;
-   typedef _W64 unsigned int uintptr_t;
-#endif // _WIN64 ]
+typedef signed __int64 intptr_t;
+typedef unsigned __int64 uintptr_t;
+#else         // _WIN64 ][
+typedef _W64 signed int intptr_t;
+typedef _W64 unsigned int uintptr_t;
+#endif        // _WIN64 ]
 
 // 7.18.1.5 Greatest-width integer types
-typedef int64_t   intmax_t;
-typedef uint64_t  uintmax_t;
-
+typedef int64_t intmax_t;
+typedef uint64_t uintmax_t;
 
 // 7.18.2 Limits of specified-width integer types
 
-#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
+#if !defined(__cplusplus) || \
+    defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
 
 // 7.18.2.1 Limits of exact-width integer types
-#define INT8_MIN     ((int8_t)_I8_MIN)
-#define INT8_MAX     _I8_MAX
-#define INT16_MIN    ((int16_t)_I16_MIN)
-#define INT16_MAX    _I16_MAX
-#define INT32_MIN    ((int32_t)_I32_MIN)
-#define INT32_MAX    _I32_MAX
-#define INT64_MIN    ((int64_t)_I64_MIN)
-#define INT64_MAX    _I64_MAX
-#define UINT8_MAX    _UI8_MAX
-#define UINT16_MAX   _UI16_MAX
-#define UINT32_MAX   _UI32_MAX
-#define UINT64_MAX   _UI64_MAX
+#define INT8_MIN ((int8_t)_I8_MIN)
+#define INT8_MAX _I8_MAX
+#define INT16_MIN ((int16_t)_I16_MIN)
+#define INT16_MAX _I16_MAX
+#define INT32_MIN ((int32_t)_I32_MIN)
+#define INT32_MAX _I32_MAX
+#define INT64_MIN ((int64_t)_I64_MIN)
+#define INT64_MAX _I64_MAX
+#define UINT8_MAX _UI8_MAX
+#define UINT16_MAX _UI16_MAX
+#define UINT32_MAX _UI32_MAX
+#define UINT64_MAX _UI64_MAX
 
 // 7.18.2.2 Limits of minimum-width integer types
-#define INT_LEAST8_MIN    INT8_MIN
-#define INT_LEAST8_MAX    INT8_MAX
-#define INT_LEAST16_MIN   INT16_MIN
-#define INT_LEAST16_MAX   INT16_MAX
-#define INT_LEAST32_MIN   INT32_MIN
-#define INT_LEAST32_MAX   INT32_MAX
-#define INT_LEAST64_MIN   INT64_MIN
-#define INT_LEAST64_MAX   INT64_MAX
-#define UINT_LEAST8_MAX   UINT8_MAX
-#define UINT_LEAST16_MAX  UINT16_MAX
-#define UINT_LEAST32_MAX  UINT32_MAX
-#define UINT_LEAST64_MAX  UINT64_MAX
+#define INT_LEAST8_MIN INT8_MIN
+#define INT_LEAST8_MAX INT8_MAX
+#define INT_LEAST16_MIN INT16_MIN
+#define INT_LEAST16_MAX INT16_MAX
+#define INT_LEAST32_MIN INT32_MIN
+#define INT_LEAST32_MAX INT32_MAX
+#define INT_LEAST64_MIN INT64_MIN
+#define INT_LEAST64_MAX INT64_MAX
+#define UINT_LEAST8_MAX UINT8_MAX
+#define UINT_LEAST16_MAX UINT16_MAX
+#define UINT_LEAST32_MAX UINT32_MAX
+#define UINT_LEAST64_MAX UINT64_MAX
 
 // 7.18.2.3 Limits of fastest minimum-width integer types
-#define INT_FAST8_MIN    INT8_MIN
-#define INT_FAST8_MAX    INT8_MAX
-#define INT_FAST16_MIN   INT16_MIN
-#define INT_FAST16_MAX   INT16_MAX
-#define INT_FAST32_MIN   INT32_MIN
-#define INT_FAST32_MAX   INT32_MAX
-#define INT_FAST64_MIN   INT64_MIN
-#define INT_FAST64_MAX   INT64_MAX
-#define UINT_FAST8_MAX   UINT8_MAX
-#define UINT_FAST16_MAX  UINT16_MAX
-#define UINT_FAST32_MAX  UINT32_MAX
-#define UINT_FAST64_MAX  UINT64_MAX
+#define INT_FAST8_MIN INT8_MIN
+#define INT_FAST8_MAX INT8_MAX
+#define INT_FAST16_MIN INT16_MIN
+#define INT_FAST16_MAX INT16_MAX
+#define INT_FAST32_MIN INT32_MIN
+#define INT_FAST32_MAX INT32_MAX
+#define INT_FAST64_MIN INT64_MIN
+#define INT_FAST64_MAX INT64_MAX
+#define UINT_FAST8_MAX UINT8_MAX
+#define UINT_FAST16_MAX UINT16_MAX
+#define UINT_FAST32_MAX UINT32_MAX
+#define UINT_FAST64_MAX UINT64_MAX
 
 // 7.18.2.4 Limits of integer types capable of holding object pointers
 #ifdef _WIN64 // [
-#  define INTPTR_MIN   INT64_MIN
-#  define INTPTR_MAX   INT64_MAX
-#  define UINTPTR_MAX  UINT64_MAX
+#define INTPTR_MIN INT64_MIN
+#define INTPTR_MAX INT64_MAX
+#define UINTPTR_MAX UINT64_MAX
 #else // _WIN64 ][
-#  define INTPTR_MIN   INT32_MIN
-#  define INTPTR_MAX   INT32_MAX
-#  define UINTPTR_MAX  UINT32_MAX
+#define INTPTR_MIN INT32_MIN
+#define INTPTR_MAX INT32_MAX
+#define UINTPTR_MAX UINT32_MAX
 #endif // _WIN64 ]
 
 // 7.18.2.5 Limits of greatest-width integer types
-#define INTMAX_MIN   INT64_MIN
-#define INTMAX_MAX   INT64_MAX
-#define UINTMAX_MAX  UINT64_MAX
+#define INTMAX_MIN INT64_MIN
+#define INTMAX_MAX INT64_MAX
+#define UINTMAX_MAX UINT64_MAX
 
 // 7.18.3 Limits of other integer types
 
 #ifdef _WIN64 // [
-#  define PTRDIFF_MIN  _I64_MIN
-#  define PTRDIFF_MAX  _I64_MAX
-#else  // _WIN64 ][
-#  define PTRDIFF_MIN  _I32_MIN
-#  define PTRDIFF_MAX  _I32_MAX
-#endif  // _WIN64 ]
+#define PTRDIFF_MIN _I64_MIN
+#define PTRDIFF_MAX _I64_MAX
+#else // _WIN64 ][
+#define PTRDIFF_MIN _I32_MIN
+#define PTRDIFF_MAX _I32_MAX
+#endif // _WIN64 ]
 
-#define SIG_ATOMIC_MIN  INT_MIN
-#define SIG_ATOMIC_MAX  INT_MAX
+#define SIG_ATOMIC_MIN INT_MIN
+#define SIG_ATOMIC_MAX INT_MAX
 
 #ifndef SIZE_MAX // [
-#  ifdef _WIN64 // [
-#     define SIZE_MAX  _UI64_MAX
-#  else // _WIN64 ][
-#     define SIZE_MAX  _UI32_MAX
-#  endif // _WIN64 ]
-#endif // SIZE_MAX ]
+#ifdef _WIN64    // [
+#define SIZE_MAX _UI64_MAX
+#else // _WIN64 ][
+#define SIZE_MAX _UI32_MAX
+#endif            // _WIN64 ]
+#endif            // SIZE_MAX ]
 
 // WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
 #ifndef WCHAR_MIN // [
-#  define WCHAR_MIN  0
-#endif  // WCHAR_MIN ]
+#define WCHAR_MIN 0
+#endif            // WCHAR_MIN ]
 #ifndef WCHAR_MAX // [
-#  define WCHAR_MAX  _UI16_MAX
-#endif  // WCHAR_MAX ]
+#define WCHAR_MAX _UI16_MAX
+#endif // WCHAR_MAX ]
 
-#define WINT_MIN  0
-#define WINT_MAX  _UI16_MAX
+#define WINT_MIN 0
+#define WINT_MAX _UI16_MAX
 
 #endif // __STDC_LIMIT_MACROS ]
 
-
 // 7.18.4 Limits of other integer types
 
 #if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
 
 // 7.18.4.1 Macros for minimum-width integer constants
 
-#define INT8_C(val)  val##i8
+#define INT8_C(val) val##i8
 #define INT16_C(val) val##i16
 #define INT32_C(val) val##i32
 #define INT64_C(val) val##i64
 
-#define UINT8_C(val)  val##ui8
+#define UINT8_C(val) val##ui8
 #define UINT16_C(val) val##ui16
 #define UINT32_C(val) val##ui32
 #define UINT64_C(val) val##ui64
@@ -287,10 +285,10 @@ typedef uint64_t  uintmax_t;
 // These #ifndef's are needed to prevent collisions with <boost/cstdint.hpp>.
 // Check out Issue 9 for the details.
 #ifndef INTMAX_C //   [
-#  define INTMAX_C   INT64_C
-#endif // INTMAX_C    ]
+#define INTMAX_C INT64_C
+#endif            // INTMAX_C    ]
 #ifndef UINTMAX_C //  [
-#  define UINTMAX_C  UINT64_C
+#define UINTMAX_C UINT64_C
 #endif // UINTMAX_C   ]
 
 #endif // __STDC_CONSTANT_MACROS ]
diff --git a/include/rapidjson/ostreamwrapper.h b/include/rapidjson/ostreamwrapper.h
index 11ed4d33f9..547bd09fc7 100644
--- a/include/rapidjson/ostreamwrapper.h
+++ b/include/rapidjson/ostreamwrapper.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_OSTREAMWRAPPER_H_
@@ -40,29 +40,46 @@ RAPIDJSON_NAMESPACE_BEGIN
 
     \tparam StreamType Class derived from \c std::basic_ostream.
 */
-   
+
 template <typename StreamType>
-class BasicOStreamWrapper {
-public:
+class BasicOStreamWrapper
+{
+    public:
     typedef typename StreamType::char_type Ch;
     BasicOStreamWrapper(StreamType& stream) : stream_(stream) {}
 
-    void Put(Ch c) {
-        stream_.put(c);
-    }
+    void Put(Ch c) { stream_.put(c); }
 
-    void Flush() {
-        stream_.flush();
-    }
+    void Flush() { stream_.flush(); }
 
     // Not implemented
-    char Peek() const { RAPIDJSON_ASSERT(false); return 0; }
-    char Take() { RAPIDJSON_ASSERT(false); return 0; }
-    size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
-    char* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
-    size_t PutEnd(char*) { RAPIDJSON_ASSERT(false); return 0; }
+    char Peek() const
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    char Take()
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    size_t Tell() const
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    char* PutBegin()
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
+    size_t PutEnd(char*)
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
 
-private:
+    private:
     BasicOStreamWrapper(const BasicOStreamWrapper&);
     BasicOStreamWrapper& operator=(const BasicOStreamWrapper&);
 
diff --git a/include/rapidjson/pointer.h b/include/rapidjson/pointer.h
index 355929ede0..bba4519774 100644
--- a/include/rapidjson/pointer.h
+++ b/include/rapidjson/pointer.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_POINTER_H_
@@ -22,7 +22,7 @@
 
 #ifdef __clang__
 RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(switch-enum)
+RAPIDJSON_DIAG_OFF(switch - enum)
 #elif defined(_MSC_VER)
 RAPIDJSON_DIAG_PUSH
 RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
@@ -36,23 +36,24 @@ RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
 
 RAPIDJSON_NAMESPACE_BEGIN
 
-static const SizeType kPointerInvalidIndex = ~SizeType(0);  //!< Represents an invalid index in GenericPointer::Token
+static const SizeType kPointerInvalidIndex =
+    ~SizeType(0); //!< Represents an invalid index in GenericPointer::Token
 
 ///////////////////////////////////////////////////////////////////////////////
 // GenericPointer
 
 //! Represents a JSON Pointer. Use Pointer for UTF8 encoding and default allocator.
 /*!
-    This class implements RFC 6901 "JavaScript Object Notation (JSON) Pointer" 
+    This class implements RFC 6901 "JavaScript Object Notation (JSON) Pointer"
     (https://tools.ietf.org/html/rfc6901).
 
     A JSON pointer is for identifying a specific value in a JSON document
     (GenericDocument). It can simplify coding of DOM tree manipulation, because it
     can access multiple-level depth of DOM tree with single API call.
 
-    After it parses a string representation (e.g. "/foo/0" or URI fragment 
+    After it parses a string representation (e.g. "/foo/0" or URI fragment
     representation (e.g. "#/foo/0") into its internal representation (tokens),
-    it can be used to resolve a specific value in multiple documents, or sub-tree 
+    it can be used to resolve a specific value in multiple documents, or sub-tree
     of documents.
 
     Contrary to GenericValue, Pointer can be copy constructed and copy assigned.
@@ -71,16 +72,16 @@ static const SizeType kPointerInvalidIndex = ~SizeType(0);  //!< Represents an i
     However, Allocator of GenericPointer is independent of Allocator of Value.
 */
 template <typename ValueType, typename Allocator = CrtAllocator>
-class GenericPointer {
-public:
-    typedef typename ValueType::EncodingType EncodingType;  //!< Encoding type from Value
-    typedef typename ValueType::Ch Ch;                      //!< Character type from Value
+class GenericPointer
+{
+    public:
+    typedef typename ValueType::EncodingType EncodingType; //!< Encoding type from Value
+    typedef typename ValueType::Ch Ch;                     //!< Character type from Value
     typedef GenericUri<ValueType, Allocator> UriType;
 
-
-  //! A token is the basic units of internal representation.
+    //! A token is the basic units of internal representation.
     /*!
-        A JSON pointer string representation "/foo/123" is parsed to two tokens: 
+        A JSON pointer string representation "/foo/123" is parsed to two tokens:
         "foo" and 123. 123 will be represented in both numeric form and string form.
         They are resolved according to the actual value type (object or array).
 
@@ -88,27 +89,47 @@ public:
         (greater than limits of SizeType), they are only treated as string form
         (i.e. the token's index will be equal to kPointerInvalidIndex).
 
-        This struct is public so that user can create a Pointer without parsing and 
+        This struct is public so that user can create a Pointer without parsing and
         allocation, using a special constructor.
     */
-    struct Token {
-        const Ch* name;             //!< Name of the token. It has null character at the end but it can contain null character.
-        SizeType length;            //!< Length of the name.
-        SizeType index;             //!< A valid array index, if it is not equal to kPointerInvalidIndex.
+    struct Token
+    {
+        const Ch* name;  //!< Name of the token. It has null character at the end but it can contain
+                         //!< null character.
+        SizeType length; //!< Length of the name.
+        SizeType index;  //!< A valid array index, if it is not equal to kPointerInvalidIndex.
     };
 
     //!@name Constructors and destructor.
     //@{
 
     //! Default constructor.
-    GenericPointer(Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {}
+    GenericPointer(Allocator* allocator = 0)
+        : allocator_(allocator),
+          ownAllocator_(),
+          nameBuffer_(),
+          tokens_(),
+          tokenCount_(),
+          parseErrorOffset_(),
+          parseErrorCode_(kPointerParseErrorNone)
+    {
+    }
 
     //! Constructor that parses a string or URI fragment representation.
     /*!
         \param source A null-terminated, string or URI fragment representation of JSON pointer.
-        \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one.
+        \param allocator User supplied allocator for this pointer. If no allocator is provided, it
+       creates a self-owned one.
     */
-    explicit GenericPointer(const Ch* source, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
+    explicit GenericPointer(const Ch* source, Allocator* allocator = 0)
+        : allocator_(allocator),
+          ownAllocator_(),
+          nameBuffer_(),
+          tokens_(),
+          tokenCount_(),
+          parseErrorOffset_(),
+          parseErrorCode_(kPointerParseErrorNone)
+    {
         Parse(source, internal::StrLen(source));
     }
 
@@ -116,22 +137,40 @@ public:
     //! Constructor that parses a string or URI fragment representation.
     /*!
         \param source A string or URI fragment representation of JSON pointer.
-        \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one.
-        \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
+        \param allocator User supplied allocator for this pointer. If no allocator is provided, it
+       creates a self-owned one. \note Requires the definition of the preprocessor symbol \ref
+       RAPIDJSON_HAS_STDSTRING.
     */
-    explicit GenericPointer(const std::basic_string<Ch>& source, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
+    explicit GenericPointer(const std::basic_string<Ch>& source, Allocator* allocator = 0)
+        : allocator_(allocator),
+          ownAllocator_(),
+          nameBuffer_(),
+          tokens_(),
+          tokenCount_(),
+          parseErrorOffset_(),
+          parseErrorCode_(kPointerParseErrorNone)
+    {
         Parse(source.c_str(), source.size());
     }
 #endif
 
-    //! Constructor that parses a string or URI fragment representation, with length of the source string.
+    //! Constructor that parses a string or URI fragment representation, with length of the source
+    //! string.
     /*!
         \param source A string or URI fragment representation of JSON pointer.
         \param length Length of source.
-        \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one.
-        \note Slightly faster than the overload without length.
+        \param allocator User supplied allocator for this pointer. If no allocator is provided, it
+       creates a self-owned one. \note Slightly faster than the overload without length.
     */
-    GenericPointer(const Ch* source, size_t length, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
+    GenericPointer(const Ch* source, size_t length, Allocator* allocator = 0)
+        : allocator_(allocator),
+          ownAllocator_(),
+          nameBuffer_(),
+          tokens_(),
+          tokenCount_(),
+          parseErrorOffset_(),
+          parseErrorCode_(kPointerParseErrorNone)
+    {
         Parse(source, length);
     }
 
@@ -157,40 +196,70 @@ public:
         #undef INDEX
         \endcode
     */
-    GenericPointer(const Token* tokens, size_t tokenCount) : allocator_(), ownAllocator_(), nameBuffer_(), tokens_(const_cast<Token*>(tokens)), tokenCount_(tokenCount), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {}
+    GenericPointer(const Token* tokens, size_t tokenCount)
+        : allocator_(),
+          ownAllocator_(),
+          nameBuffer_(),
+          tokens_(const_cast<Token*>(tokens)),
+          tokenCount_(tokenCount),
+          parseErrorOffset_(),
+          parseErrorCode_(kPointerParseErrorNone)
+    {
+    }
 
     //! Copy constructor.
-    GenericPointer(const GenericPointer& rhs) : allocator_(), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
+    GenericPointer(const GenericPointer& rhs)
+        : allocator_(),
+          ownAllocator_(),
+          nameBuffer_(),
+          tokens_(),
+          tokenCount_(),
+          parseErrorOffset_(),
+          parseErrorCode_(kPointerParseErrorNone)
+    {
         *this = rhs;
     }
 
     //! Copy constructor.
-    GenericPointer(const GenericPointer& rhs, Allocator* allocator) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
+    GenericPointer(const GenericPointer& rhs, Allocator* allocator)
+        : allocator_(allocator),
+          ownAllocator_(),
+          nameBuffer_(),
+          tokens_(),
+          tokenCount_(),
+          parseErrorOffset_(),
+          parseErrorCode_(kPointerParseErrorNone)
+    {
         *this = rhs;
     }
 
     //! Destructor.
-    ~GenericPointer() {
-        if (nameBuffer_)    // If user-supplied tokens constructor is used, nameBuffer_ is nullptr and tokens_ are not deallocated.
+    ~GenericPointer()
+    {
+        if(nameBuffer_) // If user-supplied tokens constructor is used, nameBuffer_ is nullptr and
+                        // tokens_ are not deallocated.
             Allocator::Free(tokens_);
         RAPIDJSON_DELETE(ownAllocator_);
     }
 
     //! Assignment operator.
-    GenericPointer& operator=(const GenericPointer& rhs) {
-        if (this != &rhs) {
+    GenericPointer& operator=(const GenericPointer& rhs)
+    {
+        if(this != &rhs)
+        {
             // Do not delete ownAllcator
-            if (nameBuffer_)
+            if(nameBuffer_)
                 Allocator::Free(tokens_);
 
-            tokenCount_ = rhs.tokenCount_;
+            tokenCount_       = rhs.tokenCount_;
             parseErrorOffset_ = rhs.parseErrorOffset_;
-            parseErrorCode_ = rhs.parseErrorCode_;
+            parseErrorCode_   = rhs.parseErrorCode_;
 
-            if (rhs.nameBuffer_)
+            if(rhs.nameBuffer_)
                 CopyFromRaw(rhs); // Normally parsed tokens.
-            else {
-                tokens_ = rhs.tokens_; // User supplied const tokens.
+            else
+            {
+                tokens_     = rhs.tokens_; // User supplied const tokens.
                 nameBuffer_ = 0;
             }
         }
@@ -202,7 +271,8 @@ public:
         \param other The pointer to swap with.
         \note Constant complexity.
     */
-    GenericPointer& Swap(GenericPointer& other) RAPIDJSON_NOEXCEPT {
+    GenericPointer& Swap(GenericPointer& other) RAPIDJSON_NOEXCEPT
+    {
         internal::Swap(allocator_, other.allocator_);
         internal::Swap(ownAllocator_, other.ownAllocator_);
         internal::Swap(nameBuffer_, other.nameBuffer_);
@@ -215,11 +285,9 @@ public:
 
     //! free-standing swap function helper
     /*!
-        Helper function to enable support for common swap implementation pattern based on \c std::swap:
-        \code
-        void swap(MyClass& a, MyClass& b) {
-            using std::swap;
-            swap(a.pointer, b.pointer);
+        Helper function to enable support for common swap implementation pattern based on \c
+       std::swap: \code void swap(MyClass& a, MyClass& b) { using std::swap; swap(a.pointer,
+       b.pointer);
             // ...
         }
         \endcode
@@ -238,14 +306,15 @@ public:
         \param allocator Allocator for the newly return Pointer.
         \return A new Pointer with appended token.
     */
-    GenericPointer Append(const Token& token, Allocator* allocator = 0) const {
+    GenericPointer Append(const Token& token, Allocator* allocator = 0) const
+    {
         GenericPointer r;
         r.allocator_ = allocator;
-        Ch *p = r.CopyFromRaw(*this, 1, token.length + 1);
+        Ch* p        = r.CopyFromRaw(*this, 1, token.length + 1);
         std::memcpy(p, token.name, (token.length + 1) * sizeof(Ch));
-        r.tokens_[tokenCount_].name = p;
+        r.tokens_[tokenCount_].name   = p;
         r.tokens_[tokenCount_].length = token.length;
-        r.tokens_[tokenCount_].index = token.index;
+        r.tokens_[tokenCount_].index  = token.index;
         return r;
     }
 
@@ -256,8 +325,9 @@ public:
         \param allocator Allocator for the newly return Pointer.
         \return A new Pointer with appended token.
     */
-    GenericPointer Append(const Ch* name, SizeType length, Allocator* allocator = 0) const {
-        Token token = { name, length, kPointerInvalidIndex };
+    GenericPointer Append(const Ch* name, SizeType length, Allocator* allocator = 0) const
+    {
+        Token token = {name, length, kPointerInvalidIndex};
         return Append(token, allocator);
     }
 
@@ -268,8 +338,11 @@ public:
         \return A new Pointer with appended token.
     */
     template <typename T>
-    RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr<internal::IsSame<typename internal::RemoveConst<T>::Type, Ch> >), (GenericPointer))
-    Append(T* name, Allocator* allocator = 0) const {
+    RAPIDJSON_DISABLEIF_RETURN(
+        (internal::NotExpr<internal::IsSame<typename internal::RemoveConst<T>::Type, Ch>>),
+        (GenericPointer))
+    Append(T* name, Allocator* allocator = 0) const
+    {
         return Append(name, internal::StrLen(name), allocator);
     }
 
@@ -280,7 +353,8 @@ public:
         \param allocator Allocator for the newly return Pointer.
         \return A new Pointer with appended token.
     */
-    GenericPointer Append(const std::basic_string<Ch>& name, Allocator* allocator = 0) const {
+    GenericPointer Append(const std::basic_string<Ch>& name, Allocator* allocator = 0) const
+    {
         return Append(name.c_str(), static_cast<SizeType>(name.size()), allocator);
     }
 #endif
@@ -291,21 +365,25 @@ public:
         \param allocator Allocator for the newly return Pointer.
         \return A new Pointer with appended token.
     */
-    GenericPointer Append(SizeType index, Allocator* allocator = 0) const {
+    GenericPointer Append(SizeType index, Allocator* allocator = 0) const
+    {
         char buffer[21];
-        char* end = sizeof(SizeType) == 4 ? internal::u32toa(index, buffer) : internal::u64toa(index, buffer);
+        char* end       = sizeof(SizeType) == 4 ? internal::u32toa(index, buffer)
+                                                : internal::u64toa(index, buffer);
         SizeType length = static_cast<SizeType>(end - buffer);
-        buffer[length] = '\0';
+        buffer[length]  = '\0';
 
-        RAPIDJSON_IF_CONSTEXPR (sizeof(Ch) == 1) {
-            Token token = { reinterpret_cast<Ch*>(buffer), length, index };
+        RAPIDJSON_IF_CONSTEXPR(sizeof(Ch) == 1)
+        {
+            Token token = {reinterpret_cast<Ch*>(buffer), length, index};
             return Append(token, allocator);
         }
-        else {
+        else
+        {
             Ch name[21];
-            for (size_t i = 0; i <= length; i++)
+            for(size_t i = 0; i <= length; i++)
                 name[i] = static_cast<Ch>(buffer[i]);
-            Token token = { name, length, index };
+            Token token = {name, length, index};
             return Append(token, allocator);
         }
     }
@@ -316,10 +394,12 @@ public:
         \param allocator Allocator for the newly return Pointer.
         \return A new Pointer with appended token.
     */
-    GenericPointer Append(const ValueType& token, Allocator* allocator = 0) const {
-        if (token.IsString())
+    GenericPointer Append(const ValueType& token, Allocator* allocator = 0) const
+    {
+        if(token.IsString())
             return Append(token.GetString(), token.GetStringLength(), allocator);
-        else {
+        else
+        {
             RAPIDJSON_ASSERT(token.IsUint64());
             RAPIDJSON_ASSERT(token.GetUint64() <= SizeType(~0));
             return Append(static_cast<SizeType>(token.GetUint64()), allocator);
@@ -361,14 +441,18 @@ public:
     /*!
         \note When any pointers are invalid, always returns false.
     */
-    bool operator==(const GenericPointer& rhs) const {
-        if (!IsValid() || !rhs.IsValid() || tokenCount_ != rhs.tokenCount_)
+    bool operator==(const GenericPointer& rhs) const
+    {
+        if(!IsValid() || !rhs.IsValid() || tokenCount_ != rhs.tokenCount_)
             return false;
 
-        for (size_t i = 0; i < tokenCount_; i++) {
-            if (tokens_[i].index != rhs.tokens_[i].index ||
-                tokens_[i].length != rhs.tokens_[i].length || 
-                (tokens_[i].length != 0 && std::memcmp(tokens_[i].name, rhs.tokens_[i].name, sizeof(Ch)* tokens_[i].length) != 0))
+        for(size_t i = 0; i < tokenCount_; i++)
+        {
+            if(tokens_[i].index != rhs.tokens_[i].index ||
+               tokens_[i].length != rhs.tokens_[i].length ||
+               (tokens_[i].length != 0 &&
+                std::memcmp(tokens_[i].name, rhs.tokens_[i].name, sizeof(Ch) * tokens_[i].length) !=
+                    0))
             {
                 return false;
             }
@@ -387,23 +471,26 @@ public:
     /*!
         \note Invalid pointers are always greater than valid ones.
     */
-    bool operator<(const GenericPointer& rhs) const {
-        if (!IsValid())
+    bool operator<(const GenericPointer& rhs) const
+    {
+        if(!IsValid())
             return false;
-        if (!rhs.IsValid())
+        if(!rhs.IsValid())
             return true;
 
-        if (tokenCount_ != rhs.tokenCount_)
+        if(tokenCount_ != rhs.tokenCount_)
             return tokenCount_ < rhs.tokenCount_;
 
-        for (size_t i = 0; i < tokenCount_; i++) {
-            if (tokens_[i].index != rhs.tokens_[i].index)
+        for(size_t i = 0; i < tokenCount_; i++)
+        {
+            if(tokens_[i].index != rhs.tokens_[i].index)
                 return tokens_[i].index < rhs.tokens_[i].index;
 
-            if (tokens_[i].length != rhs.tokens_[i].length)
+            if(tokens_[i].length != rhs.tokens_[i].length)
                 return tokens_[i].length < rhs.tokens_[i].length;
 
-            if (int cmp = std::memcmp(tokens_[i].name, rhs.tokens_[i].name, sizeof(Ch) * tokens_[i].length))
+            if(int cmp = std::memcmp(
+                   tokens_[i].name, rhs.tokens_[i].name, sizeof(Ch) * tokens_[i].length))
                 return cmp < 0;
         }
 
@@ -420,8 +507,9 @@ public:
         \tparam OutputStream Type of output stream.
         \param os The output stream.
     */
-    template<typename OutputStream>
-    bool Stringify(OutputStream& os) const {
+    template <typename OutputStream>
+    bool Stringify(OutputStream& os) const
+    {
         return Stringify<false, OutputStream>(os);
     }
 
@@ -430,8 +518,9 @@ public:
         \tparam OutputStream Type of output stream.
         \param os The output stream.
     */
-    template<typename OutputStream>
-    bool StringifyUriFragment(OutputStream& os) const {
+    template <typename OutputStream>
+    bool StringifyUriFragment(OutputStream& os) const
+    {
         return Stringify<true, OutputStream>(os);
     }
 
@@ -445,51 +534,67 @@ public:
         If the value is not exist, it creates all parent values and a JSON Null value.
         So it always succeed and return the newly created or existing value.
 
-        Remind that it may change types of parents according to tokens, so it 
-        potentially removes previously stored values. For example, if a document 
-        was an array, and "/foo" is used to create a value, then the document 
+        Remind that it may change types of parents according to tokens, so it
+        potentially removes previously stored values. For example, if a document
+        was an array, and "/foo" is used to create a value, then the document
         will be changed to an object, and all existing array elements are lost.
 
-        \param root Root value of a DOM subtree to be resolved. It can be any value other than document root.
-        \param allocator Allocator for creating the values if the specified value or its parents are not exist.
-        \param alreadyExist If non-null, it stores whether the resolved value is already exist.
-        \return The resolved newly created (a JSON Null value), or already exists value.
+        \param root Root value of a DOM subtree to be resolved. It can be any value other than
+       document root. \param allocator Allocator for creating the values if the specified value or
+       its parents are not exist. \param alreadyExist If non-null, it stores whether the resolved
+       value is already exist. \return The resolved newly created (a JSON Null value), or already
+       exists value.
     */
-    ValueType& Create(ValueType& root, typename ValueType::AllocatorType& allocator, bool* alreadyExist = 0) const {
+    ValueType& Create(ValueType& root,
+                      typename ValueType::AllocatorType& allocator,
+                      bool* alreadyExist = 0) const
+    {
         RAPIDJSON_ASSERT(IsValid());
         ValueType* v = &root;
-        bool exist = true;
-        for (const Token *t = tokens_; t != tokens_ + tokenCount_; ++t) {
-            if (v->IsArray() && t->name[0] == '-' && t->length == 1) {
+        bool exist   = true;
+        for(const Token* t = tokens_; t != tokens_ + tokenCount_; ++t)
+        {
+            if(v->IsArray() && t->name[0] == '-' && t->length == 1)
+            {
                 v->PushBack(ValueType().Move(), allocator);
-                v = &((*v)[v->Size() - 1]);
+                v     = &((*v)[v->Size() - 1]);
                 exist = false;
             }
-            else {
-                if (t->index == kPointerInvalidIndex) { // must be object name
-                    if (!v->IsObject())
+            else
+            {
+                if(t->index == kPointerInvalidIndex)
+                { // must be object name
+                    if(!v->IsObject())
                         v->SetObject(); // Change to Object
                 }
-                else { // object name or array index
-                    if (!v->IsArray() && !v->IsObject())
+                else
+                { // object name or array index
+                    if(!v->IsArray() && !v->IsObject())
                         v->SetArray(); // Change to Array
                 }
 
-                if (v->IsArray()) {
-                    if (t->index >= v->Size()) {
+                if(v->IsArray())
+                {
+                    if(t->index >= v->Size())
+                    {
                         v->Reserve(t->index + 1, allocator);
-                        while (t->index >= v->Size())
+                        while(t->index >= v->Size())
                             v->PushBack(ValueType().Move(), allocator);
                         exist = false;
                     }
                     v = &((*v)[t->index]);
                 }
-                else {
-                    typename ValueType::MemberIterator m = v->FindMember(GenericValue<EncodingType>(GenericStringRef<Ch>(t->name, t->length)));
-                    if (m == v->MemberEnd()) {
-                        v->AddMember(ValueType(t->name, t->length, allocator).Move(), ValueType().Move(), allocator);
-                        m = v->MemberEnd();
-                        v = &(--m)->value; // Assumes AddMember() appends at the end
+                else
+                {
+                    typename ValueType::MemberIterator m = v->FindMember(
+                        GenericValue<EncodingType>(GenericStringRef<Ch>(t->name, t->length)));
+                    if(m == v->MemberEnd())
+                    {
+                        v->AddMember(ValueType(t->name, t->length, allocator).Move(),
+                                     ValueType().Move(),
+                                     allocator);
+                        m     = v->MemberEnd();
+                        v     = &(--m)->value; // Assumes AddMember() appends at the end
                         exist = false;
                     }
                     else
@@ -498,7 +603,7 @@ public:
             }
         }
 
-        if (alreadyExist)
+        if(alreadyExist)
             *alreadyExist = exist;
 
         return *v;
@@ -511,7 +616,10 @@ public:
         \return The resolved newly created, or already exists value.
     */
     template <typename stackAllocator>
-    ValueType& Create(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, bool* alreadyExist = 0) const {
+    ValueType& Create(
+        GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document,
+        bool* alreadyExist = 0) const
+    {
         return Create(document, document.GetAllocator(), alreadyExist);
     }
 
@@ -523,9 +631,9 @@ public:
     //! Compute the in-scope URI for a subtree.
     //  For use with JSON pointers into JSON schema documents.
     /*!
-        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
-        \param rootUri Root URI
-        \param unresolvedTokenIndex If the pointer cannot resolve a token in the pointer, this parameter can obtain the index of unresolved token.
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than
+       document root. \param rootUri Root URI \param unresolvedTokenIndex If the pointer cannot
+       resolve a token in the pointer, this parameter can obtain the index of unresolved token.
         \param allocator Allocator for Uris
         \return Uri if it can be resolved. Otherwise null.
 
@@ -537,58 +645,68 @@ public:
 
         Use unresolvedTokenIndex to retrieve the token index.
     */
-    UriType GetUri(ValueType& root, const UriType& rootUri, size_t* unresolvedTokenIndex = 0, Allocator* allocator = 0) const {
-        static const Ch kIdString[] = { 'i', 'd', '\0' };
+    UriType GetUri(ValueType& root,
+                   const UriType& rootUri,
+                   size_t* unresolvedTokenIndex = 0,
+                   Allocator* allocator         = 0) const
+    {
+        static const Ch kIdString[] = {'i', 'd', '\0'};
         static const ValueType kIdValue(kIdString, 2);
         UriType base = UriType(rootUri, allocator);
         RAPIDJSON_ASSERT(IsValid());
         ValueType* v = &root;
-        for (const Token *t = tokens_; t != tokens_ + tokenCount_; ++t) {
-            switch (v->GetType()) {
-                case kObjectType:
+        for(const Token* t = tokens_; t != tokens_ + tokenCount_; ++t)
+        {
+            switch(v->GetType())
+            {
+            case kObjectType: {
+                // See if we have an id, and if so resolve with the current base
+                typename ValueType::MemberIterator m = v->FindMember(kIdValue);
+                if(m != v->MemberEnd() && (m->value).IsString())
                 {
-                    // See if we have an id, and if so resolve with the current base
-                    typename ValueType::MemberIterator m = v->FindMember(kIdValue);
-                    if (m != v->MemberEnd() && (m->value).IsString()) {
-                        UriType here = UriType(m->value, allocator).Resolve(base, allocator);
-                        base = here;
-                    }
-                    m = v->FindMember(GenericValue<EncodingType>(GenericStringRef<Ch>(t->name, t->length)));
-                    if (m == v->MemberEnd())
-                        break;
-                    v = &m->value;
+                    UriType here = UriType(m->value, allocator).Resolve(base, allocator);
+                    base         = here;
                 }
-                  continue;
-                case kArrayType:
-                    if (t->index == kPointerInvalidIndex || t->index >= v->Size())
-                        break;
-                    v = &((*v)[t->index]);
-                    continue;
-                default:
+                m = v->FindMember(
+                    GenericValue<EncodingType>(GenericStringRef<Ch>(t->name, t->length)));
+                if(m == v->MemberEnd())
                     break;
+                v = &m->value;
+            }
+                continue;
+            case kArrayType:
+                if(t->index == kPointerInvalidIndex || t->index >= v->Size())
+                    break;
+                v = &((*v)[t->index]);
+                continue;
+            default: break;
             }
 
             // Error: unresolved token
-            if (unresolvedTokenIndex)
+            if(unresolvedTokenIndex)
                 *unresolvedTokenIndex = static_cast<size_t>(t - tokens_);
             return UriType(allocator);
         }
         return base;
     }
 
-    UriType GetUri(const ValueType& root, const UriType& rootUri, size_t* unresolvedTokenIndex = 0, Allocator* allocator = 0) const {
-      return GetUri(const_cast<ValueType&>(root), rootUri, unresolvedTokenIndex, allocator);
+    UriType GetUri(const ValueType& root,
+                   const UriType& rootUri,
+                   size_t* unresolvedTokenIndex = 0,
+                   Allocator* allocator         = 0) const
+    {
+        return GetUri(const_cast<ValueType&>(root), rootUri, unresolvedTokenIndex, allocator);
     }
 
-
     //!@name Query value
     //@{
 
     //! Query a value in a subtree.
     /*!
-        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
-        \param unresolvedTokenIndex If the pointer cannot resolve a token in the pointer, this parameter can obtain the index of unresolved token.
-        \return Pointer to the value if it can be resolved. Otherwise null.
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than
+       document root. \param unresolvedTokenIndex If the pointer cannot resolve a token in the
+       pointer, this parameter can obtain the index of unresolved token. \return Pointer to the
+       value if it can be resolved. Otherwise null.
 
         \note
         There are only 3 situations when a value cannot be resolved:
@@ -598,30 +716,32 @@ public:
 
         Use unresolvedTokenIndex to retrieve the token index.
     */
-    ValueType* Get(ValueType& root, size_t* unresolvedTokenIndex = 0) const {
+    ValueType* Get(ValueType& root, size_t* unresolvedTokenIndex = 0) const
+    {
         RAPIDJSON_ASSERT(IsValid());
         ValueType* v = &root;
-        for (const Token *t = tokens_; t != tokens_ + tokenCount_; ++t) {
-            switch (v->GetType()) {
-            case kObjectType:
-                {
-                    typename ValueType::MemberIterator m = v->FindMember(GenericValue<EncodingType>(GenericStringRef<Ch>(t->name, t->length)));
-                    if (m == v->MemberEnd())
-                        break;
-                    v = &m->value;
-                }
+        for(const Token* t = tokens_; t != tokens_ + tokenCount_; ++t)
+        {
+            switch(v->GetType())
+            {
+            case kObjectType: {
+                typename ValueType::MemberIterator m = v->FindMember(
+                    GenericValue<EncodingType>(GenericStringRef<Ch>(t->name, t->length)));
+                if(m == v->MemberEnd())
+                    break;
+                v = &m->value;
+            }
                 continue;
             case kArrayType:
-                if (t->index == kPointerInvalidIndex || t->index >= v->Size())
+                if(t->index == kPointerInvalidIndex || t->index >= v->Size())
                     break;
                 v = &((*v)[t->index]);
                 continue;
-            default:
-                break;
+            default: break;
             }
 
             // Error: unresolved token
-            if (unresolvedTokenIndex)
+            if(unresolvedTokenIndex)
                 *unresolvedTokenIndex = static_cast<size_t>(t - tokens_);
             return 0;
         }
@@ -630,10 +750,11 @@ public:
 
     //! Query a const value in a const subtree.
     /*!
-        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
-        \return Pointer to the value if it can be resolved. Otherwise null.
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than
+       document root. \return Pointer to the value if it can be resolved. Otherwise null.
     */
-    const ValueType* Get(const ValueType& root, size_t* unresolvedTokenIndex = 0) const { 
+    const ValueType* Get(const ValueType& root, size_t* unresolvedTokenIndex = 0) const
+    {
         return Get(const_cast<ValueType&>(root), unresolvedTokenIndex);
     }
 
@@ -644,22 +765,28 @@ public:
 
     //! Query a value in a subtree with default value.
     /*!
-        Similar to Get(), but if the specified value do not exists, it creates all parents and clone the default value.
-        So that this function always succeed.
+        Similar to Get(), but if the specified value do not exists, it creates all parents and clone
+       the default value. So that this function always succeed.
 
-        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
-        \param defaultValue Default value to be cloned if the value was not exists.
-        \param allocator Allocator for creating the values if the specified value or its parents are not exist.
-        \see Create()
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than
+       document root. \param defaultValue Default value to be cloned if the value was not exists.
+        \param allocator Allocator for creating the values if the specified value or its parents are
+       not exist. \see Create()
     */
-    ValueType& GetWithDefault(ValueType& root, const ValueType& defaultValue, typename ValueType::AllocatorType& allocator) const {
+    ValueType& GetWithDefault(ValueType& root,
+                              const ValueType& defaultValue,
+                              typename ValueType::AllocatorType& allocator) const
+    {
         bool alreadyExist;
         ValueType& v = Create(root, allocator, &alreadyExist);
         return alreadyExist ? v : v.CopyFrom(defaultValue, allocator);
     }
 
     //! Query a value in a subtree with default null-terminated string.
-    ValueType& GetWithDefault(ValueType& root, const Ch* defaultValue, typename ValueType::AllocatorType& allocator) const {
+    ValueType& GetWithDefault(ValueType& root,
+                              const Ch* defaultValue,
+                              typename ValueType::AllocatorType& allocator) const
+    {
         bool alreadyExist;
         ValueType& v = Create(root, allocator, &alreadyExist);
         return alreadyExist ? v : v.SetString(defaultValue, allocator);
@@ -667,7 +794,10 @@ public:
 
 #if RAPIDJSON_HAS_STDSTRING
     //! Query a value in a subtree with default std::basic_string.
-    ValueType& GetWithDefault(ValueType& root, const std::basic_string<Ch>& defaultValue, typename ValueType::AllocatorType& allocator) const {
+    ValueType& GetWithDefault(ValueType& root,
+                              const std::basic_string<Ch>& defaultValue,
+                              typename ValueType::AllocatorType& allocator) const
+    {
         bool alreadyExist;
         ValueType& v = Create(root, allocator, &alreadyExist);
         return alreadyExist ? v : v.SetString(defaultValue, allocator);
@@ -679,27 +809,40 @@ public:
         \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool
     */
     template <typename T>
-    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&))
-    GetWithDefault(ValueType& root, T defaultValue, typename ValueType::AllocatorType& allocator) const {
+    RAPIDJSON_DISABLEIF_RETURN(
+        (internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T>>), (ValueType&))
+    GetWithDefault(ValueType& root,
+                   T defaultValue,
+                   typename ValueType::AllocatorType& allocator) const
+    {
         return GetWithDefault(root, ValueType(defaultValue).Move(), allocator);
     }
 
     //! Query a value in a document with default value.
     template <typename stackAllocator>
-    ValueType& GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const ValueType& defaultValue) const {
+    ValueType& GetWithDefault(
+        GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document,
+        const ValueType& defaultValue) const
+    {
         return GetWithDefault(document, defaultValue, document.GetAllocator());
     }
 
     //! Query a value in a document with default null-terminated string.
     template <typename stackAllocator>
-    ValueType& GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const Ch* defaultValue) const {
+    ValueType& GetWithDefault(
+        GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document,
+        const Ch* defaultValue) const
+    {
         return GetWithDefault(document, defaultValue, document.GetAllocator());
     }
 
 #if RAPIDJSON_HAS_STDSTRING
     //! Query a value in a document with default std::basic_string.
     template <typename stackAllocator>
-    ValueType& GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const std::basic_string<Ch>& defaultValue) const {
+    ValueType& GetWithDefault(
+        GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document,
+        const std::basic_string<Ch>& defaultValue) const
+    {
         return GetWithDefault(document, defaultValue, document.GetAllocator());
     }
 #endif
@@ -709,8 +852,12 @@ public:
         \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool
     */
     template <typename T, typename stackAllocator>
-    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&))
-    GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, T defaultValue) const {
+    RAPIDJSON_DISABLEIF_RETURN(
+        (internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T>>), (ValueType&))
+    GetWithDefault(
+        GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document,
+        T defaultValue) const
+    {
         return GetWithDefault(document, defaultValue, document.GetAllocator());
     }
 
@@ -724,28 +871,36 @@ public:
         It creates all parents if they are not exist or types are different to the tokens.
         So this function always succeeds but potentially remove existing values.
 
-        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
-        \param value Value to be set.
-        \param allocator Allocator for creating the values if the specified value or its parents are not exist.
-        \see Create()
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than
+       document root. \param value Value to be set. \param allocator Allocator for creating the
+       values if the specified value or its parents are not exist. \see Create()
     */
-    ValueType& Set(ValueType& root, ValueType& value, typename ValueType::AllocatorType& allocator) const {
+    ValueType&
+    Set(ValueType& root, ValueType& value, typename ValueType::AllocatorType& allocator) const
+    {
         return Create(root, allocator) = value;
     }
 
     //! Set a value in a subtree, with copy semantics.
-    ValueType& Set(ValueType& root, const ValueType& value, typename ValueType::AllocatorType& allocator) const {
+    ValueType&
+    Set(ValueType& root, const ValueType& value, typename ValueType::AllocatorType& allocator) const
+    {
         return Create(root, allocator).CopyFrom(value, allocator);
     }
 
     //! Set a null-terminated string in a subtree.
-    ValueType& Set(ValueType& root, const Ch* value, typename ValueType::AllocatorType& allocator) const {
+    ValueType&
+    Set(ValueType& root, const Ch* value, typename ValueType::AllocatorType& allocator) const
+    {
         return Create(root, allocator) = ValueType(value, allocator).Move();
     }
 
 #if RAPIDJSON_HAS_STDSTRING
     //! Set a std::basic_string in a subtree.
-    ValueType& Set(ValueType& root, const std::basic_string<Ch>& value, typename ValueType::AllocatorType& allocator) const {
+    ValueType& Set(ValueType& root,
+                   const std::basic_string<Ch>& value,
+                   typename ValueType::AllocatorType& allocator) const
+    {
         return Create(root, allocator) = ValueType(value, allocator).Move();
     }
 #endif
@@ -755,33 +910,47 @@ public:
         \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool
     */
     template <typename T>
-    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&))
-    Set(ValueType& root, T value, typename ValueType::AllocatorType& allocator) const {
+    RAPIDJSON_DISABLEIF_RETURN(
+        (internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T>>), (ValueType&))
+    Set(ValueType& root, T value, typename ValueType::AllocatorType& allocator) const
+    {
         return Create(root, allocator) = ValueType(value).Move();
     }
 
     //! Set a value in a document, with move semantics.
     template <typename stackAllocator>
-    ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, ValueType& value) const {
+    ValueType&
+    Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document,
+        ValueType& value) const
+    {
         return Create(document) = value;
     }
 
     //! Set a value in a document, with copy semantics.
     template <typename stackAllocator>
-    ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const ValueType& value) const {
+    ValueType&
+    Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document,
+        const ValueType& value) const
+    {
         return Create(document).CopyFrom(value, document.GetAllocator());
     }
 
     //! Set a null-terminated string in a document.
     template <typename stackAllocator>
-    ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const Ch* value) const {
+    ValueType&
+    Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document,
+        const Ch* value) const
+    {
         return Create(document) = ValueType(value, document.GetAllocator()).Move();
     }
 
 #if RAPIDJSON_HAS_STDSTRING
     //! Sets a std::basic_string in a document.
     template <typename stackAllocator>
-    ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const std::basic_string<Ch>& value) const {
+    ValueType&
+    Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document,
+        const std::basic_string<Ch>& value) const
+    {
         return Create(document) = ValueType(value, document.GetAllocator()).Move();
     }
 #endif
@@ -791,9 +960,12 @@ public:
     \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool
     */
     template <typename T, typename stackAllocator>
-    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&))
-        Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, T value) const {
-            return Create(document) = value;
+    RAPIDJSON_DISABLEIF_RETURN(
+        (internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T>>), (ValueType&))
+    Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document,
+        T value) const
+    {
+        return Create(document) = value;
     }
 
     //@}
@@ -806,18 +978,22 @@ public:
         It creates all parents if they are not exist or types are different to the tokens.
         So this function always succeeds but potentially remove existing values.
 
-        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
-        \param value Value to be swapped.
-        \param allocator Allocator for creating the values if the specified value or its parents are not exist.
-        \see Create()
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than
+       document root. \param value Value to be swapped. \param allocator Allocator for creating the
+       values if the specified value or its parents are not exist. \see Create()
     */
-    ValueType& Swap(ValueType& root, ValueType& value, typename ValueType::AllocatorType& allocator) const {
+    ValueType&
+    Swap(ValueType& root, ValueType& value, typename ValueType::AllocatorType& allocator) const
+    {
         return Create(root, allocator).Swap(value);
     }
 
     //! Swap a value with a value in a document.
     template <typename stackAllocator>
-    ValueType& Swap(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, ValueType& value) const {
+    ValueType&
+    Swap(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document,
+         ValueType& value) const
+    {
         return Create(document).Swap(value);
     }
 
@@ -825,52 +1001,54 @@ public:
 
     //! Erase a value in a subtree.
     /*!
-        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
-        \return Whether the resolved value is found and erased.
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than
+       document root. \return Whether the resolved value is found and erased.
 
-        \note Erasing with an empty pointer \c Pointer(""), i.e. the root, always fail and return false.
+        \note Erasing with an empty pointer \c Pointer(""), i.e. the root, always fail and return
+       false.
     */
-    bool Erase(ValueType& root) const {
+    bool Erase(ValueType& root) const
+    {
         RAPIDJSON_ASSERT(IsValid());
-        if (tokenCount_ == 0) // Cannot erase the root
+        if(tokenCount_ == 0) // Cannot erase the root
             return false;
 
-        ValueType* v = &root;
+        ValueType* v      = &root;
         const Token* last = tokens_ + (tokenCount_ - 1);
-        for (const Token *t = tokens_; t != last; ++t) {
-            switch (v->GetType()) {
-            case kObjectType:
-                {
-                    typename ValueType::MemberIterator m = v->FindMember(GenericValue<EncodingType>(GenericStringRef<Ch>(t->name, t->length)));
-                    if (m == v->MemberEnd())
-                        return false;
-                    v = &m->value;
-                }
-                break;
+        for(const Token* t = tokens_; t != last; ++t)
+        {
+            switch(v->GetType())
+            {
+            case kObjectType: {
+                typename ValueType::MemberIterator m = v->FindMember(
+                    GenericValue<EncodingType>(GenericStringRef<Ch>(t->name, t->length)));
+                if(m == v->MemberEnd())
+                    return false;
+                v = &m->value;
+            }
+            break;
             case kArrayType:
-                if (t->index == kPointerInvalidIndex || t->index >= v->Size())
+                if(t->index == kPointerInvalidIndex || t->index >= v->Size())
                     return false;
                 v = &((*v)[t->index]);
                 break;
-            default:
-                return false;
+            default: return false;
             }
         }
 
-        switch (v->GetType()) {
-        case kObjectType:
-            return v->EraseMember(GenericStringRef<Ch>(last->name, last->length));
+        switch(v->GetType())
+        {
+        case kObjectType: return v->EraseMember(GenericStringRef<Ch>(last->name, last->length));
         case kArrayType:
-            if (last->index == kPointerInvalidIndex || last->index >= v->Size())
+            if(last->index == kPointerInvalidIndex || last->index >= v->Size())
                 return false;
             v->Erase(v->Begin() + last->index);
             return true;
-        default:
-            return false;
+        default: return false;
         }
     }
 
-private:
+    private:
     //! Clone the content from rhs to this.
     /*!
         \param rhs Source pointer.
@@ -878,33 +1056,39 @@ private:
         \param extraNameBufferSize Extra name buffer size (in number of Ch) to be allocated.
         \return Start of non-occupied name buffer, for storing extra names.
     */
-    Ch* CopyFromRaw(const GenericPointer& rhs, size_t extraToken = 0, size_t extraNameBufferSize = 0) {
-        if (!allocator_) // allocator is independently owned.
+    Ch*
+    CopyFromRaw(const GenericPointer& rhs, size_t extraToken = 0, size_t extraNameBufferSize = 0)
+    {
+        if(!allocator_) // allocator is independently owned.
             ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
 
         size_t nameBufferSize = rhs.tokenCount_; // null terminators for tokens
-        for (Token *t = rhs.tokens_; t != rhs.tokens_ + rhs.tokenCount_; ++t)
+        for(Token* t = rhs.tokens_; t != rhs.tokens_ + rhs.tokenCount_; ++t)
             nameBufferSize += t->length;
 
         tokenCount_ = rhs.tokenCount_ + extraToken;
-        tokens_ = static_cast<Token *>(allocator_->Malloc(tokenCount_ * sizeof(Token) + (nameBufferSize + extraNameBufferSize) * sizeof(Ch)));
-        nameBuffer_ = reinterpret_cast<Ch *>(tokens_ + tokenCount_);
-        if (rhs.tokenCount_ > 0) {
+        tokens_     = static_cast<Token*>(allocator_->Malloc(
+            tokenCount_ * sizeof(Token) + (nameBufferSize + extraNameBufferSize) * sizeof(Ch)));
+        nameBuffer_ = reinterpret_cast<Ch*>(tokens_ + tokenCount_);
+        if(rhs.tokenCount_ > 0)
+        {
             std::memcpy(tokens_, rhs.tokens_, rhs.tokenCount_ * sizeof(Token));
         }
-        if (nameBufferSize > 0) {
+        if(nameBufferSize > 0)
+        {
             std::memcpy(nameBuffer_, rhs.nameBuffer_, nameBufferSize * sizeof(Ch));
         }
 
         // The names of each token point to a string in the nameBuffer_. The
         // previous memcpy copied over string pointers into the rhs.nameBuffer_,
         // but they should point to the strings in the new nameBuffer_.
-        for (size_t i = 0; i < rhs.tokenCount_; ++i) {
-          // The offset between the string address and the name buffer should
-          // still be constant, so we can just get this offset and set each new
-          // token name according the new buffer start + the known offset.
-          std::ptrdiff_t name_offset = rhs.tokens_[i].name - rhs.nameBuffer_;
-          tokens_[i].name = nameBuffer_ + name_offset;
+        for(size_t i = 0; i < rhs.tokenCount_; ++i)
+        {
+            // The offset between the string address and the name buffer should
+            // still be constant, so we can just get this offset and set each new
+            // token name according the new buffer start + the known offset.
+            std::ptrdiff_t name_offset = rhs.tokens_[i].name - rhs.nameBuffer_;
+            tokens_[i].name            = nameBuffer_ + name_offset;
         }
 
         return nameBuffer_ + nameBufferSize;
@@ -915,80 +1099,93 @@ private:
         According to RFC 3986 2.3 Unreserved Characters.
         \param c The character (code unit) to be tested.
     */
-    bool NeedPercentEncode(Ch c) const {
-        return !((c >= '0' && c <= '9') || (c >= 'A' && c <='Z') || (c >= 'a' && c <= 'z') || c == '-' || c == '.' || c == '_' || c =='~');
+    bool NeedPercentEncode(Ch c) const
+    {
+        return !((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
+                 c == '-' || c == '.' || c == '_' || c == '~');
     }
 
     //! Parse a JSON String or its URI fragment representation into tokens.
 #ifndef __clang__ // -Wdocumentation
     /*!
-        \param source Either a JSON Pointer string, or its URI fragment representation. Not need to be null terminated.
-        \param length Length of the source string.
-        \note Source cannot be JSON String Representation of JSON Pointer, e.g. In "/\u0000", \u0000 will not be unescaped.
+        \param source Either a JSON Pointer string, or its URI fragment representation. Not need to
+       be null terminated. \param length Length of the source string. \note Source cannot be JSON
+       String Representation of JSON Pointer, e.g. In "/\u0000", \u0000 will not be unescaped.
     */
 #endif
-    void Parse(const Ch* source, size_t length) {
+    void Parse(const Ch* source, size_t length)
+    {
         RAPIDJSON_ASSERT(source != NULL);
         RAPIDJSON_ASSERT(nameBuffer_ == 0);
         RAPIDJSON_ASSERT(tokens_ == 0);
 
         // Create own allocator if user did not supply.
-        if (!allocator_)
+        if(!allocator_)
             ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
 
         // Count number of '/' as tokenCount
         tokenCount_ = 0;
-        for (const Ch* s = source; s != source + length; s++) 
-            if (*s == '/')
+        for(const Ch* s = source; s != source + length; s++)
+            if(*s == '/')
                 tokenCount_++;
 
-        Token* token = tokens_ = static_cast<Token *>(allocator_->Malloc(tokenCount_ * sizeof(Token) + length * sizeof(Ch)));
-        Ch* name = nameBuffer_ = reinterpret_cast<Ch *>(tokens_ + tokenCount_);
-        size_t i = 0;
+        Token* token = tokens_ = static_cast<Token*>(
+            allocator_->Malloc(tokenCount_ * sizeof(Token) + length * sizeof(Ch)));
+        Ch* name = nameBuffer_ = reinterpret_cast<Ch*>(tokens_ + tokenCount_);
+        size_t i               = 0;
 
         // Detect if it is a URI fragment
         bool uriFragment = false;
-        if (source[i] == '#') {
+        if(source[i] == '#')
+        {
             uriFragment = true;
             i++;
         }
 
-        if (i != length && source[i] != '/') {
+        if(i != length && source[i] != '/')
+        {
             parseErrorCode_ = kPointerParseErrorTokenMustBeginWithSolidus;
             goto error;
         }
 
-        while (i < length) {
+        while(i < length)
+        {
             RAPIDJSON_ASSERT(source[i] == '/');
             i++; // consumes '/'
 
-            token->name = name;
+            token->name   = name;
             bool isNumber = true;
 
-            while (i < length && source[i] != '/') {
+            while(i < length && source[i] != '/')
+            {
                 Ch c = source[i];
-                if (uriFragment) {
+                if(uriFragment)
+                {
                     // Decoding percent-encoding for URI fragment
-                    if (c == '%') {
+                    if(c == '%')
+                    {
                         PercentDecodeStream is(&source[i], source + length);
                         GenericInsituStringStream<EncodingType> os(name);
                         Ch* begin = os.PutBegin();
-                        if (!Transcoder<UTF8<>, EncodingType>().Validate(is, os) || !is.IsValid()) {
+                        if(!Transcoder<UTF8<>, EncodingType>().Validate(is, os) || !is.IsValid())
+                        {
                             parseErrorCode_ = kPointerParseErrorInvalidPercentEncoding;
                             goto error;
                         }
                         size_t len = os.PutEnd(begin);
                         i += is.Tell() - 1;
-                        if (len == 1)
+                        if(len == 1)
                             c = *name;
-                        else {
+                        else
+                        {
                             name += len;
                             isNumber = false;
                             i++;
                             continue;
                         }
                     }
-                    else if (NeedPercentEncode(c)) {
+                    else if(NeedPercentEncode(c))
+                    {
                         parseErrorCode_ = kPointerParseErrorCharacterMustPercentEncode;
                         goto error;
                     }
@@ -997,44 +1194,53 @@ private:
                 i++;
 
                 // Escaping "~0" -> '~', "~1" -> '/'
-                if (c == '~') {
-                    if (i < length) {
+                if(c == '~')
+                {
+                    if(i < length)
+                    {
                         c = source[i];
-                        if (c == '0')       c = '~';
-                        else if (c == '1')  c = '/';
-                        else {
+                        if(c == '0')
+                            c = '~';
+                        else if(c == '1')
+                            c = '/';
+                        else
+                        {
                             parseErrorCode_ = kPointerParseErrorInvalidEscape;
                             goto error;
                         }
                         i++;
                     }
-                    else {
+                    else
+                    {
                         parseErrorCode_ = kPointerParseErrorInvalidEscape;
                         goto error;
                     }
                 }
 
                 // First check for index: all of characters are digit
-                if (c < '0' || c > '9')
+                if(c < '0' || c > '9')
                     isNumber = false;
 
                 *name++ = c;
             }
             token->length = static_cast<SizeType>(name - token->name);
-            if (token->length == 0)
+            if(token->length == 0)
                 isNumber = false;
             *name++ = '\0'; // Null terminator
 
             // Second check for index: more than one digit cannot have leading zero
-            if (isNumber && token->length > 1 && token->name[0] == '0')
+            if(isNumber && token->length > 1 && token->name[0] == '0')
                 isNumber = false;
 
             // String to SizeType conversion
             SizeType n = 0;
-            if (isNumber) {
-                for (size_t j = 0; j < token->length; j++) {
+            if(isNumber)
+            {
+                for(size_t j = 0; j < token->length; j++)
+                {
                     SizeType m = n * 10 + static_cast<SizeType>(token->name[j] - '0');
-                    if (m < n) {   // overflow detection
+                    if(m < n)
+                    { // overflow detection
                         isNumber = false;
                         break;
                     }
@@ -1052,43 +1258,48 @@ private:
 
     error:
         Allocator::Free(tokens_);
-        nameBuffer_ = 0;
-        tokens_ = 0;
-        tokenCount_ = 0;
+        nameBuffer_       = 0;
+        tokens_           = 0;
+        tokenCount_       = 0;
         parseErrorOffset_ = i;
         return;
     }
 
     //! Stringify to string or URI fragment representation.
     /*!
-        \tparam uriFragment True for stringifying to URI fragment representation. False for string representation.
-        \tparam OutputStream type of output stream.
-        \param os The output stream.
+        \tparam uriFragment True for stringifying to URI fragment representation. False for string
+       representation. \tparam OutputStream type of output stream. \param os The output stream.
     */
-    template<bool uriFragment, typename OutputStream>
-    bool Stringify(OutputStream& os) const {
+    template <bool uriFragment, typename OutputStream>
+    bool Stringify(OutputStream& os) const
+    {
         RAPIDJSON_ASSERT(IsValid());
 
-        if (uriFragment)
+        if(uriFragment)
             os.Put('#');
 
-        for (Token *t = tokens_; t != tokens_ + tokenCount_; ++t) {
+        for(Token* t = tokens_; t != tokens_ + tokenCount_; ++t)
+        {
             os.Put('/');
-            for (size_t j = 0; j < t->length; j++) {
+            for(size_t j = 0; j < t->length; j++)
+            {
                 Ch c = t->name[j];
-                if (c == '~') {
+                if(c == '~')
+                {
                     os.Put('~');
                     os.Put('0');
                 }
-                else if (c == '/') {
+                else if(c == '/')
+                {
                     os.Put('~');
                     os.Put('1');
                 }
-                else if (uriFragment && NeedPercentEncode(c)) { 
+                else if(uriFragment && NeedPercentEncode(c))
+                {
                     // Transcode to UTF8 sequence
                     GenericStringStream<typename ValueType::EncodingType> source(&t->name[j]);
                     PercentEncodeStream<OutputStream> target(os);
-                    if (!Transcoder<EncodingType, UTF8<> >().Validate(source, target))
+                    if(!Transcoder<EncodingType, UTF8<>>().Validate(source, target))
                         return false;
                     j += source.Tell() - 1;
                 }
@@ -1102,11 +1313,12 @@ private:
     //! A helper stream for decoding a percent-encoded sequence into code unit.
     /*!
         This stream decodes %XY triplet into code unit (0-255).
-        If it encounters invalid characters, it sets output code unit as 0 and 
+        If it encounters invalid characters, it sets output code unit as 0 and
         mark invalid, and to be checked by IsValid().
     */
-    class PercentDecodeStream {
-    public:
+    class PercentDecodeStream
+    {
+        public:
         typedef typename ValueType::Ch Ch;
 
         //! Constructor
@@ -1114,22 +1326,32 @@ private:
             \param source Start of the stream
             \param end Past-the-end of the stream.
         */
-        PercentDecodeStream(const Ch* source, const Ch* end) : src_(source), head_(source), end_(end), valid_(true) {}
+        PercentDecodeStream(const Ch* source, const Ch* end)
+            : src_(source), head_(source), end_(end), valid_(true)
+        {
+        }
 
-        Ch Take() {
-            if (*src_ != '%' || src_ + 3 > end_) { // %XY triplet
+        Ch Take()
+        {
+            if(*src_ != '%' || src_ + 3 > end_)
+            { // %XY triplet
                 valid_ = false;
                 return 0;
             }
             src_++;
             Ch c = 0;
-            for (int j = 0; j < 2; j++) {
-                c = static_cast<Ch>(c << 4);
+            for(int j = 0; j < 2; j++)
+            {
+                c    = static_cast<Ch>(c << 4);
                 Ch h = *src_;
-                if      (h >= '0' && h <= '9') c = static_cast<Ch>(c + h - '0');
-                else if (h >= 'A' && h <= 'F') c = static_cast<Ch>(c + h - 'A' + 10);
-                else if (h >= 'a' && h <= 'f') c = static_cast<Ch>(c + h - 'a' + 10);
-                else {
+                if(h >= '0' && h <= '9')
+                    c = static_cast<Ch>(c + h - '0');
+                else if(h >= 'A' && h <= 'F')
+                    c = static_cast<Ch>(c + h - 'A' + 10);
+                else if(h >= 'a' && h <= 'f')
+                    c = static_cast<Ch>(c + h - 'a' + 10);
+                else
+                {
                     valid_ = false;
                     return 0;
                 }
@@ -1141,36 +1363,41 @@ private:
         size_t Tell() const { return static_cast<size_t>(src_ - head_); }
         bool IsValid() const { return valid_; }
 
-    private:
-        const Ch* src_;     //!< Current read position.
-        const Ch* head_;    //!< Original head of the string.
-        const Ch* end_;     //!< Past-the-end position.
-        bool valid_;        //!< Whether the parsing is valid.
+        private:
+        const Ch* src_;  //!< Current read position.
+        const Ch* head_; //!< Original head of the string.
+        const Ch* end_;  //!< Past-the-end position.
+        bool valid_;     //!< Whether the parsing is valid.
     };
 
     //! A helper stream to encode character (UTF-8 code unit) into percent-encoded sequence.
     template <typename OutputStream>
-    class PercentEncodeStream {
-    public:
+    class PercentEncodeStream
+    {
+        public:
         PercentEncodeStream(OutputStream& os) : os_(os) {}
-        void Put(char c) { // UTF-8 must be byte
-            unsigned char u = static_cast<unsigned char>(c);
-            static const char hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+        void Put(char c)
+        { // UTF-8 must be byte
+            unsigned char u                 = static_cast<unsigned char>(c);
+            static const char hexDigits[16] = {
+                '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
             os_.Put('%');
             os_.Put(static_cast<typename OutputStream::Ch>(hexDigits[u >> 4]));
             os_.Put(static_cast<typename OutputStream::Ch>(hexDigits[u & 15]));
         }
-    private:
+
+        private:
         OutputStream& os_;
     };
 
-    Allocator* allocator_;                  //!< The current allocator. It is either user-supplied or equal to ownAllocator_.
-    Allocator* ownAllocator_;               //!< Allocator owned by this Pointer.
-    Ch* nameBuffer_;                        //!< A buffer containing all names in tokens.
-    Token* tokens_;                         //!< A list of tokens.
-    size_t tokenCount_;                     //!< Number of tokens in tokens_.
-    size_t parseErrorOffset_;               //!< Offset in code unit when parsing fail.
-    PointerParseErrorCode parseErrorCode_;  //!< Parsing error code.
+    Allocator* allocator_;    //!< The current allocator. It is either user-supplied or equal to
+                              //!< ownAllocator_.
+    Allocator* ownAllocator_; //!< Allocator owned by this Pointer.
+    Ch* nameBuffer_;          //!< A buffer containing all names in tokens.
+    Token* tokens_;           //!< A list of tokens.
+    size_t tokenCount_;       //!< Number of tokens in tokens_.
+    size_t parseErrorOffset_; //!< Offset in code unit when parsing fail.
+    PointerParseErrorCode parseErrorCode_; //!< Parsing error code.
 };
 
 //! GenericPointer for Value (UTF-8, default allocator).
@@ -1182,292 +1409,487 @@ typedef GenericPointer<Value> Pointer;
 //////////////////////////////////////////////////////////////////////////////
 
 template <typename T>
-typename T::ValueType& CreateValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, typename T::AllocatorType& a) {
+typename T::ValueType& CreateValueByPointer(T& root,
+                                            const GenericPointer<typename T::ValueType>& pointer,
+                                            typename T::AllocatorType& a)
+{
     return pointer.Create(root, a);
 }
 
 template <typename T, typename CharType, size_t N>
-typename T::ValueType& CreateValueByPointer(T& root, const CharType(&source)[N], typename T::AllocatorType& a) {
+typename T::ValueType&
+CreateValueByPointer(T& root, const CharType (&source)[N], typename T::AllocatorType& a)
+{
     return GenericPointer<typename T::ValueType>(source, N - 1).Create(root, a);
 }
 
 // No allocator parameter
 
 template <typename DocumentType>
-typename DocumentType::ValueType& CreateValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer) {
+typename DocumentType::ValueType&
+CreateValueByPointer(DocumentType& document,
+                     const GenericPointer<typename DocumentType::ValueType>& pointer)
+{
     return pointer.Create(document);
 }
 
 template <typename DocumentType, typename CharType, size_t N>
-typename DocumentType::ValueType& CreateValueByPointer(DocumentType& document, const CharType(&source)[N]) {
+typename DocumentType::ValueType& CreateValueByPointer(DocumentType& document,
+                                                       const CharType (&source)[N])
+{
     return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Create(document);
 }
 
 //////////////////////////////////////////////////////////////////////////////
 
 template <typename T>
-typename T::ValueType* GetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, size_t* unresolvedTokenIndex = 0) {
+typename T::ValueType* GetValueByPointer(T& root,
+                                         const GenericPointer<typename T::ValueType>& pointer,
+                                         size_t* unresolvedTokenIndex = 0)
+{
     return pointer.Get(root, unresolvedTokenIndex);
 }
 
 template <typename T>
-const typename T::ValueType* GetValueByPointer(const T& root, const GenericPointer<typename T::ValueType>& pointer, size_t* unresolvedTokenIndex = 0) {
+const typename T::ValueType* GetValueByPointer(const T& root,
+                                               const GenericPointer<typename T::ValueType>& pointer,
+                                               size_t* unresolvedTokenIndex = 0)
+{
     return pointer.Get(root, unresolvedTokenIndex);
 }
 
 template <typename T, typename CharType, size_t N>
-typename T::ValueType* GetValueByPointer(T& root, const CharType (&source)[N], size_t* unresolvedTokenIndex = 0) {
+typename T::ValueType*
+GetValueByPointer(T& root, const CharType (&source)[N], size_t* unresolvedTokenIndex = 0)
+{
     return GenericPointer<typename T::ValueType>(source, N - 1).Get(root, unresolvedTokenIndex);
 }
 
 template <typename T, typename CharType, size_t N>
-const typename T::ValueType* GetValueByPointer(const T& root, const CharType(&source)[N], size_t* unresolvedTokenIndex = 0) {
+const typename T::ValueType*
+GetValueByPointer(const T& root, const CharType (&source)[N], size_t* unresolvedTokenIndex = 0)
+{
     return GenericPointer<typename T::ValueType>(source, N - 1).Get(root, unresolvedTokenIndex);
 }
 
 //////////////////////////////////////////////////////////////////////////////
 
 template <typename T>
-typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::ValueType& defaultValue, typename T::AllocatorType& a) {
+typename T::ValueType&
+GetValueByPointerWithDefault(T& root,
+                             const GenericPointer<typename T::ValueType>& pointer,
+                             const typename T::ValueType& defaultValue,
+                             typename T::AllocatorType& a)
+{
     return pointer.GetWithDefault(root, defaultValue, a);
 }
 
 template <typename T>
-typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::Ch* defaultValue, typename T::AllocatorType& a) {
+typename T::ValueType&
+GetValueByPointerWithDefault(T& root,
+                             const GenericPointer<typename T::ValueType>& pointer,
+                             const typename T::Ch* defaultValue,
+                             typename T::AllocatorType& a)
+{
     return pointer.GetWithDefault(root, defaultValue, a);
 }
 
 #if RAPIDJSON_HAS_STDSTRING
 template <typename T>
-typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, const std::basic_string<typename T::Ch>& defaultValue, typename T::AllocatorType& a) {
+typename T::ValueType&
+GetValueByPointerWithDefault(T& root,
+                             const GenericPointer<typename T::ValueType>& pointer,
+                             const std::basic_string<typename T::Ch>& defaultValue,
+                             typename T::AllocatorType& a)
+{
     return pointer.GetWithDefault(root, defaultValue, a);
 }
 #endif
 
 template <typename T, typename T2>
-RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&))
-GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, T2 defaultValue, typename T::AllocatorType& a) {
+RAPIDJSON_DISABLEIF_RETURN(
+    (internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2>>),
+    (typename T::ValueType&))
+GetValueByPointerWithDefault(T& root,
+                             const GenericPointer<typename T::ValueType>& pointer,
+                             T2 defaultValue,
+                             typename T::AllocatorType& a)
+{
     return pointer.GetWithDefault(root, defaultValue, a);
 }
 
 template <typename T, typename CharType, size_t N>
-typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const typename T::ValueType& defaultValue, typename T::AllocatorType& a) {
-    return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a);
+typename T::ValueType& GetValueByPointerWithDefault(T& root,
+                                                    const CharType (&source)[N],
+                                                    const typename T::ValueType& defaultValue,
+                                                    typename T::AllocatorType& a)
+{
+    return GenericPointer<typename T::ValueType>(source, N - 1)
+        .GetWithDefault(root, defaultValue, a);
 }
 
 template <typename T, typename CharType, size_t N>
-typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const typename T::Ch* defaultValue, typename T::AllocatorType& a) {
-    return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a);
+typename T::ValueType& GetValueByPointerWithDefault(T& root,
+                                                    const CharType (&source)[N],
+                                                    const typename T::Ch* defaultValue,
+                                                    typename T::AllocatorType& a)
+{
+    return GenericPointer<typename T::ValueType>(source, N - 1)
+        .GetWithDefault(root, defaultValue, a);
 }
 
 #if RAPIDJSON_HAS_STDSTRING
 template <typename T, typename CharType, size_t N>
-typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const std::basic_string<typename T::Ch>& defaultValue, typename T::AllocatorType& a) {
-    return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a);
+typename T::ValueType&
+GetValueByPointerWithDefault(T& root,
+                             const CharType (&source)[N],
+                             const std::basic_string<typename T::Ch>& defaultValue,
+                             typename T::AllocatorType& a)
+{
+    return GenericPointer<typename T::ValueType>(source, N - 1)
+        .GetWithDefault(root, defaultValue, a);
 }
 #endif
 
 template <typename T, typename CharType, size_t N, typename T2>
-RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&))
-GetValueByPointerWithDefault(T& root, const CharType(&source)[N], T2 defaultValue, typename T::AllocatorType& a) {
-    return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a);
+RAPIDJSON_DISABLEIF_RETURN(
+    (internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2>>),
+    (typename T::ValueType&))
+GetValueByPointerWithDefault(T& root,
+                             const CharType (&source)[N],
+                             T2 defaultValue,
+                             typename T::AllocatorType& a)
+{
+    return GenericPointer<typename T::ValueType>(source, N - 1)
+        .GetWithDefault(root, defaultValue, a);
 }
 
 // No allocator parameter
 
 template <typename DocumentType>
-typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::ValueType& defaultValue) {
+typename DocumentType::ValueType&
+GetValueByPointerWithDefault(DocumentType& document,
+                             const GenericPointer<typename DocumentType::ValueType>& pointer,
+                             const typename DocumentType::ValueType& defaultValue)
+{
     return pointer.GetWithDefault(document, defaultValue);
 }
 
 template <typename DocumentType>
-typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::Ch* defaultValue) {
+typename DocumentType::ValueType&
+GetValueByPointerWithDefault(DocumentType& document,
+                             const GenericPointer<typename DocumentType::ValueType>& pointer,
+                             const typename DocumentType::Ch* defaultValue)
+{
     return pointer.GetWithDefault(document, defaultValue);
 }
 
 #if RAPIDJSON_HAS_STDSTRING
 template <typename DocumentType>
-typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const std::basic_string<typename DocumentType::Ch>& defaultValue) {
+typename DocumentType::ValueType&
+GetValueByPointerWithDefault(DocumentType& document,
+                             const GenericPointer<typename DocumentType::ValueType>& pointer,
+                             const std::basic_string<typename DocumentType::Ch>& defaultValue)
+{
     return pointer.GetWithDefault(document, defaultValue);
 }
 #endif
 
 template <typename DocumentType, typename T2>
-RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&))
-GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, T2 defaultValue) {
+RAPIDJSON_DISABLEIF_RETURN(
+    (internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2>>),
+    (typename DocumentType::ValueType&))
+GetValueByPointerWithDefault(DocumentType& document,
+                             const GenericPointer<typename DocumentType::ValueType>& pointer,
+                             T2 defaultValue)
+{
     return pointer.GetWithDefault(document, defaultValue);
 }
 
 template <typename DocumentType, typename CharType, size_t N>
-typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const typename DocumentType::ValueType& defaultValue) {
-    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue);
+typename DocumentType::ValueType&
+GetValueByPointerWithDefault(DocumentType& document,
+                             const CharType (&source)[N],
+                             const typename DocumentType::ValueType& defaultValue)
+{
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1)
+        .GetWithDefault(document, defaultValue);
 }
 
 template <typename DocumentType, typename CharType, size_t N>
-typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const typename DocumentType::Ch* defaultValue) {
-    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue);
+typename DocumentType::ValueType&
+GetValueByPointerWithDefault(DocumentType& document,
+                             const CharType (&source)[N],
+                             const typename DocumentType::Ch* defaultValue)
+{
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1)
+        .GetWithDefault(document, defaultValue);
 }
 
 #if RAPIDJSON_HAS_STDSTRING
 template <typename DocumentType, typename CharType, size_t N>
-typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const std::basic_string<typename DocumentType::Ch>& defaultValue) {
-    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue);
+typename DocumentType::ValueType&
+GetValueByPointerWithDefault(DocumentType& document,
+                             const CharType (&source)[N],
+                             const std::basic_string<typename DocumentType::Ch>& defaultValue)
+{
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1)
+        .GetWithDefault(document, defaultValue);
 }
 #endif
 
 template <typename DocumentType, typename CharType, size_t N, typename T2>
-RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&))
-GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], T2 defaultValue) {
-    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue);
+RAPIDJSON_DISABLEIF_RETURN(
+    (internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2>>),
+    (typename DocumentType::ValueType&))
+GetValueByPointerWithDefault(DocumentType& document, const CharType (&source)[N], T2 defaultValue)
+{
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1)
+        .GetWithDefault(document, defaultValue);
 }
 
 //////////////////////////////////////////////////////////////////////////////
 
 template <typename T>
-typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, typename T::ValueType& value, typename T::AllocatorType& a) {
+typename T::ValueType& SetValueByPointer(T& root,
+                                         const GenericPointer<typename T::ValueType>& pointer,
+                                         typename T::ValueType& value,
+                                         typename T::AllocatorType& a)
+{
     return pointer.Set(root, value, a);
 }
 
 template <typename T>
-typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::ValueType& value, typename T::AllocatorType& a) {
+typename T::ValueType& SetValueByPointer(T& root,
+                                         const GenericPointer<typename T::ValueType>& pointer,
+                                         const typename T::ValueType& value,
+                                         typename T::AllocatorType& a)
+{
     return pointer.Set(root, value, a);
 }
 
 template <typename T>
-typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::Ch* value, typename T::AllocatorType& a) {
+typename T::ValueType& SetValueByPointer(T& root,
+                                         const GenericPointer<typename T::ValueType>& pointer,
+                                         const typename T::Ch* value,
+                                         typename T::AllocatorType& a)
+{
     return pointer.Set(root, value, a);
 }
 
 #if RAPIDJSON_HAS_STDSTRING
 template <typename T>
-typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, const std::basic_string<typename T::Ch>& value, typename T::AllocatorType& a) {
+typename T::ValueType& SetValueByPointer(T& root,
+                                         const GenericPointer<typename T::ValueType>& pointer,
+                                         const std::basic_string<typename T::Ch>& value,
+                                         typename T::AllocatorType& a)
+{
     return pointer.Set(root, value, a);
 }
 #endif
 
 template <typename T, typename T2>
-RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&))
-SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, T2 value, typename T::AllocatorType& a) {
+RAPIDJSON_DISABLEIF_RETURN(
+    (internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2>>),
+    (typename T::ValueType&))
+SetValueByPointer(T& root,
+                  const GenericPointer<typename T::ValueType>& pointer,
+                  T2 value,
+                  typename T::AllocatorType& a)
+{
     return pointer.Set(root, value, a);
 }
 
 template <typename T, typename CharType, size_t N>
-typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], typename T::ValueType& value, typename T::AllocatorType& a) {
+typename T::ValueType& SetValueByPointer(T& root,
+                                         const CharType (&source)[N],
+                                         typename T::ValueType& value,
+                                         typename T::AllocatorType& a)
+{
     return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
 }
 
 template <typename T, typename CharType, size_t N>
-typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const typename T::ValueType& value, typename T::AllocatorType& a) {
+typename T::ValueType& SetValueByPointer(T& root,
+                                         const CharType (&source)[N],
+                                         const typename T::ValueType& value,
+                                         typename T::AllocatorType& a)
+{
     return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
 }
 
 template <typename T, typename CharType, size_t N>
-typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const typename T::Ch* value, typename T::AllocatorType& a) {
+typename T::ValueType& SetValueByPointer(T& root,
+                                         const CharType (&source)[N],
+                                         const typename T::Ch* value,
+                                         typename T::AllocatorType& a)
+{
     return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
 }
 
 #if RAPIDJSON_HAS_STDSTRING
 template <typename T, typename CharType, size_t N>
-typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const std::basic_string<typename T::Ch>& value, typename T::AllocatorType& a) {
+typename T::ValueType& SetValueByPointer(T& root,
+                                         const CharType (&source)[N],
+                                         const std::basic_string<typename T::Ch>& value,
+                                         typename T::AllocatorType& a)
+{
     return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
 }
 #endif
 
 template <typename T, typename CharType, size_t N, typename T2>
-RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&))
-SetValueByPointer(T& root, const CharType(&source)[N], T2 value, typename T::AllocatorType& a) {
+RAPIDJSON_DISABLEIF_RETURN(
+    (internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2>>),
+    (typename T::ValueType&))
+SetValueByPointer(T& root, const CharType (&source)[N], T2 value, typename T::AllocatorType& a)
+{
     return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
 }
 
 // No allocator parameter
 
 template <typename DocumentType>
-typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, typename DocumentType::ValueType& value) {
+typename DocumentType::ValueType&
+SetValueByPointer(DocumentType& document,
+                  const GenericPointer<typename DocumentType::ValueType>& pointer,
+                  typename DocumentType::ValueType& value)
+{
     return pointer.Set(document, value);
 }
 
 template <typename DocumentType>
-typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::ValueType& value) {
+typename DocumentType::ValueType&
+SetValueByPointer(DocumentType& document,
+                  const GenericPointer<typename DocumentType::ValueType>& pointer,
+                  const typename DocumentType::ValueType& value)
+{
     return pointer.Set(document, value);
 }
 
 template <typename DocumentType>
-typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::Ch* value) {
+typename DocumentType::ValueType&
+SetValueByPointer(DocumentType& document,
+                  const GenericPointer<typename DocumentType::ValueType>& pointer,
+                  const typename DocumentType::Ch* value)
+{
     return pointer.Set(document, value);
 }
 
 #if RAPIDJSON_HAS_STDSTRING
 template <typename DocumentType>
-typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const std::basic_string<typename DocumentType::Ch>& value) {
+typename DocumentType::ValueType&
+SetValueByPointer(DocumentType& document,
+                  const GenericPointer<typename DocumentType::ValueType>& pointer,
+                  const std::basic_string<typename DocumentType::Ch>& value)
+{
     return pointer.Set(document, value);
 }
 #endif
 
 template <typename DocumentType, typename T2>
-RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&))
-SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, T2 value) {
+RAPIDJSON_DISABLEIF_RETURN(
+    (internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2>>),
+    (typename DocumentType::ValueType&))
+SetValueByPointer(DocumentType& document,
+                  const GenericPointer<typename DocumentType::ValueType>& pointer,
+                  T2 value)
+{
     return pointer.Set(document, value);
 }
 
 template <typename DocumentType, typename CharType, size_t N>
-typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], typename DocumentType::ValueType& value) {
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document,
+                                                    const CharType (&source)[N],
+                                                    typename DocumentType::ValueType& value)
+{
     return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
 }
 
 template <typename DocumentType, typename CharType, size_t N>
-typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const typename DocumentType::ValueType& value) {
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document,
+                                                    const CharType (&source)[N],
+                                                    const typename DocumentType::ValueType& value)
+{
     return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
 }
 
 template <typename DocumentType, typename CharType, size_t N>
-typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const typename DocumentType::Ch* value) {
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document,
+                                                    const CharType (&source)[N],
+                                                    const typename DocumentType::Ch* value)
+{
     return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
 }
 
 #if RAPIDJSON_HAS_STDSTRING
 template <typename DocumentType, typename CharType, size_t N>
-typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const std::basic_string<typename DocumentType::Ch>& value) {
+typename DocumentType::ValueType&
+SetValueByPointer(DocumentType& document,
+                  const CharType (&source)[N],
+                  const std::basic_string<typename DocumentType::Ch>& value)
+{
     return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
 }
 #endif
 
 template <typename DocumentType, typename CharType, size_t N, typename T2>
-RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&))
-SetValueByPointer(DocumentType& document, const CharType(&source)[N], T2 value) {
+RAPIDJSON_DISABLEIF_RETURN(
+    (internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2>>),
+    (typename DocumentType::ValueType&))
+SetValueByPointer(DocumentType& document, const CharType (&source)[N], T2 value)
+{
     return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
 }
 
 //////////////////////////////////////////////////////////////////////////////
 
 template <typename T>
-typename T::ValueType& SwapValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, typename T::ValueType& value, typename T::AllocatorType& a) {
+typename T::ValueType& SwapValueByPointer(T& root,
+                                          const GenericPointer<typename T::ValueType>& pointer,
+                                          typename T::ValueType& value,
+                                          typename T::AllocatorType& a)
+{
     return pointer.Swap(root, value, a);
 }
 
 template <typename T, typename CharType, size_t N>
-typename T::ValueType& SwapValueByPointer(T& root, const CharType(&source)[N], typename T::ValueType& value, typename T::AllocatorType& a) {
+typename T::ValueType& SwapValueByPointer(T& root,
+                                          const CharType (&source)[N],
+                                          typename T::ValueType& value,
+                                          typename T::AllocatorType& a)
+{
     return GenericPointer<typename T::ValueType>(source, N - 1).Swap(root, value, a);
 }
 
 template <typename DocumentType>
-typename DocumentType::ValueType& SwapValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, typename DocumentType::ValueType& value) {
+typename DocumentType::ValueType&
+SwapValueByPointer(DocumentType& document,
+                   const GenericPointer<typename DocumentType::ValueType>& pointer,
+                   typename DocumentType::ValueType& value)
+{
     return pointer.Swap(document, value);
 }
 
 template <typename DocumentType, typename CharType, size_t N>
-typename DocumentType::ValueType& SwapValueByPointer(DocumentType& document, const CharType(&source)[N], typename DocumentType::ValueType& value) {
+typename DocumentType::ValueType& SwapValueByPointer(DocumentType& document,
+                                                     const CharType (&source)[N],
+                                                     typename DocumentType::ValueType& value)
+{
     return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Swap(document, value);
 }
 
 //////////////////////////////////////////////////////////////////////////////
 
 template <typename T>
-bool EraseValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer) {
+bool EraseValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer)
+{
     return pointer.Erase(root);
 }
 
 template <typename T, typename CharType, size_t N>
-bool EraseValueByPointer(T& root, const CharType(&source)[N]) {
+bool EraseValueByPointer(T& root, const CharType (&source)[N])
+{
     return GenericPointer<typename T::ValueType>(source, N - 1).Erase(root);
 }
 
diff --git a/include/rapidjson/prettywriter.h b/include/rapidjson/prettywriter.h
index fe45df1d10..0642c5766c 100644
--- a/include/rapidjson/prettywriter.h
+++ b/include/rapidjson/prettywriter.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_PRETTYWRITER_H_
@@ -24,7 +24,7 @@ RAPIDJSON_DIAG_OFF(effc++)
 
 #if defined(__clang__)
 RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(c++98-compat)
+RAPIDJSON_DIAG_OFF(c++ 98 - compat)
 #endif
 
 RAPIDJSON_NAMESPACE_BEGIN
@@ -32,8 +32,9 @@ RAPIDJSON_NAMESPACE_BEGIN
 //! Combination of PrettyWriter format flags.
 /*! \see PrettyWriter::SetFormatOptions
  */
-enum PrettyFormatOptions {
-    kFormatDefault = 0,         //!< Default pretty formatting.
+enum PrettyFormatOptions
+{
+    kFormatDefault         = 0, //!< Default pretty formatting.
     kFormatSingleLineArray = 1  //!< Format arrays on a single line.
 };
 
@@ -44,9 +45,15 @@ enum PrettyFormatOptions {
     \tparam TargetEncoding Encoding of output stream.
     \tparam StackAllocator Type of allocator for allocating memory of stack.
 */
-template<typename OutputStream, typename SourceEncoding = UTF8<>, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator, unsigned writeFlags = kWriteDefaultFlags>
-class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding, StackAllocator, writeFlags> {
-public:
+template <typename OutputStream,
+          typename SourceEncoding = UTF8<>,
+          typename TargetEncoding = UTF8<>,
+          typename StackAllocator = CrtAllocator,
+          unsigned writeFlags     = kWriteDefaultFlags>
+class PrettyWriter
+    : public Writer<OutputStream, SourceEncoding, TargetEncoding, StackAllocator, writeFlags>
+{
+    public:
     typedef Writer<OutputStream, SourceEncoding, TargetEncoding, StackAllocator, writeFlags> Base;
     typedef typename Base::Ch Ch;
 
@@ -55,34 +62,54 @@ public:
         \param allocator User supplied allocator. If it is null, it will create a private one.
         \param levelDepth Initial capacity of stack.
     */
-    explicit PrettyWriter(OutputStream& os, StackAllocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : 
-        Base(os, allocator, levelDepth), indentChar_(' '), indentCharCount_(4), formatOptions_(kFormatDefault) {}
+    explicit PrettyWriter(OutputStream& os,
+                          StackAllocator* allocator = 0,
+                          size_t levelDepth         = Base::kDefaultLevelDepth)
+        : Base(os, allocator, levelDepth),
+          indentChar_(' '),
+          indentCharCount_(4),
+          formatOptions_(kFormatDefault)
+    {
+    }
 
-
-    explicit PrettyWriter(StackAllocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : 
-        Base(allocator, levelDepth), indentChar_(' '), indentCharCount_(4), formatOptions_(kFormatDefault) {}
+    explicit PrettyWriter(StackAllocator* allocator = 0,
+                          size_t levelDepth         = Base::kDefaultLevelDepth)
+        : Base(allocator, levelDepth),
+          indentChar_(' '),
+          indentCharCount_(4),
+          formatOptions_(kFormatDefault)
+    {
+    }
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
-    PrettyWriter(PrettyWriter&& rhs) :
-        Base(std::forward<PrettyWriter>(rhs)), indentChar_(rhs.indentChar_), indentCharCount_(rhs.indentCharCount_), formatOptions_(rhs.formatOptions_) {}
+    PrettyWriter(PrettyWriter&& rhs)
+        : Base(std::forward<PrettyWriter>(rhs)),
+          indentChar_(rhs.indentChar_),
+          indentCharCount_(rhs.indentCharCount_),
+          formatOptions_(rhs.formatOptions_)
+    {
+    }
 #endif
 
     //! Set custom indentation.
-    /*! \param indentChar       Character for indentation. Must be whitespace character (' ', '\\t', '\\n', '\\r').
-        \param indentCharCount  Number of indent characters for each indentation level.
-        \note The default indentation is 4 spaces.
+    /*! \param indentChar       Character for indentation. Must be whitespace character (' ', '\\t',
+       '\\n', '\\r'). \param indentCharCount  Number of indent characters for each indentation
+       level. \note The default indentation is 4 spaces.
     */
-    PrettyWriter& SetIndent(Ch indentChar, unsigned indentCharCount) {
-        RAPIDJSON_ASSERT(indentChar == ' ' || indentChar == '\t' || indentChar == '\n' || indentChar == '\r');
-        indentChar_ = indentChar;
+    PrettyWriter& SetIndent(Ch indentChar, unsigned indentCharCount)
+    {
+        RAPIDJSON_ASSERT(indentChar == ' ' || indentChar == '\t' || indentChar == '\n' ||
+                         indentChar == '\r');
+        indentChar_      = indentChar;
         indentCharCount_ = indentCharCount;
         return *this;
     }
 
     //! Set pretty writer formatting options.
     /*! \param options Formatting options.
-    */
-    PrettyWriter& SetFormatOptions(PrettyFormatOptions options) {
+     */
+    PrettyWriter& SetFormatOptions(PrettyFormatOptions options)
+    {
         formatOptions_ = options;
         return *this;
     }
@@ -92,22 +119,52 @@ public:
     */
     //@{
 
-    bool Null()                 { PrettyPrefix(kNullType);   return Base::EndValue(Base::WriteNull()); }
-    bool Bool(bool b)           { PrettyPrefix(b ? kTrueType : kFalseType); return Base::EndValue(Base::WriteBool(b)); }
-    bool Int(int i)             { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteInt(i)); }
-    bool Uint(unsigned u)       { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteUint(u)); }
-    bool Int64(int64_t i64)     { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteInt64(i64)); }
-    bool Uint64(uint64_t u64)   { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteUint64(u64));  }
-    bool Double(double d)       { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteDouble(d)); }
+    bool Null()
+    {
+        PrettyPrefix(kNullType);
+        return Base::EndValue(Base::WriteNull());
+    }
+    bool Bool(bool b)
+    {
+        PrettyPrefix(b ? kTrueType : kFalseType);
+        return Base::EndValue(Base::WriteBool(b));
+    }
+    bool Int(int i)
+    {
+        PrettyPrefix(kNumberType);
+        return Base::EndValue(Base::WriteInt(i));
+    }
+    bool Uint(unsigned u)
+    {
+        PrettyPrefix(kNumberType);
+        return Base::EndValue(Base::WriteUint(u));
+    }
+    bool Int64(int64_t i64)
+    {
+        PrettyPrefix(kNumberType);
+        return Base::EndValue(Base::WriteInt64(i64));
+    }
+    bool Uint64(uint64_t u64)
+    {
+        PrettyPrefix(kNumberType);
+        return Base::EndValue(Base::WriteUint64(u64));
+    }
+    bool Double(double d)
+    {
+        PrettyPrefix(kNumberType);
+        return Base::EndValue(Base::WriteDouble(d));
+    }
 
-    bool RawNumber(const Ch* str, SizeType length, bool copy = false) {
+    bool RawNumber(const Ch* str, SizeType length, bool copy = false)
+    {
         RAPIDJSON_ASSERT(str != 0);
         (void)copy;
         PrettyPrefix(kNumberType);
         return Base::EndValue(Base::WriteString(str, length));
     }
 
-    bool String(const Ch* str, SizeType length, bool copy = false) {
+    bool String(const Ch* str, SizeType length, bool copy = false)
+    {
         RAPIDJSON_ASSERT(str != 0);
         (void)copy;
         PrettyPrefix(kStringType);
@@ -115,65 +172,76 @@ public:
     }
 
 #if RAPIDJSON_HAS_STDSTRING
-    bool String(const std::basic_string<Ch>& str) {
+    bool String(const std::basic_string<Ch>& str)
+    {
         return String(str.data(), SizeType(str.size()));
     }
 #endif
 
-    bool StartObject() {
+    bool StartObject()
+    {
         PrettyPrefix(kObjectType);
-        new (Base::level_stack_.template Push<typename Base::Level>()) typename Base::Level(false);
+        new(Base::level_stack_.template Push<typename Base::Level>()) typename Base::Level(false);
         return Base::WriteStartObject();
     }
 
-    bool Key(const Ch* str, SizeType length, bool copy = false) { return String(str, length, copy); }
+    bool Key(const Ch* str, SizeType length, bool copy = false)
+    {
+        return String(str, length, copy);
+    }
 
 #if RAPIDJSON_HAS_STDSTRING
-    bool Key(const std::basic_string<Ch>& str) {
-        return Key(str.data(), SizeType(str.size()));
-    }
+    bool Key(const std::basic_string<Ch>& str) { return Key(str.data(), SizeType(str.size())); }
 #endif
-	
-    bool EndObject(SizeType memberCount = 0) {
+
+    bool EndObject(SizeType memberCount = 0)
+    {
         (void)memberCount;
-        RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level)); // not inside an Object
-        RAPIDJSON_ASSERT(!Base::level_stack_.template Top<typename Base::Level>()->inArray); // currently inside an Array, not Object
-        RAPIDJSON_ASSERT(0 == Base::level_stack_.template Top<typename Base::Level>()->valueCount % 2); // Object has a Key without a Value
-       
+        RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >=
+                         sizeof(typename Base::Level)); // not inside an Object
+        RAPIDJSON_ASSERT(!Base::level_stack_.template Top<typename Base::Level>()
+                              ->inArray); // currently inside an Array, not Object
+        RAPIDJSON_ASSERT(0 == Base::level_stack_.template Top<typename Base::Level>()->valueCount %
+                                  2); // Object has a Key without a Value
+
         bool empty = Base::level_stack_.template Pop<typename Base::Level>(1)->valueCount == 0;
 
-        if (!empty) {
+        if(!empty)
+        {
             Base::os_->Put('\n');
             WriteIndent();
         }
         bool ret = Base::EndValue(Base::WriteEndObject());
         (void)ret;
         RAPIDJSON_ASSERT(ret == true);
-        if (Base::level_stack_.Empty()) // end of json text
+        if(Base::level_stack_.Empty()) // end of json text
             Base::Flush();
         return true;
     }
 
-    bool StartArray() {
+    bool StartArray()
+    {
         PrettyPrefix(kArrayType);
-        new (Base::level_stack_.template Push<typename Base::Level>()) typename Base::Level(true);
+        new(Base::level_stack_.template Push<typename Base::Level>()) typename Base::Level(true);
         return Base::WriteStartArray();
     }
 
-    bool EndArray(SizeType memberCount = 0) {
+    bool EndArray(SizeType memberCount = 0)
+    {
         (void)memberCount;
         RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level));
         RAPIDJSON_ASSERT(Base::level_stack_.template Top<typename Base::Level>()->inArray);
         bool empty = Base::level_stack_.template Pop<typename Base::Level>(1)->valueCount == 0;
 
-        if (!empty && !(formatOptions_ & kFormatSingleLineArray)) {
+        if(!empty && !(formatOptions_ & kFormatSingleLineArray))
+        {
             Base::os_->Put('\n');
             WriteIndent();
         }
         bool ret = Base::EndValue(Base::WriteEndArray());
         (void)ret;
         RAPIDJSON_ASSERT(ret == true);
-        if (Base::level_stack_.Empty()) // end of json text
+        if(Base::level_stack_.Empty()) // end of json text
             Base::Flush();
         return true;
     }
@@ -193,42 +261,51 @@ public:
     /*!
         For user to write a stringified JSON as a value.
 
-        \param json A well-formed JSON value. It should not contain null character within [0, length - 1] range.
-        \param length Length of the json.
-        \param type Type of the root of json.
-        \note When using PrettyWriter::RawValue(), the result json may not be indented correctly.
+        \param json A well-formed JSON value. It should not contain null character within [0, length
+       - 1] range. \param length Length of the json. \param type Type of the root of json. \note
+       When using PrettyWriter::RawValue(), the result json may not be indented correctly.
     */
-    bool RawValue(const Ch* json, size_t length, Type type) {
+    bool RawValue(const Ch* json, size_t length, Type type)
+    {
         RAPIDJSON_ASSERT(json != 0);
         PrettyPrefix(type);
         return Base::EndValue(Base::WriteRawValue(json, length));
     }
 
-protected:
-    void PrettyPrefix(Type type) {
+    protected:
+    void PrettyPrefix(Type type)
+    {
         (void)type;
-        if (Base::level_stack_.GetSize() != 0) { // this value is not at root
+        if(Base::level_stack_.GetSize() != 0)
+        { // this value is not at root
             typename Base::Level* level = Base::level_stack_.template Top<typename Base::Level>();
 
-            if (level->inArray) {
-                if (level->valueCount > 0) {
+            if(level->inArray)
+            {
+                if(level->valueCount > 0)
+                {
                     Base::os_->Put(','); // add comma if it is not the first element in array
-                    if (formatOptions_ & kFormatSingleLineArray)
+                    if(formatOptions_ & kFormatSingleLineArray)
                         Base::os_->Put(' ');
                 }
 
-                if (!(formatOptions_ & kFormatSingleLineArray)) {
+                if(!(formatOptions_ & kFormatSingleLineArray))
+                {
                     Base::os_->Put('\n');
                     WriteIndent();
                 }
             }
-            else {  // in object
-                if (level->valueCount > 0) {
-                    if (level->valueCount % 2 == 0) {
+            else
+            { // in object
+                if(level->valueCount > 0)
+                {
+                    if(level->valueCount % 2 == 0)
+                    {
                         Base::os_->Put(',');
                         Base::os_->Put('\n');
                     }
-                    else {
+                    else
+                    {
                         Base::os_->Put(':');
                         Base::os_->Put(' ');
                     }
@@ -236,21 +313,25 @@ protected:
                 else
                     Base::os_->Put('\n');
 
-                if (level->valueCount % 2 == 0)
+                if(level->valueCount % 2 == 0)
                     WriteIndent();
             }
-            if (!level->inArray && level->valueCount % 2 == 0)
-                RAPIDJSON_ASSERT(type == kStringType);  // if it's in object, then even number should be a name
+            if(!level->inArray && level->valueCount % 2 == 0)
+                RAPIDJSON_ASSERT(
+                    type == kStringType); // if it's in object, then even number should be a name
             level->valueCount++;
         }
-        else {
-            RAPIDJSON_ASSERT(!Base::hasRoot_);  // Should only has one and only one root.
+        else
+        {
+            RAPIDJSON_ASSERT(!Base::hasRoot_); // Should only has one and only one root.
             Base::hasRoot_ = true;
         }
     }
 
-    void WriteIndent()  {
-        size_t count = (Base::level_stack_.GetSize() / sizeof(typename Base::Level)) * indentCharCount_;
+    void WriteIndent()
+    {
+        size_t count =
+            (Base::level_stack_.GetSize() / sizeof(typename Base::Level)) * indentCharCount_;
         PutN(*Base::os_, static_cast<typename OutputStream::Ch>(indentChar_), count);
     }
 
@@ -258,7 +339,7 @@ protected:
     unsigned indentCharCount_;
     PrettyFormatOptions formatOptions_;
 
-private:
+    private:
     // Prohibit copy constructor & assignment operator.
     PrettyWriter(const PrettyWriter&);
     PrettyWriter& operator=(const PrettyWriter&);
diff --git a/include/rapidjson/rapidjson.h b/include/rapidjson/rapidjson.h
index 247b8e68db..5f7f8cbc16 100644
--- a/include/rapidjson/rapidjson.h
+++ b/include/rapidjson/rapidjson.h
@@ -36,8 +36,8 @@
           different translation units of a single application.
  */
 
-#include <cstdlib>  // malloc(), realloc(), free(), size_t
-#include <cstring>  // memset(), memcpy(), memmove(), memcmp()
+#include <cstdlib> // malloc(), realloc(), free(), size_t
+#include <cstring> // memset(), memcpy(), memmove(), memcmp()
 
 ///////////////////////////////////////////////////////////////////////////////
 // RAPIDJSON_VERSION_STRING
@@ -226,8 +226,8 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 // RAPIDJSON_ENDIAN
-#define RAPIDJSON_LITTLEENDIAN  0   //!< Little endian machine
-#define RAPIDJSON_BIGENDIAN     1   //!< Big endian machine
+#define RAPIDJSON_LITTLEENDIAN 0 //!< Little endian machine
+#define RAPIDJSON_BIGENDIAN 1    //!< Big endian machine
 
 //! Endianness of the machine.
 /*!
@@ -244,41 +244,46 @@
 */
 #ifndef RAPIDJSON_ENDIAN
 // Detect with GCC 4.6's macro
-#  ifdef __BYTE_ORDER__
-#    if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-#      define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
-#    elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-#      define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN
-#    else
-#      error Unknown machine endianness detected. User needs to define RAPIDJSON_ENDIAN.
-#    endif // __BYTE_ORDER__
+#ifdef __BYTE_ORDER__
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN
+#else
+#error Unknown machine endianness detected. User needs to define RAPIDJSON_ENDIAN.
+#endif // __BYTE_ORDER__
 // Detect with GLIBC's endian.h
-#  elif defined(__GLIBC__)
-#    include <endian.h>
-#    if (__BYTE_ORDER == __LITTLE_ENDIAN)
-#      define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
-#    elif (__BYTE_ORDER == __BIG_ENDIAN)
-#      define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN
-#    else
-#      error Unknown machine endianness detected. User needs to define RAPIDJSON_ENDIAN.
-#   endif // __GLIBC__
+#elif defined(__GLIBC__)
+#include <endian.h>
+#if(__BYTE_ORDER == __LITTLE_ENDIAN)
+#define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
+#elif(__BYTE_ORDER == __BIG_ENDIAN)
+#define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN
+#else
+#error Unknown machine endianness detected. User needs to define RAPIDJSON_ENDIAN.
+#endif // __GLIBC__
 // Detect with _LITTLE_ENDIAN and _BIG_ENDIAN macro
-#  elif defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)
-#    define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
-#  elif defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)
-#    define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN
+#elif defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)
+#define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
+#elif defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)
+#define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN
 // Detect with architecture macros
-#  elif defined(__sparc) || defined(__sparc__) || defined(_POWER) || defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || defined(__hpux) || defined(__hppa) || defined(_MIPSEB) || defined(_POWER) || defined(__s390__)
-#    define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN
-#  elif defined(__i386__) || defined(__alpha__) || defined(__ia64) || defined(__ia64__) || defined(_M_IX86) || defined(_M_IA64) || defined(_M_ALPHA) || defined(__amd64) || defined(__amd64__) || defined(_M_AMD64) || defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || defined(__bfin__)
-#    define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
-#  elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
-#    define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
-#  elif defined(RAPIDJSON_DOXYGEN_RUNNING)
-#    define RAPIDJSON_ENDIAN
-#  else
-#    error Unknown machine endianness detected. User needs to define RAPIDJSON_ENDIAN.
-#  endif
+#elif defined(__sparc) || defined(__sparc__) || defined(_POWER) || defined(__powerpc__) || \
+    defined(__ppc__) || defined(__ppc64__) || defined(__hpux) || defined(__hppa) ||        \
+    defined(_MIPSEB) || defined(_POWER) || defined(__s390__)
+#define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN
+#elif defined(__i386__) || defined(__alpha__) || defined(__ia64) || defined(__ia64__) ||   \
+    defined(_M_IX86) || defined(_M_IA64) || defined(_M_ALPHA) || defined(__amd64) ||       \
+    defined(__amd64__) || defined(_M_AMD64) || defined(__x86_64) || defined(__x86_64__) || \
+    defined(_M_X64) || defined(__bfin__)
+#define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
+#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+#define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
+#elif defined(RAPIDJSON_DOXYGEN_RUNNING)
+#define RAPIDJSON_ENDIAN
+#else
+#error Unknown machine endianness detected. User needs to define RAPIDJSON_ENDIAN.
+#endif
 #endif // RAPIDJSON_ENDIAN
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -286,7 +291,8 @@
 
 //! Whether using 64-bit architecture
 #ifndef RAPIDJSON_64BIT
-#if defined(__LP64__) || (defined(__x86_64__) && defined(__ILP32__)) || defined(_WIN64) || defined(__EMSCRIPTEN__)
+#if defined(__LP64__) || (defined(__x86_64__) && defined(__ILP32__)) || defined(_WIN64) || \
+    defined(__EMSCRIPTEN__)
 #define RAPIDJSON_64BIT 1
 #else
 #define RAPIDJSON_64BIT 0
@@ -317,7 +323,8 @@
     Use this macro to define 64-bit constants by a pair of 32-bit integer.
 */
 #ifndef RAPIDJSON_UINT64_C2
-#define RAPIDJSON_UINT64_C2(high32, low32) ((static_cast<uint64_t>(high32) << 32) | static_cast<uint64_t>(low32))
+#define RAPIDJSON_UINT64_C2(high32, low32) \
+    ((static_cast<uint64_t>(high32) << 32) | static_cast<uint64_t>(low32))
 #endif
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -327,12 +334,13 @@
 /*!
     \ingroup RAPIDJSON_CONFIG
 
-    This optimization uses the fact that current X86-64 architecture only implement lower 48-bit virtual address.
-    The higher 16-bit can be used for storing other data.
-    \c GenericValue uses this optimization to reduce its size form 24 bytes to 16 bytes in 64-bit architecture.
+    This optimization uses the fact that current X86-64 architecture only implement lower 48-bit
+   virtual address. The higher 16-bit can be used for storing other data. \c GenericValue uses this
+   optimization to reduce its size form 24 bytes to 16 bytes in 64-bit architecture.
 */
 #ifndef RAPIDJSON_48BITPOINTER_OPTIMIZATION
-#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
+#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || \
+    defined(_M_X64) || defined(_M_AMD64)
 #define RAPIDJSON_48BITPOINTER_OPTIMIZATION 1
 #else
 #define RAPIDJSON_48BITPOINTER_OPTIMIZATION 0
@@ -343,8 +351,14 @@
 #if RAPIDJSON_64BIT != 1
 #error RAPIDJSON_48BITPOINTER_OPTIMIZATION can only be set to 1 when RAPIDJSON_64BIT=1
 #endif
-#define RAPIDJSON_SETPOINTER(type, p, x) (p = reinterpret_cast<type *>((reinterpret_cast<uintptr_t>(p) & static_cast<uintptr_t>(RAPIDJSON_UINT64_C2(0xFFFF0000, 0x00000000))) | reinterpret_cast<uintptr_t>(reinterpret_cast<const void*>(x))))
-#define RAPIDJSON_GETPOINTER(type, p) (reinterpret_cast<type *>(reinterpret_cast<uintptr_t>(p) & static_cast<uintptr_t>(RAPIDJSON_UINT64_C2(0x0000FFFF, 0xFFFFFFFF))))
+#define RAPIDJSON_SETPOINTER(type, p, x)                                         \
+    (p = reinterpret_cast<type*>(                                                \
+         (reinterpret_cast<uintptr_t>(p) &                                       \
+          static_cast<uintptr_t>(RAPIDJSON_UINT64_C2(0xFFFF0000, 0x00000000))) | \
+         reinterpret_cast<uintptr_t>(reinterpret_cast<const void*>(x))))
+#define RAPIDJSON_GETPOINTER(type, p)                         \
+    (reinterpret_cast<type*>(reinterpret_cast<uintptr_t>(p) & \
+                             static_cast<uintptr_t>(RAPIDJSON_UINT64_C2(0x0000FFFF, 0xFFFFFFFF))))
 #else
 #define RAPIDJSON_SETPOINTER(type, p, x) (p = (x))
 #define RAPIDJSON_GETPOINTER(type, p) (p)
@@ -379,8 +393,8 @@
     If any of these symbols is defined, RapidJSON defines the macro
     \c RAPIDJSON_SIMD to indicate the availability of the optimized code.
 */
-#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) \
-    || defined(RAPIDJSON_NEON) || defined(RAPIDJSON_DOXYGEN_RUNNING)
+#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) || defined(RAPIDJSON_NEON) || \
+    defined(RAPIDJSON_DOXYGEN_RUNNING)
 #define RAPIDJSON_SIMD
 #endif
 
@@ -442,9 +456,8 @@ RAPIDJSON_NAMESPACE_END
 
 // Prefer C++11 static_assert, if available
 #ifndef RAPIDJSON_STATIC_ASSERT
-#if RAPIDJSON_CPLUSPLUS >= 201103L || ( defined(_MSC_VER) && _MSC_VER >= 1800 )
-#define RAPIDJSON_STATIC_ASSERT(x) \
-   static_assert(x, RAPIDJSON_STRINGIFY(x))
+#if RAPIDJSON_CPLUSPLUS >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1800)
+#define RAPIDJSON_STATIC_ASSERT(x) static_assert(x, RAPIDJSON_STRINGIFY(x))
 #endif // C++11
 #endif // RAPIDJSON_STATIC_ASSERT
 
@@ -454,15 +467,26 @@ RAPIDJSON_NAMESPACE_END
 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
 #endif
 RAPIDJSON_NAMESPACE_BEGIN
-template <bool x> struct STATIC_ASSERTION_FAILURE;
-template <> struct STATIC_ASSERTION_FAILURE<true> { enum { value = 1 }; };
-template <size_t x> struct StaticAssertTest {};
+template <bool x>
+struct STATIC_ASSERTION_FAILURE;
+template <>
+struct STATIC_ASSERTION_FAILURE<true>
+{
+    enum
+    {
+        value = 1
+    };
+};
+template <size_t x>
+struct StaticAssertTest
+{
+};
 RAPIDJSON_NAMESPACE_END
 
 #if defined(__GNUC__) || defined(__clang__)
 #define RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE __attribute__((unused))
 #else
-#define RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE 
+#define RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE
 #endif
 #ifndef __clang__
 //!@endcond
@@ -473,9 +497,9 @@ RAPIDJSON_NAMESPACE_END
     \param x compile-time condition
     \hideinitializer
  */
-#define RAPIDJSON_STATIC_ASSERT(x) \
-    typedef ::RAPIDJSON_NAMESPACE::StaticAssertTest< \
-      sizeof(::RAPIDJSON_NAMESPACE::STATIC_ASSERTION_FAILURE<bool(x) >)> \
+#define RAPIDJSON_STATIC_ASSERT(x)                                 \
+    typedef ::RAPIDJSON_NAMESPACE::StaticAssertTest<sizeof(        \
+        ::RAPIDJSON_NAMESPACE::STATIC_ASSERTION_FAILURE<bool(x)>)> \
     RAPIDJSON_JOIN(StaticAssertTypedef, __LINE__) RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE
 #endif // RAPIDJSON_STATIC_ASSERT
 
@@ -513,13 +537,15 @@ RAPIDJSON_NAMESPACE_END
 
 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
 
-#define RAPIDJSON_MULTILINEMACRO_BEGIN do {
+#define RAPIDJSON_MULTILINEMACRO_BEGIN \
+    do                                 \
+    {
 #define RAPIDJSON_MULTILINEMACRO_END \
-} while((void)0, 0)
+    }                                \
+    while((void)0, 0)
 
 // adopted from Boost
-#define RAPIDJSON_VERSION_CODE(x,y,z) \
-  (((x)*100000) + ((y)*100) + (z))
+#define RAPIDJSON_VERSION_CODE(x, y, z) (((x) * 100000) + ((y) * 100) + (z))
 
 #if defined(__has_builtin)
 #define RAPIDJSON_HAS_BUILTIN(x) __has_builtin(x)
@@ -531,24 +557,25 @@ RAPIDJSON_NAMESPACE_END
 // RAPIDJSON_DIAG_PUSH/POP, RAPIDJSON_DIAG_OFF
 
 #if defined(__GNUC__)
-#define RAPIDJSON_GNUC \
-    RAPIDJSON_VERSION_CODE(__GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__)
+#define RAPIDJSON_GNUC RAPIDJSON_VERSION_CODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)
 #endif
 
-#if defined(__clang__) || (defined(RAPIDJSON_GNUC) && RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,2,0))
+#if defined(__clang__) || \
+    (defined(RAPIDJSON_GNUC) && RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4, 2, 0))
 
 #define RAPIDJSON_PRAGMA(x) _Pragma(RAPIDJSON_STRINGIFY(x))
 #define RAPIDJSON_DIAG_PRAGMA(x) RAPIDJSON_PRAGMA(GCC diagnostic x)
 #define RAPIDJSON_DIAG_OFF(x) \
-    RAPIDJSON_DIAG_PRAGMA(ignored RAPIDJSON_STRINGIFY(RAPIDJSON_JOIN(-W,x)))
+    RAPIDJSON_DIAG_PRAGMA(ignored RAPIDJSON_STRINGIFY(RAPIDJSON_JOIN(-W, x)))
 
 // push/pop support in Clang and GCC>=4.6
-#if defined(__clang__) || (defined(RAPIDJSON_GNUC) && RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,6,0))
+#if defined(__clang__) || \
+    (defined(RAPIDJSON_GNUC) && RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4, 6, 0))
 #define RAPIDJSON_DIAG_PUSH RAPIDJSON_DIAG_PRAGMA(push)
-#define RAPIDJSON_DIAG_POP  RAPIDJSON_DIAG_PRAGMA(pop)
-#else // GCC >= 4.2, < 4.6
+#define RAPIDJSON_DIAG_POP RAPIDJSON_DIAG_PRAGMA(pop)
+#else                       // GCC >= 4.2, < 4.6
 #define RAPIDJSON_DIAG_PUSH /* ignored */
-#define RAPIDJSON_DIAG_POP /* ignored */
+#define RAPIDJSON_DIAG_POP  /* ignored */
 #endif
 
 #elif defined(_MSC_VER)
@@ -557,9 +584,9 @@ RAPIDJSON_NAMESPACE_END
 #define RAPIDJSON_PRAGMA(x) __pragma(x)
 #define RAPIDJSON_DIAG_PRAGMA(x) RAPIDJSON_PRAGMA(warning(x))
 
-#define RAPIDJSON_DIAG_OFF(x) RAPIDJSON_DIAG_PRAGMA(disable: x)
+#define RAPIDJSON_DIAG_OFF(x) RAPIDJSON_DIAG_PRAGMA(disable : x)
 #define RAPIDJSON_DIAG_PUSH RAPIDJSON_DIAG_PRAGMA(push)
-#define RAPIDJSON_DIAG_POP  RAPIDJSON_DIAG_PRAGMA(pop)
+#define RAPIDJSON_DIAG_POP RAPIDJSON_DIAG_PRAGMA(pop)
 
 #else
 
@@ -580,15 +607,16 @@ RAPIDJSON_NAMESPACE_END
 #if RAPIDJSON_HAS_CXX11
 #define RAPIDJSON_HAS_CXX11_RVALUE_REFS 1
 #elif defined(__clang__)
-#if __has_feature(cxx_rvalue_references) && \
-    (defined(_MSC_VER) || defined(_LIBCPP_VERSION) || defined(__GLIBCXX__) && __GLIBCXX__ >= 20080306)
+#if __has_feature(cxx_rvalue_references) && (defined(_MSC_VER) || defined(_LIBCPP_VERSION) || \
+                                             defined(__GLIBCXX__) && __GLIBCXX__ >= 20080306)
 #define RAPIDJSON_HAS_CXX11_RVALUE_REFS 1
 #else
 #define RAPIDJSON_HAS_CXX11_RVALUE_REFS 0
 #endif
-#elif (defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,3,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) || \
-      (defined(_MSC_VER) && _MSC_VER >= 1600) || \
-      (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x5140 && defined(__GXX_EXPERIMENTAL_CXX0X__))
+#elif(defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4, 3, 0)) && \
+      defined(__GXX_EXPERIMENTAL_CXX0X__)) ||                                           \
+    (defined(_MSC_VER) && _MSC_VER >= 1600) ||                                          \
+    (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x5140 && defined(__GXX_EXPERIMENTAL_CXX0X__))
 
 #define RAPIDJSON_HAS_CXX11_RVALUE_REFS 1
 #else
@@ -605,8 +633,9 @@ RAPIDJSON_NAMESPACE_END
 #define RAPIDJSON_HAS_CXX11_NOEXCEPT 1
 #elif defined(__clang__)
 #define RAPIDJSON_HAS_CXX11_NOEXCEPT __has_feature(cxx_noexcept)
-#elif (defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,6,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) || \
-    (defined(_MSC_VER) && _MSC_VER >= 1900) || \
+#elif(defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4, 6, 0)) && \
+      defined(__GXX_EXPERIMENTAL_CXX0X__)) ||                                           \
+    (defined(_MSC_VER) && _MSC_VER >= 1900) ||                                          \
     (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x5140 && defined(__GXX_EXPERIMENTAL_CXX0X__))
 #define RAPIDJSON_HAS_CXX11_NOEXCEPT 1
 #else
@@ -623,7 +652,7 @@ RAPIDJSON_NAMESPACE_END
 
 // no automatic detection, yet
 #ifndef RAPIDJSON_HAS_CXX11_TYPETRAITS
-#if (defined(_MSC_VER) && _MSC_VER >= 1700)
+#if(defined(_MSC_VER) && _MSC_VER >= 1700)
 #define RAPIDJSON_HAS_CXX11_TYPETRAITS 1
 #else
 #define RAPIDJSON_HAS_CXX11_TYPETRAITS 0
@@ -633,9 +662,10 @@ RAPIDJSON_NAMESPACE_END
 #ifndef RAPIDJSON_HAS_CXX11_RANGE_FOR
 #if defined(__clang__)
 #define RAPIDJSON_HAS_CXX11_RANGE_FOR __has_feature(cxx_range_for)
-#elif (defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,6,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) || \
-      (defined(_MSC_VER) && _MSC_VER >= 1700) || \
-      (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x5140 && defined(__GXX_EXPERIMENTAL_CXX0X__))
+#elif(defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4, 6, 0)) && \
+      defined(__GXX_EXPERIMENTAL_CXX0X__)) ||                                           \
+    (defined(_MSC_VER) && _MSC_VER >= 1700) ||                                          \
+    (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x5140 && defined(__GXX_EXPERIMENTAL_CXX0X__))
 #define RAPIDJSON_HAS_CXX11_RANGE_FOR 1
 #else
 #define RAPIDJSON_HAS_CXX11_RANGE_FOR 0
@@ -650,31 +680,31 @@ RAPIDJSON_NAMESPACE_END
 #endif
 
 #if RAPIDJSON_HAS_CXX17
-# define RAPIDJSON_DELIBERATE_FALLTHROUGH [[fallthrough]]
+#define RAPIDJSON_DELIBERATE_FALLTHROUGH [[fallthrough]]
 #elif defined(__has_cpp_attribute)
-# if __has_cpp_attribute(clang::fallthrough)
-#  define RAPIDJSON_DELIBERATE_FALLTHROUGH [[clang::fallthrough]]
-# elif __has_cpp_attribute(fallthrough)
-#  define RAPIDJSON_DELIBERATE_FALLTHROUGH __attribute__((fallthrough))
-# else
-#  define RAPIDJSON_DELIBERATE_FALLTHROUGH
-# endif
+#if __has_cpp_attribute(clang::fallthrough)
+#define RAPIDJSON_DELIBERATE_FALLTHROUGH [[clang::fallthrough]]
+#elif __has_cpp_attribute(fallthrough)
+#define RAPIDJSON_DELIBERATE_FALLTHROUGH __attribute__((fallthrough))
 #else
-# define RAPIDJSON_DELIBERATE_FALLTHROUGH
+#define RAPIDJSON_DELIBERATE_FALLTHROUGH
+#endif
+#else
+#define RAPIDJSON_DELIBERATE_FALLTHROUGH
 #endif
 
 //!@endcond
 
 //! Assertion (in non-throwing contexts).
- /*! \ingroup RAPIDJSON_CONFIG
-    Some functions provide a \c noexcept guarantee, if the compiler supports it.
-    In these cases, the \ref RAPIDJSON_ASSERT macro cannot be overridden to
-    throw an exception.  This macro adds a separate customization point for
-    such cases.
+/*! \ingroup RAPIDJSON_CONFIG
+   Some functions provide a \c noexcept guarantee, if the compiler supports it.
+   In these cases, the \ref RAPIDJSON_ASSERT macro cannot be overridden to
+   throw an exception.  This macro adds a separate customization point for
+   such cases.
 
-    Defaults to C \c assert() (as \ref RAPIDJSON_ASSERT), if \c noexcept is
-    supported, and to \ref RAPIDJSON_ASSERT otherwise.
- */
+   Defaults to C \c assert() (as \ref RAPIDJSON_ASSERT), if \c noexcept is
+   supported, and to \ref RAPIDJSON_ASSERT otherwise.
+*/
 
 ///////////////////////////////////////////////////////////////////////////////
 // RAPIDJSON_NOEXCEPT_ASSERT
@@ -726,14 +756,15 @@ RAPIDJSON_NAMESPACE_END
 RAPIDJSON_NAMESPACE_BEGIN
 
 //! Type of JSON value
-enum Type {
-    kNullType = 0,      //!< null
-    kFalseType = 1,     //!< false
-    kTrueType = 2,      //!< true
-    kObjectType = 3,    //!< object
-    kArrayType = 4,     //!< array
-    kStringType = 5,    //!< string
-    kNumberType = 6     //!< number
+enum Type
+{
+    kNullType   = 0, //!< null
+    kFalseType  = 1, //!< false
+    kTrueType   = 2, //!< true
+    kObjectType = 3, //!< object
+    kArrayType  = 4, //!< array
+    kStringType = 5, //!< string
+    kNumberType = 6  //!< number
 };
 
 RAPIDJSON_NAMESPACE_END
diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h
index f7ef610244..fe4d6e3ec9 100644
--- a/include/rapidjson/reader.h
+++ b/include/rapidjson/reader.h
@@ -40,13 +40,13 @@
 
 #ifdef __clang__
 RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(old-style-cast)
+RAPIDJSON_DIAG_OFF(old - style - cast)
 RAPIDJSON_DIAG_OFF(padded)
-RAPIDJSON_DIAG_OFF(switch-enum)
+RAPIDJSON_DIAG_OFF(switch - enum)
 #elif defined(_MSC_VER)
 RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(4127)  // conditional expression is constant
-RAPIDJSON_DIAG_OFF(4702)  // unreachable code
+RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
+RAPIDJSON_DIAG_OFF(4702) // unreachable code
 #endif
 
 #ifdef __GNUC__
@@ -58,8 +58,11 @@ RAPIDJSON_DIAG_OFF(effc++)
 #define RAPIDJSON_NOTHING /* deliberately empty */
 #ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
 #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
-    RAPIDJSON_MULTILINEMACRO_BEGIN \
-    if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \
+    RAPIDJSON_MULTILINEMACRO_BEGIN                \
+    if(RAPIDJSON_UNLIKELY(HasParseError()))       \
+    {                                             \
+        return value;                             \
+    }                                             \
     RAPIDJSON_MULTILINEMACRO_END
 #endif
 #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
@@ -97,10 +100,10 @@ RAPIDJSON_DIAG_OFF(effc++)
     \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse
  */
 #ifndef RAPIDJSON_PARSE_ERROR_NORETURN
-#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
-    RAPIDJSON_MULTILINEMACRO_BEGIN \
+#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset)                \
+    RAPIDJSON_MULTILINEMACRO_BEGIN                                            \
     RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \
-    SetParseError(parseErrorCode, offset); \
+    SetParseError(parseErrorCode, offset);                                    \
     RAPIDJSON_MULTILINEMACRO_END
 #endif
 
@@ -116,10 +119,10 @@ RAPIDJSON_DIAG_OFF(effc++)
     \hideinitializer
  */
 #ifndef RAPIDJSON_PARSE_ERROR
-#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
-    RAPIDJSON_MULTILINEMACRO_BEGIN \
+#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset)       \
+    RAPIDJSON_MULTILINEMACRO_BEGIN                          \
     RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
-    RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
+    RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;                \
     RAPIDJSON_MULTILINEMACRO_END
 #endif
 
@@ -143,19 +146,25 @@ RAPIDJSON_NAMESPACE_BEGIN
 //! Combination of parseFlags
 /*! \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream
  */
-enum ParseFlag {
-    kParseNoFlags = 0,              //!< No flags are set.
-    kParseInsituFlag = 1,           //!< In-situ(destructive) parsing.
+enum ParseFlag
+{
+    kParseNoFlags              = 0, //!< No flags are set.
+    kParseInsituFlag           = 1, //!< In-situ(destructive) parsing.
     kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings.
-    kParseIterativeFlag = 4,        //!< Iterative(constant complexity in terms of function call stack size) parsing.
-    kParseStopWhenDoneFlag = 8,     //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error.
-    kParseFullPrecisionFlag = 16,   //!< Parse number in full precision (but slower).
-    kParseCommentsFlag = 32,        //!< Allow one-line (//) and multi-line (/**/) comments.
-    kParseNumbersAsStringsFlag = 64,    //!< Parse all numbers (ints/doubles) as strings.
-    kParseTrailingCommasFlag = 128, //!< Allow trailing commas at the end of objects and arrays.
-    kParseNanAndInfFlag = 256,      //!< Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles.
-    kParseEscapedApostropheFlag = 512,  //!< Allow escaped apostrophe in strings.
-    kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS  //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS
+    kParseIterativeFlag =
+        4, //!< Iterative(constant complexity in terms of function call stack size) parsing.
+    kParseStopWhenDoneFlag = 8, //!< After parsing a complete JSON root from stream, stop further
+                                //!< processing the rest of stream. When this flag is used, parser
+                                //!< will not generate kParseErrorDocumentRootNotSingular error.
+    kParseFullPrecisionFlag    = 16,  //!< Parse number in full precision (but slower).
+    kParseCommentsFlag         = 32,  //!< Allow one-line (//) and multi-line (/**/) comments.
+    kParseNumbersAsStringsFlag = 64,  //!< Parse all numbers (ints/doubles) as strings.
+    kParseTrailingCommasFlag   = 128, //!< Allow trailing commas at the end of objects and arrays.
+    kParseNanAndInfFlag = 256, //!< Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles.
+    kParseEscapedApostropheFlag = 512, //!< Allow escaped apostrophe in strings.
+    kParseDefaultFlags =
+        RAPIDJSON_PARSE_DEFAULT_FLAGS //!< Default parse flags. Can be customized by defining
+                                      //!< RAPIDJSON_PARSE_DEFAULT_FLAGS
 };
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -194,11 +203,13 @@ concept Handler {
 /*! This can be used as base class of any reader handler.
     \note implements Handler concept
 */
-template<typename Encoding = UTF8<>, typename Derived = void>
-struct BaseReaderHandler {
+template <typename Encoding = UTF8<>, typename Derived = void>
+struct BaseReaderHandler
+{
     typedef typename Encoding::Ch Ch;
 
-    typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override;
+    typedef typename internal::
+        SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override;
 
     bool Default() { return true; }
     bool Null() { return static_cast<Override&>(*this).Default(); }
@@ -209,10 +220,16 @@ struct BaseReaderHandler {
     bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); }
     bool Double(double) { return static_cast<Override&>(*this).Default(); }
     /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
-    bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
+    bool RawNumber(const Ch* str, SizeType len, bool copy)
+    {
+        return static_cast<Override&>(*this).String(str, len, copy);
+    }
     bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); }
     bool StartObject() { return static_cast<Override&>(*this).Default(); }
-    bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
+    bool Key(const Ch* str, SizeType len, bool copy)
+    {
+        return static_cast<Override&>(*this).String(str, len, copy);
+    }
     bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); }
     bool StartArray() { return static_cast<Override&>(*this).Default(); }
     bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); }
@@ -223,33 +240,35 @@ struct BaseReaderHandler {
 
 namespace internal {
 
-template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
+template <typename Stream, int = StreamTraits<Stream>::copyOptimization>
 class StreamLocalCopy;
 
 //! Do copy optimization.
-template<typename Stream>
-class StreamLocalCopy<Stream, 1> {
-public:
+template <typename Stream>
+class StreamLocalCopy<Stream, 1>
+{
+    public:
     StreamLocalCopy(Stream& original) : s(original), original_(original) {}
     ~StreamLocalCopy() { original_ = s; }
 
     Stream s;
 
-private:
+    private:
     StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
 
     Stream& original_;
 };
 
 //! Keep reference.
-template<typename Stream>
-class StreamLocalCopy<Stream, 0> {
-public:
+template <typename Stream>
+class StreamLocalCopy<Stream, 0>
+{
+    public:
     StreamLocalCopy(Stream& original) : s(original) {}
 
     Stream& s;
 
-private:
+    private:
     StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
 };
 
@@ -262,66 +281,79 @@ private:
 /*! \param is A input stream for skipping white spaces.
     \note This function has SSE2/SSE4.2 specialization.
 */
-template<typename InputStream>
-void SkipWhitespace(InputStream& is) {
+template <typename InputStream>
+void SkipWhitespace(InputStream& is)
+{
     internal::StreamLocalCopy<InputStream> copy(is);
     InputStream& s(copy.s);
 
     typename InputStream::Ch c;
-    while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t')
+    while((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t')
         s.Take();
 }
 
-inline const char* SkipWhitespace(const char* p, const char* end) {
-    while (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
+inline const char* SkipWhitespace(const char* p, const char* end)
+{
+    while(p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
         ++p;
     return p;
 }
 
 #ifdef RAPIDJSON_SSE42
 //! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once.
-inline const char *SkipWhitespace_SIMD(const char* p) {
+inline const char* SkipWhitespace_SIMD(const char* p)
+{
     // Fast return for single non-whitespace
-    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
+    if(*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
         ++p;
     else
         return p;
 
     // 16-byte align to the next boundary
-    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
-    while (p != nextAligned)
-        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) &
+                                                            static_cast<size_t>(~15));
+    while(p != nextAligned)
+        if(*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
             ++p;
         else
             return p;
 
     // The rest of string using SIMD
     static const char whitespace[16] = " \n\r\t";
-    const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
+    const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&whitespace[0]));
 
-    for (;; p += 16) {
-        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
-        const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
-        if (r != 16)    // some of characters is non-whitespace
+    for(;; p += 16)
+    {
+        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i*>(p));
+        const int r     = _mm_cmpistri(w,
+                                   s,
+                                   _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT |
+                                       _SIDD_NEGATIVE_POLARITY);
+        if(r != 16) // some of characters is non-whitespace
             return p + r;
     }
 }
 
-inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
+inline const char* SkipWhitespace_SIMD(const char* p, const char* end)
+{
     // Fast return for single non-whitespace
-    if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
+    if(p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
         ++p;
     else
         return p;
 
     // The middle of string using SIMD
     static const char whitespace[16] = " \n\r\t";
-    const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
+    const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&whitespace[0]));
 
-    for (; p <= end - 16; p += 16) {
-        const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
-        const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
-        if (r != 16)    // some of characters is non-whitespace
+    for(; p <= end - 16; p += 16)
+    {
+        const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
+        const int r     = _mm_cmpistri(w,
+                                   s,
+                                   _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT |
+                                       _SIDD_NEGATIVE_POLARITY);
+        if(r != 16) // some of characters is non-whitespace
             return p + r;
     }
 
@@ -331,40 +363,47 @@ inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
 #elif defined(RAPIDJSON_SSE2)
 
 //! Skip whitespace with SSE2 instructions, testing 16 8-byte characters at once.
-inline const char *SkipWhitespace_SIMD(const char* p) {
+inline const char* SkipWhitespace_SIMD(const char* p)
+{
     // Fast return for single non-whitespace
-    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
+    if(*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
         ++p;
     else
         return p;
 
     // 16-byte align to the next boundary
-    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
-    while (p != nextAligned)
-        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) &
+                                                            static_cast<size_t>(~15));
+    while(p != nextAligned)
+        if(*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
             ++p;
         else
             return p;
 
-    // The rest of string
-    #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
-    static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
-    #undef C16
+// The rest of string
+#define C16(c)                                         \
+    {                                                  \
+        c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c \
+    }
+    static const char whitespaces[4][16] = {C16(' '), C16('\n'), C16('\r'), C16('\t')};
+#undef C16
 
-    const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
-    const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
-    const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
-    const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
+    const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&whitespaces[0][0]));
+    const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&whitespaces[1][0]));
+    const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&whitespaces[2][0]));
+    const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&whitespaces[3][0]));
 
-    for (;; p += 16) {
-        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
-        __m128i x = _mm_cmpeq_epi8(s, w0);
-        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
-        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
-        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
+    for(;; p += 16)
+    {
+        const __m128i s  = _mm_load_si128(reinterpret_cast<const __m128i*>(p));
+        __m128i x        = _mm_cmpeq_epi8(s, w0);
+        x                = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
+        x                = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
+        x                = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
         unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
-        if (r != 0) {   // some of characters may be non-whitespace
-#ifdef _MSC_VER         // Find the index of first non-whitespace
+        if(r != 0)
+        { // some of characters may be non-whitespace
+#ifdef _MSC_VER // Find the index of first non-whitespace
             unsigned long offset;
             _BitScanForward(&offset, r);
             return p + offset;
@@ -375,32 +414,38 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
     }
 }
 
-inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
+inline const char* SkipWhitespace_SIMD(const char* p, const char* end)
+{
     // Fast return for single non-whitespace
-    if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
+    if(p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
         ++p;
     else
         return p;
 
-    // The rest of string
-    #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
-    static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
-    #undef C16
+// The rest of string
+#define C16(c)                                         \
+    {                                                  \
+        c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c \
+    }
+    static const char whitespaces[4][16] = {C16(' '), C16('\n'), C16('\r'), C16('\t')};
+#undef C16
 
-    const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
-    const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
-    const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
-    const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
+    const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&whitespaces[0][0]));
+    const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&whitespaces[1][0]));
+    const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&whitespaces[2][0]));
+    const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&whitespaces[3][0]));
 
-    for (; p <= end - 16; p += 16) {
-        const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
-        __m128i x = _mm_cmpeq_epi8(s, w0);
-        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
-        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
-        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
+    for(; p <= end - 16; p += 16)
+    {
+        const __m128i s  = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
+        __m128i x        = _mm_cmpeq_epi8(s, w0);
+        x                = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
+        x                = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
+        x                = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
         unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
-        if (r != 0) {   // some of characters may be non-whitespace
-#ifdef _MSC_VER         // Find the index of first non-whitespace
+        if(r != 0)
+        { // some of characters may be non-whitespace
+#ifdef _MSC_VER // Find the index of first non-whitespace
             unsigned long offset;
             _BitScanForward(&offset, r);
             return p + offset;
@@ -416,17 +461,19 @@ inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
 #elif defined(RAPIDJSON_NEON)
 
 //! Skip whitespace with ARM Neon instructions, testing 16 8-byte characters at once.
-inline const char *SkipWhitespace_SIMD(const char* p) {
+inline const char* SkipWhitespace_SIMD(const char* p)
+{
     // Fast return for single non-whitespace
-    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
+    if(*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
         ++p;
     else
         return p;
 
     // 16-byte align to the next boundary
-    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
-    while (p != nextAligned)
-        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) &
+                                                            static_cast<size_t>(~15));
+    while(p != nextAligned)
+        if(*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
             ++p;
         else
             return p;
@@ -436,33 +483,39 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
     const uint8x16_t w2 = vmovq_n_u8('\r');
     const uint8x16_t w3 = vmovq_n_u8('\t');
 
-    for (;; p += 16) {
-        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
-        uint8x16_t x = vceqq_u8(s, w0);
-        x = vorrq_u8(x, vceqq_u8(s, w1));
-        x = vorrq_u8(x, vceqq_u8(s, w2));
-        x = vorrq_u8(x, vceqq_u8(s, w3));
+    for(;; p += 16)
+    {
+        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t*>(p));
+        uint8x16_t x       = vceqq_u8(s, w0);
+        x                  = vorrq_u8(x, vceqq_u8(s, w1));
+        x                  = vorrq_u8(x, vceqq_u8(s, w2));
+        x                  = vorrq_u8(x, vceqq_u8(s, w3));
 
-        x = vmvnq_u8(x);                       // Negate
-        x = vrev64q_u8(x);                     // Rev in 64
-        uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
-        uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
+        x             = vmvnq_u8(x);                                // Negate
+        x             = vrev64q_u8(x);                              // Rev in 64
+        uint64_t low  = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
+        uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
 
-        if (low == 0) {
-            if (high != 0) {
+        if(low == 0)
+        {
+            if(high != 0)
+            {
                 uint32_t lz = internal::clzll(high);
                 return p + 8 + (lz >> 3);
             }
-        } else {
+        }
+        else
+        {
             uint32_t lz = internal::clzll(low);
             return p + (lz >> 3);
         }
     }
 }
 
-inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
+inline const char* SkipWhitespace_SIMD(const char* p, const char* end)
+{
     // Fast return for single non-whitespace
-    if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
+    if(p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
         ++p;
     else
         return p;
@@ -472,24 +525,29 @@ inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
     const uint8x16_t w2 = vmovq_n_u8('\r');
     const uint8x16_t w3 = vmovq_n_u8('\t');
 
-    for (; p <= end - 16; p += 16) {
-        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
-        uint8x16_t x = vceqq_u8(s, w0);
-        x = vorrq_u8(x, vceqq_u8(s, w1));
-        x = vorrq_u8(x, vceqq_u8(s, w2));
-        x = vorrq_u8(x, vceqq_u8(s, w3));
+    for(; p <= end - 16; p += 16)
+    {
+        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t*>(p));
+        uint8x16_t x       = vceqq_u8(s, w0);
+        x                  = vorrq_u8(x, vceqq_u8(s, w1));
+        x                  = vorrq_u8(x, vceqq_u8(s, w2));
+        x                  = vorrq_u8(x, vceqq_u8(s, w3));
 
-        x = vmvnq_u8(x);                       // Negate
-        x = vrev64q_u8(x);                     // Rev in 64
-        uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
-        uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
+        x             = vmvnq_u8(x);                                // Negate
+        x             = vrev64q_u8(x);                              // Rev in 64
+        uint64_t low  = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
+        uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
 
-        if (low == 0) {
-            if (high != 0) {
+        if(low == 0)
+        {
+            if(high != 0)
+            {
                 uint32_t lz = internal::clzll(high);
                 return p + 8 + (lz >> 3);
             }
-        } else {
+        }
+        else
+        {
             uint32_t lz = internal::clzll(low);
             return p + (lz >> 3);
         }
@@ -502,16 +560,22 @@ inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
 
 #ifdef RAPIDJSON_SIMD
 //! Template function specialization for InsituStringStream
-template<> inline void SkipWhitespace(InsituStringStream& is) {
+template <>
+inline void SkipWhitespace(InsituStringStream& is)
+{
     is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_));
 }
 
 //! Template function specialization for StringStream
-template<> inline void SkipWhitespace(StringStream& is) {
+template <>
+inline void SkipWhitespace(StringStream& is)
+{
     is.src_ = SkipWhitespace_SIMD(is.src_);
 }
 
-template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) {
+template <>
+inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is)
+{
     is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_);
 }
 #endif // RAPIDJSON_SIMD
@@ -536,16 +600,20 @@ template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>&
     \tparam StackAllocator Allocator type for stack.
 */
 template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator>
-class GenericReader {
-public:
+class GenericReader
+{
+    public:
     typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type
 
     //! Constructor.
-    /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
-        \param stackCapacity stack capacity in bytes for storing a single decoded string.  (Only use for non-destructive parsing)
+    /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for
+       non-destructive parsing) \param stackCapacity stack capacity in bytes for storing a single
+       decoded string.  (Only use for non-destructive parsing)
     */
-    GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) :
-        stack_(stackAllocator, stackCapacity), parseResult_(), state_(IterativeParsingStartState) {}
+    GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity)
+        : stack_(stackAllocator, stackCapacity), parseResult_(), state_(IterativeParsingStartState)
+    {
+    }
 
     //! Parse JSON text.
     /*! \tparam parseFlags Combination of \ref ParseFlag.
@@ -556,8 +624,9 @@ public:
         \return Whether the parsing is successful.
     */
     template <unsigned parseFlags, typename InputStream, typename Handler>
-    ParseResult Parse(InputStream& is, Handler& handler) {
-        if (parseFlags & kParseIterativeFlag)
+    ParseResult Parse(InputStream& is, Handler& handler)
+    {
+        if(parseFlags & kParseIterativeFlag)
             return IterativeParse<parseFlags>(is, handler);
 
         parseResult_.Clear();
@@ -567,19 +636,23 @@ public:
         SkipWhitespaceAndComments<parseFlags>(is);
         RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
 
-        if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) {
+        if(RAPIDJSON_UNLIKELY(is.Peek() == '\0'))
+        {
             RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell());
             RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
         }
-        else {
+        else
+        {
             ParseValue<parseFlags>(is, handler);
             RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
 
-            if (!(parseFlags & kParseStopWhenDoneFlag)) {
+            if(!(parseFlags & kParseStopWhenDoneFlag))
+            {
                 SkipWhitespaceAndComments<parseFlags>(is);
                 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
 
-                if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) {
+                if(RAPIDJSON_UNLIKELY(is.Peek() != '\0'))
+                {
                     RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
                     RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
                 }
@@ -597,14 +670,16 @@ public:
         \return Whether the parsing is successful.
     */
     template <typename InputStream, typename Handler>
-    ParseResult Parse(InputStream& is, Handler& handler) {
+    ParseResult Parse(InputStream& is, Handler& handler)
+    {
         return Parse<kParseDefaultFlags>(is, handler);
     }
 
     //! Initialize JSON text token-by-token parsing
     /*!
      */
-    void IterativeParseInit() {
+    void IterativeParseInit()
+    {
         parseResult_.Clear();
         state_ = IterativeParsingStartState;
     }
@@ -617,18 +692,22 @@ public:
         \return Whether the parsing is successful.
      */
     template <unsigned parseFlags, typename InputStream, typename Handler>
-    bool IterativeParseNext(InputStream& is, Handler& handler) {
-        while (RAPIDJSON_LIKELY(is.Peek() != '\0')) {
+    bool IterativeParseNext(InputStream& is, Handler& handler)
+    {
+        while(RAPIDJSON_LIKELY(is.Peek() != '\0'))
+        {
             SkipWhitespaceAndComments<parseFlags>(is);
 
-            Token t = Tokenize(is.Peek());
+            Token t                 = Tokenize(is.Peek());
             IterativeParsingState n = Predict(state_, t);
             IterativeParsingState d = Transit<parseFlags>(state_, t, n, is, handler);
 
             // If we've finished or hit an error...
-            if (RAPIDJSON_UNLIKELY(IsIterativeParsingCompleteState(d))) {
+            if(RAPIDJSON_UNLIKELY(IsIterativeParsingCompleteState(d)))
+            {
                 // Report errors.
-                if (d == IterativeParsingErrorState) {
+                if(d == IterativeParsingErrorState)
+                {
                     HandleError(state_, is);
                     return false;
                 }
@@ -638,10 +717,12 @@ public:
                 state_ = d;
 
                 // If StopWhenDone is not set...
-                if (!(parseFlags & kParseStopWhenDoneFlag)) {
+                if(!(parseFlags & kParseStopWhenDoneFlag))
+                {
                     // ... and extra non-whitespace data is found...
                     SkipWhitespaceAndComments<parseFlags>(is);
-                    if (is.Peek() != '\0') {
+                    if(is.Peek() != '\0')
+                    {
                         // ... this is considered an error.
                         HandleError(state_, is);
                         return false;
@@ -655,15 +736,17 @@ public:
             // Transition to the new state.
             state_ = d;
 
-            // If we parsed anything other than a delimiter, we invoked the handler, so we can return true now.
-            if (!IsIterativeParsingDelimiterState(n))
+            // If we parsed anything other than a delimiter, we invoked the handler, so we can
+            // return true now.
+            if(!IsIterativeParsingDelimiterState(n))
                 return true;
         }
 
         // We reached the end of file.
         stack_.Clear();
 
-        if (state_ != IterativeParsingFinishState) {
+        if(state_ != IterativeParsingFinishState)
+        {
             HandleError(state_, is);
             return false;
         }
@@ -674,7 +757,8 @@ public:
     //! Check if token-by-token parsing JSON text is complete
     /*! \return Whether the JSON has been fully decoded.
      */
-    RAPIDJSON_FORCEINLINE bool IterativeParseComplete() const {
+    RAPIDJSON_FORCEINLINE bool IterativeParseComplete() const
+    {
         return IsIterativeParsingCompleteState(state_);
     }
 
@@ -687,10 +771,10 @@ public:
     //! Get the position of last parsing error in input, 0 otherwise.
     size_t GetErrorOffset() const { return parseResult_.Offset(); }
 
-protected:
+    protected:
     void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); }
 
-private:
+    private:
     // Prohibit copy constructor & assignment operator.
     GenericReader(const GenericReader&);
     GenericReader& operator=(const GenericReader&);
@@ -698,35 +782,43 @@ private:
     void ClearStack() { stack_.Clear(); }
 
     // clear stack on any exit from ParseStream, e.g. due to exception
-    struct ClearStackOnExit {
+    struct ClearStackOnExit
+    {
         explicit ClearStackOnExit(GenericReader& r) : r_(r) {}
         ~ClearStackOnExit() { r_.ClearStack(); }
-    private:
+
+        private:
         GenericReader& r_;
         ClearStackOnExit(const ClearStackOnExit&);
         ClearStackOnExit& operator=(const ClearStackOnExit&);
     };
 
-    template<unsigned parseFlags, typename InputStream>
-    void SkipWhitespaceAndComments(InputStream& is) {
+    template <unsigned parseFlags, typename InputStream>
+    void SkipWhitespaceAndComments(InputStream& is)
+    {
         SkipWhitespace(is);
 
-        if (parseFlags & kParseCommentsFlag) {
-            while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) {
-                if (Consume(is, '*')) {
-                    while (true) {
-                        if (RAPIDJSON_UNLIKELY(is.Peek() == '\0'))
+        if(parseFlags & kParseCommentsFlag)
+        {
+            while(RAPIDJSON_UNLIKELY(Consume(is, '/')))
+            {
+                if(Consume(is, '*'))
+                {
+                    while(true)
+                    {
+                        if(RAPIDJSON_UNLIKELY(is.Peek() == '\0'))
                             RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
-                        else if (Consume(is, '*')) {
-                            if (Consume(is, '/'))
+                        else if(Consume(is, '*'))
+                        {
+                            if(Consume(is, '/'))
                                 break;
                         }
                         else
                             is.Take();
                     }
                 }
-                else if (RAPIDJSON_LIKELY(Consume(is, '/')))
-                    while (is.Peek() != '\0' && is.Take() != '\n') {}
+                else if(RAPIDJSON_LIKELY(Consume(is, '/')))
+                    while(is.Peek() != '\0' && is.Take() != '\n') {}
                 else
                     RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
 
@@ -736,25 +828,28 @@ private:
     }
 
     // Parse object: { string : value, ... }
-    template<unsigned parseFlags, typename InputStream, typename Handler>
-    void ParseObject(InputStream& is, Handler& handler) {
+    template <unsigned parseFlags, typename InputStream, typename Handler>
+    void ParseObject(InputStream& is, Handler& handler)
+    {
         RAPIDJSON_ASSERT(is.Peek() == '{');
-        is.Take();  // Skip '{'
+        is.Take(); // Skip '{'
 
-        if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
+        if(RAPIDJSON_UNLIKELY(!handler.StartObject()))
             RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
 
         SkipWhitespaceAndComments<parseFlags>(is);
         RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
 
-        if (Consume(is, '}')) {
-            if (RAPIDJSON_UNLIKELY(!handler.EndObject(0)))  // empty object
+        if(Consume(is, '}'))
+        {
+            if(RAPIDJSON_UNLIKELY(!handler.EndObject(0))) // empty object
                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
             return;
         }
 
-        for (SizeType memberCount = 0;;) {
-            if (RAPIDJSON_UNLIKELY(is.Peek() != '"'))
+        for(SizeType memberCount = 0;;)
+        {
+            if(RAPIDJSON_UNLIKELY(is.Peek() != '"'))
                 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
 
             ParseString<parseFlags>(is, handler, true);
@@ -763,7 +858,7 @@ private:
             SkipWhitespaceAndComments<parseFlags>(is);
             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
 
-            if (RAPIDJSON_UNLIKELY(!Consume(is, ':')))
+            if(RAPIDJSON_UNLIKELY(!Consume(is, ':')))
                 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
 
             SkipWhitespaceAndComments<parseFlags>(is);
@@ -777,24 +872,28 @@ private:
 
             ++memberCount;
 
-            switch (is.Peek()) {
-                case ',':
-                    is.Take();
-                    SkipWhitespaceAndComments<parseFlags>(is);
-                    RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
-                    break;
-                case '}':
-                    is.Take();
-                    if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
-                        RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
-                    return;
-                default:
-                    RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This useless break is only for making warning and coverage happy
+            switch(is.Peek())
+            {
+            case ',':
+                is.Take();
+                SkipWhitespaceAndComments<parseFlags>(is);
+                RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+                break;
+            case '}':
+                is.Take();
+                if(RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
+                    RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
+                return;
+            default:
+                RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell());
+                break; // This useless break is only for making warning and coverage happy
             }
 
-            if (parseFlags & kParseTrailingCommasFlag) {
-                if (is.Peek() == '}') {
-                    if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
+            if(parseFlags & kParseTrailingCommasFlag)
+            {
+                if(is.Peek() == '}')
+                {
+                    if(RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
                         RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
                     is.Take();
                     return;
@@ -804,24 +903,27 @@ private:
     }
 
     // Parse array: [ value, ... ]
-    template<unsigned parseFlags, typename InputStream, typename Handler>
-    void ParseArray(InputStream& is, Handler& handler) {
+    template <unsigned parseFlags, typename InputStream, typename Handler>
+    void ParseArray(InputStream& is, Handler& handler)
+    {
         RAPIDJSON_ASSERT(is.Peek() == '[');
-        is.Take();  // Skip '['
+        is.Take(); // Skip '['
 
-        if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
+        if(RAPIDJSON_UNLIKELY(!handler.StartArray()))
             RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
 
         SkipWhitespaceAndComments<parseFlags>(is);
         RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
 
-        if (Consume(is, ']')) {
-            if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array
+        if(Consume(is, ']'))
+        {
+            if(RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array
                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
             return;
         }
 
-        for (SizeType elementCount = 0;;) {
+        for(SizeType elementCount = 0;;)
+        {
             ParseValue<parseFlags>(is, handler);
             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
 
@@ -829,21 +931,25 @@ private:
             SkipWhitespaceAndComments<parseFlags>(is);
             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
 
-            if (Consume(is, ',')) {
+            if(Consume(is, ','))
+            {
                 SkipWhitespaceAndComments<parseFlags>(is);
                 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
             }
-            else if (Consume(is, ']')) {
-                if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
+            else if(Consume(is, ']'))
+            {
+                if(RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
                     RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
                 return;
             }
             else
                 RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
 
-            if (parseFlags & kParseTrailingCommasFlag) {
-                if (is.Peek() == ']') {
-                    if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
+            if(parseFlags & kParseTrailingCommasFlag)
+            {
+                if(is.Peek() == ']')
+                {
+                    if(RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
                         RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
                     is.Take();
                     return;
@@ -852,48 +958,57 @@ private:
         }
     }
 
-    template<unsigned parseFlags, typename InputStream, typename Handler>
-    void ParseNull(InputStream& is, Handler& handler) {
+    template <unsigned parseFlags, typename InputStream, typename Handler>
+    void ParseNull(InputStream& is, Handler& handler)
+    {
         RAPIDJSON_ASSERT(is.Peek() == 'n');
         is.Take();
 
-        if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) {
-            if (RAPIDJSON_UNLIKELY(!handler.Null()))
+        if(RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l')))
+        {
+            if(RAPIDJSON_UNLIKELY(!handler.Null()))
                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
         }
         else
             RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
     }
 
-    template<unsigned parseFlags, typename InputStream, typename Handler>
-    void ParseTrue(InputStream& is, Handler& handler) {
+    template <unsigned parseFlags, typename InputStream, typename Handler>
+    void ParseTrue(InputStream& is, Handler& handler)
+    {
         RAPIDJSON_ASSERT(is.Peek() == 't');
         is.Take();
 
-        if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) {
-            if (RAPIDJSON_UNLIKELY(!handler.Bool(true)))
+        if(RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e')))
+        {
+            if(RAPIDJSON_UNLIKELY(!handler.Bool(true)))
                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
         }
         else
             RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
     }
 
-    template<unsigned parseFlags, typename InputStream, typename Handler>
-    void ParseFalse(InputStream& is, Handler& handler) {
+    template <unsigned parseFlags, typename InputStream, typename Handler>
+    void ParseFalse(InputStream& is, Handler& handler)
+    {
         RAPIDJSON_ASSERT(is.Peek() == 'f');
         is.Take();
 
-        if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) {
-            if (RAPIDJSON_UNLIKELY(!handler.Bool(false)))
+        if(RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') &&
+                            Consume(is, 'e')))
+        {
+            if(RAPIDJSON_UNLIKELY(!handler.Bool(false)))
                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
         }
         else
             RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
     }
 
-    template<typename InputStream>
-    RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) {
-        if (RAPIDJSON_LIKELY(is.Peek() == expect)) {
+    template <typename InputStream>
+    RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect)
+    {
+        if(RAPIDJSON_LIKELY(is.Peek() == expect))
+        {
             is.Take();
             return true;
         }
@@ -902,21 +1017,25 @@ private:
     }
 
     // Helper function to parse four hexadecimal digits in \uXXXX in ParseString().
-    template<typename InputStream>
-    unsigned ParseHex4(InputStream& is, size_t escapeOffset) {
+    template <typename InputStream>
+    unsigned ParseHex4(InputStream& is, size_t escapeOffset)
+    {
         unsigned codepoint = 0;
-        for (int i = 0; i < 4; i++) {
+        for(int i = 0; i < 4; i++)
+        {
             Ch c = is.Peek();
             codepoint <<= 4;
             codepoint += static_cast<unsigned>(c);
-            if (c >= '0' && c <= '9')
+            if(c >= '0' && c <= '9')
                 codepoint -= '0';
-            else if (c >= 'A' && c <= 'F')
+            else if(c >= 'A' && c <= 'F')
                 codepoint -= 'A' - 10;
-            else if (c >= 'a' && c <= 'f')
+            else if(c >= 'a' && c <= 'f')
                 codepoint -= 'a' - 10;
-            else {
-                RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, escapeOffset);
+            else
+            {
+                RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex,
+                                               escapeOffset);
                 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
             }
             is.Take();
@@ -925,28 +1044,29 @@ private:
     }
 
     template <typename CharType>
-    class StackStream {
-    public:
+    class StackStream
+    {
+        public:
         typedef CharType Ch;
 
         StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {}
-        RAPIDJSON_FORCEINLINE void Put(Ch c) {
+        RAPIDJSON_FORCEINLINE void Put(Ch c)
+        {
             *stack_.template Push<Ch>() = c;
             ++length_;
         }
 
-        RAPIDJSON_FORCEINLINE void* Push(SizeType count) {
+        RAPIDJSON_FORCEINLINE void* Push(SizeType count)
+        {
             length_ += count;
             return stack_.template Push<Ch>(count);
         }
 
         size_t Length() const { return length_; }
 
-        Ch* Pop() {
-            return stack_.template Pop<Ch>(length_);
-        }
+        Ch* Pop() { return stack_.template Pop<Ch>(length_); }
 
-    private:
+        private:
         StackStream(const StackStream&);
         StackStream& operator=(const StackStream&);
 
@@ -955,25 +1075,30 @@ private:
     };
 
     // Parse string and generate String event. Different code paths for kParseInsituFlag.
-    template<unsigned parseFlags, typename InputStream, typename Handler>
-    void ParseString(InputStream& is, Handler& handler, bool isKey = false) {
+    template <unsigned parseFlags, typename InputStream, typename Handler>
+    void ParseString(InputStream& is, Handler& handler, bool isKey = false)
+    {
         internal::StreamLocalCopy<InputStream> copy(is);
         InputStream& s(copy.s);
 
         RAPIDJSON_ASSERT(s.Peek() == '\"');
-        s.Take();  // Skip '\"'
+        s.Take(); // Skip '\"'
 
         bool success = false;
-        if (parseFlags & kParseInsituFlag) {
-            typename InputStream::Ch *head = s.PutBegin();
+        if(parseFlags & kParseInsituFlag)
+        {
+            typename InputStream::Ch* head = s.PutBegin();
             ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s);
             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
             size_t length = s.PutEnd(head) - 1;
             RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
-            const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
-            success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false));
+            const typename TargetEncoding::Ch* const str =
+                reinterpret_cast<typename TargetEncoding::Ch*>(head);
+            success = (isKey ? handler.Key(str, SizeType(length), false)
+                             : handler.String(str, SizeType(length), false));
         }
-        else {
+        else
+        {
             StackStream<typename TargetEncoding::Ch> stackStream(stack_);
             ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream);
             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
@@ -981,64 +1106,82 @@ private:
             const typename TargetEncoding::Ch* const str = stackStream.Pop();
             success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true));
         }
-        if (RAPIDJSON_UNLIKELY(!success))
+        if(RAPIDJSON_UNLIKELY(!success))
             RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
     }
 
     // Parse string to an output is
-    // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation.
-    template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream>
-    RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) {
+    // This function handles the prefix/suffix double quotes, escaping, and optional encoding
+    // validation.
+    template <unsigned parseFlags,
+              typename SEncoding,
+              typename TEncoding,
+              typename InputStream,
+              typename OutputStream>
+    RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os)
+    {
 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
-#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+#define Z16 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
         static const char escape[256] = {
-            Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '/',
-            Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
-            0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
-            0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-            Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
-        };
+            Z16,  Z16, 0, 0, '\"', 0, 0, 0, 0, 0, 0, 0,   0,    0,    0,   0,   0,    '/', Z16,
+            Z16,  0,   0, 0, 0,    0, 0, 0, 0, 0, 0, 0,   0,    '\\', 0,   0,   0,    0,   0,
+            '\b', 0,   0, 0, '\f', 0, 0, 0, 0, 0, 0, 0,   '\n', 0,    0,   0,   '\r', 0,   '\t',
+            0,    0,   0, 0, 0,    0, 0, 0, 0, 0, 0, Z16, Z16,  Z16,  Z16, Z16, Z16,  Z16, Z16};
 #undef Z16
-//!@endcond
+        //!@endcond
 
-        for (;;) {
+        for(;;)
+        {
             // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation.
-            if (!(parseFlags & kParseValidateEncodingFlag))
+            if(!(parseFlags & kParseValidateEncodingFlag))
                 ScanCopyUnescapedString(is, os);
 
             Ch c = is.Peek();
-            if (RAPIDJSON_UNLIKELY(c == '\\')) {    // Escape
-                size_t escapeOffset = is.Tell();    // For invalid escaping, report the initial '\\' as error offset
+            if(RAPIDJSON_UNLIKELY(c == '\\'))
+            { // Escape
+                size_t escapeOffset =
+                    is.Tell(); // For invalid escaping, report the initial '\\' as error offset
                 is.Take();
                 Ch e = is.Peek();
-                if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) {
+                if((sizeof(Ch) == 1 || unsigned(e) < 256) &&
+                   RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)]))
+                {
                     is.Take();
-                    os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)]));
+                    os.Put(
+                        static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)]));
                 }
-                else if ((parseFlags & kParseEscapedApostropheFlag) && RAPIDJSON_LIKELY(e == '\'')) { // Allow escaped apostrophe
+                else if((parseFlags & kParseEscapedApostropheFlag) && RAPIDJSON_LIKELY(e == '\''))
+                { // Allow escaped apostrophe
                     is.Take();
                     os.Put('\'');
                 }
-                else if (RAPIDJSON_LIKELY(e == 'u')) {    // Unicode
+                else if(RAPIDJSON_LIKELY(e == 'u'))
+                { // Unicode
                     is.Take();
                     unsigned codepoint = ParseHex4(is, escapeOffset);
                     RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
-                    if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
+                    if(RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDFFF))
+                    {
                         // high surrogate, check if followed by valid low surrogate
-                        if (RAPIDJSON_LIKELY(codepoint <= 0xDBFF)) {
+                        if(RAPIDJSON_LIKELY(codepoint <= 0xDBFF))
+                        {
                             // Handle UTF-16 surrogate pair
-                            if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
-                                RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
+                            if(RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
+                                RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid,
+                                                      escapeOffset);
                             unsigned codepoint2 = ParseHex4(is, escapeOffset);
                             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
-                            if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
-                                RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
-                            codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
+                            if(RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
+                                RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid,
+                                                      escapeOffset);
+                            codepoint =
+                                (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
                         }
                         // single low surrogate
                         else
                         {
-                            RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
+                            RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid,
+                                                  escapeOffset);
                         }
                     }
                     TEncoding::Encode(os, codepoint);
@@ -1046,41 +1189,50 @@ private:
                 else
                     RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset);
             }
-            else if (RAPIDJSON_UNLIKELY(c == '"')) {    // Closing double quote
+            else if(RAPIDJSON_UNLIKELY(c == '"'))
+            { // Closing double quote
                 is.Take();
-                os.Put('\0');   // null-terminate the string
+                os.Put('\0'); // null-terminate the string
                 return;
             }
-            else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
-                if (c == '\0')
+            else if(RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20))
+            { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
+                if(c == '\0')
                     RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell());
                 else
                     RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell());
             }
-            else {
+            else
+            {
                 size_t offset = is.Tell();
-                if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ?
-                    !Transcoder<SEncoding, TEncoding>::Validate(is, os) :
-                    !Transcoder<SEncoding, TEncoding>::Transcode(is, os))))
+                if(RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag
+                                           ? !Transcoder<SEncoding, TEncoding>::Validate(is, os)
+                                           : !Transcoder<SEncoding, TEncoding>::Transcode(is, os))))
                     RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset);
             }
         }
     }
 
-    template<typename InputStream, typename OutputStream>
-    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) {
-            // Do nothing for generic version
+    template <typename InputStream, typename OutputStream>
+    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&)
+    {
+        // Do nothing for generic version
     }
 
 #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
     // StringStream -> StackStream<char>
-    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
+    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is,
+                                                              StackStream<char>& os)
+    {
         const char* p = is.src_;
 
         // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
-        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
-        while (p != nextAligned)
-            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) &
+                                                                static_cast<size_t>(~15));
+        while(p != nextAligned)
+            if(RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') ||
+               RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20))
+            {
                 is.src_ = p;
                 return;
             }
@@ -1088,61 +1240,116 @@ private:
                 os.Put(*p++);
 
         // The rest of string using SIMD
-        static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
-        static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
-        static const char space[16]  = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
-        const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
-        const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
-        const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
+        static const char dquote[16] = {'\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"'};
+        static const char bslash[16] = {'\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\'};
+        static const char space[16]  = {0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F};
+        const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&dquote[0]));
+        const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&bslash[0]));
+        const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&space[0]));
 
-        for (;; p += 16) {
-            const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+        for(;; p += 16)
+        {
+            const __m128i s  = _mm_load_si128(reinterpret_cast<const __m128i*>(p));
             const __m128i t1 = _mm_cmpeq_epi8(s, dq);
             const __m128i t2 = _mm_cmpeq_epi8(s, bs);
-            const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
-            const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
+            const __m128i t3 =
+                _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
+            const __m128i x  = _mm_or_si128(_mm_or_si128(t1, t2), t3);
             unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
-            if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
+            if(RAPIDJSON_UNLIKELY(r != 0))
+            { // some of characters is escaped
                 SizeType length;
-    #ifdef _MSC_VER         // Find the index of first escaped
+#ifdef _MSC_VER // Find the index of first escaped
                 unsigned long offset;
                 _BitScanForward(&offset, r);
                 length = offset;
-    #else
+#else
                 length = static_cast<SizeType>(__builtin_ffs(r) - 1);
-    #endif
-                if (length != 0) {
+#endif
+                if(length != 0)
+                {
                     char* q = reinterpret_cast<char*>(os.Push(length));
-                    for (size_t i = 0; i < length; i++)
+                    for(size_t i = 0; i < length; i++)
                         q[i] = p[i];
 
                     p += length;
                 }
                 break;
             }
-            _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s);
+            _mm_storeu_si128(reinterpret_cast<__m128i*>(os.Push(16)), s);
         }
 
         is.src_ = p;
     }
 
     // InsituStringStream -> InsituStringStream
-    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
+    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is,
+                                                              InsituStringStream& os)
+    {
         RAPIDJSON_ASSERT(&is == &os);
         (void)os;
 
-        if (is.src_ == is.dst_) {
+        if(is.src_ == is.dst_)
+        {
             SkipUnescapedString(is);
             return;
         }
 
         char* p = is.src_;
-        char *q = is.dst_;
+        char* q = is.dst_;
 
         // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
-        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
-        while (p != nextAligned)
-            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) &
+                                                                static_cast<size_t>(~15));
+        while(p != nextAligned)
+            if(RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') ||
+               RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20))
+            {
                 is.src_ = p;
                 is.dst_ = q;
                 return;
@@ -1151,34 +1358,82 @@ private:
                 *q++ = *p++;
 
         // The rest of string using SIMD
-        static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
-        static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
-        static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
-        const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
-        const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
-        const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
+        static const char dquote[16] = {'\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"'};
+        static const char bslash[16] = {'\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\'};
+        static const char space[16]  = {0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F};
+        const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&dquote[0]));
+        const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&bslash[0]));
+        const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&space[0]));
 
-        for (;; p += 16, q += 16) {
-            const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+        for(;; p += 16, q += 16)
+        {
+            const __m128i s  = _mm_load_si128(reinterpret_cast<const __m128i*>(p));
             const __m128i t1 = _mm_cmpeq_epi8(s, dq);
             const __m128i t2 = _mm_cmpeq_epi8(s, bs);
-            const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
-            const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
+            const __m128i t3 =
+                _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
+            const __m128i x  = _mm_or_si128(_mm_or_si128(t1, t2), t3);
             unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
-            if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
+            if(RAPIDJSON_UNLIKELY(r != 0))
+            { // some of characters is escaped
                 size_t length;
-#ifdef _MSC_VER         // Find the index of first escaped
+#ifdef _MSC_VER // Find the index of first escaped
                 unsigned long offset;
                 _BitScanForward(&offset, r);
                 length = offset;
 #else
                 length = static_cast<size_t>(__builtin_ffs(r) - 1);
 #endif
-                for (const char* pend = p + length; p != pend; )
+                for(const char* pend = p + length; p != pend;)
                     *q++ = *p++;
                 break;
             }
-            _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s);
+            _mm_storeu_si128(reinterpret_cast<__m128i*>(q), s);
         }
 
         is.src_ = p;
@@ -1186,36 +1441,88 @@ private:
     }
 
     // When read/write pointers are the same for insitu stream, just skip unescaped characters
-    static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
+    static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is)
+    {
         RAPIDJSON_ASSERT(is.src_ == is.dst_);
         char* p = is.src_;
 
         // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
-        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
-        for (; p != nextAligned; p++)
-            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) &
+                                                                static_cast<size_t>(~15));
+        for(; p != nextAligned; p++)
+            if(RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') ||
+               RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20))
+            {
                 is.src_ = is.dst_ = p;
                 return;
             }
 
         // The rest of string using SIMD
-        static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
-        static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
-        static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
-        const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
-        const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
-        const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
+        static const char dquote[16] = {'\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"',
+                                        '\"'};
+        static const char bslash[16] = {'\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\',
+                                        '\\'};
+        static const char space[16]  = {0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F,
+                                        0x1F};
+        const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&dquote[0]));
+        const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&bslash[0]));
+        const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&space[0]));
 
-        for (;; p += 16) {
-            const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+        for(;; p += 16)
+        {
+            const __m128i s  = _mm_load_si128(reinterpret_cast<const __m128i*>(p));
             const __m128i t1 = _mm_cmpeq_epi8(s, dq);
             const __m128i t2 = _mm_cmpeq_epi8(s, bs);
-            const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
-            const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
+            const __m128i t3 =
+                _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
+            const __m128i x  = _mm_or_si128(_mm_or_si128(t1, t2), t3);
             unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
-            if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
+            if(RAPIDJSON_UNLIKELY(r != 0))
+            { // some of characters is escaped
                 size_t length;
-#ifdef _MSC_VER         // Find the index of first escaped
+#ifdef _MSC_VER // Find the index of first escaped
                 unsigned long offset;
                 _BitScanForward(&offset, r);
                 length = offset;
@@ -1231,13 +1538,18 @@ private:
     }
 #elif defined(RAPIDJSON_NEON)
     // StringStream -> StackStream<char>
-    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
+    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is,
+                                                              StackStream<char>& os)
+    {
         const char* p = is.src_;
 
         // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
-        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
-        while (p != nextAligned)
-            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) &
+                                                                static_cast<size_t>(~15));
+        while(p != nextAligned)
+            if(RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') ||
+               RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20))
+            {
                 is.src_ = p;
                 return;
             }
@@ -1250,63 +1562,76 @@ private:
         const uint8x16_t s2 = vmovq_n_u8('\b');
         const uint8x16_t s3 = vmovq_n_u8(32);
 
-        for (;; p += 16) {
-            const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
-            uint8x16_t x = vceqq_u8(s, s0);
-            x = vorrq_u8(x, vceqq_u8(s, s1));
-            x = vorrq_u8(x, vceqq_u8(s, s2));
-            x = vorrq_u8(x, vcltq_u8(s, s3));
+        for(;; p += 16)
+        {
+            const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t*>(p));
+            uint8x16_t x       = vceqq_u8(s, s0);
+            x                  = vorrq_u8(x, vceqq_u8(s, s1));
+            x                  = vorrq_u8(x, vceqq_u8(s, s2));
+            x                  = vorrq_u8(x, vcltq_u8(s, s3));
 
-            x = vrev64q_u8(x);                     // Rev in 64
-            uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
-            uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
+            x             = vrev64q_u8(x);                              // Rev in 64
+            uint64_t low  = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
+            uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
 
             SizeType length = 0;
-            bool escaped = false;
-            if (low == 0) {
-                if (high != 0) {
+            bool escaped    = false;
+            if(low == 0)
+            {
+                if(high != 0)
+                {
                     uint32_t lz = internal::clzll(high);
-                    length = 8 + (lz >> 3);
-                    escaped = true;
+                    length      = 8 + (lz >> 3);
+                    escaped     = true;
                 }
-            } else {
-                uint32_t lz = internal::clzll(low);
-                length = lz >> 3;
-                escaped = true;
             }
-            if (RAPIDJSON_UNLIKELY(escaped)) {   // some of characters is escaped
-                if (length != 0) {
+            else
+            {
+                uint32_t lz = internal::clzll(low);
+                length      = lz >> 3;
+                escaped     = true;
+            }
+            if(RAPIDJSON_UNLIKELY(escaped))
+            { // some of characters is escaped
+                if(length != 0)
+                {
                     char* q = reinterpret_cast<char*>(os.Push(length));
-                    for (size_t i = 0; i < length; i++)
+                    for(size_t i = 0; i < length; i++)
                         q[i] = p[i];
 
                     p += length;
                 }
                 break;
             }
-            vst1q_u8(reinterpret_cast<uint8_t *>(os.Push(16)), s);
+            vst1q_u8(reinterpret_cast<uint8_t*>(os.Push(16)), s);
         }
 
         is.src_ = p;
     }
 
     // InsituStringStream -> InsituStringStream
-    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
+    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is,
+                                                              InsituStringStream& os)
+    {
         RAPIDJSON_ASSERT(&is == &os);
         (void)os;
 
-        if (is.src_ == is.dst_) {
+        if(is.src_ == is.dst_)
+        {
             SkipUnescapedString(is);
             return;
         }
 
         char* p = is.src_;
-        char *q = is.dst_;
+        char* q = is.dst_;
 
         // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
-        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
-        while (p != nextAligned)
-            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) &
+                                                                static_cast<size_t>(~15));
+        while(p != nextAligned)
+            if(RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') ||
+               RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20))
+            {
                 is.src_ = p;
                 is.dst_ = q;
                 return;
@@ -1320,37 +1645,44 @@ private:
         const uint8x16_t s2 = vmovq_n_u8('\b');
         const uint8x16_t s3 = vmovq_n_u8(32);
 
-        for (;; p += 16, q += 16) {
-            const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
-            uint8x16_t x = vceqq_u8(s, s0);
-            x = vorrq_u8(x, vceqq_u8(s, s1));
-            x = vorrq_u8(x, vceqq_u8(s, s2));
-            x = vorrq_u8(x, vcltq_u8(s, s3));
+        for(;; p += 16, q += 16)
+        {
+            const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t*>(p));
+            uint8x16_t x       = vceqq_u8(s, s0);
+            x                  = vorrq_u8(x, vceqq_u8(s, s1));
+            x                  = vorrq_u8(x, vceqq_u8(s, s2));
+            x                  = vorrq_u8(x, vcltq_u8(s, s3));
 
-            x = vrev64q_u8(x);                     // Rev in 64
-            uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
-            uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
+            x             = vrev64q_u8(x);                              // Rev in 64
+            uint64_t low  = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
+            uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
 
             SizeType length = 0;
-            bool escaped = false;
-            if (low == 0) {
-                if (high != 0) {
+            bool escaped    = false;
+            if(low == 0)
+            {
+                if(high != 0)
+                {
                     uint32_t lz = internal::clzll(high);
-                    length = 8 + (lz >> 3);
-                    escaped = true;
+                    length      = 8 + (lz >> 3);
+                    escaped     = true;
                 }
-            } else {
-                uint32_t lz = internal::clzll(low);
-                length = lz >> 3;
-                escaped = true;
             }
-            if (RAPIDJSON_UNLIKELY(escaped)) {   // some of characters is escaped
-                for (const char* pend = p + length; p != pend; ) {
+            else
+            {
+                uint32_t lz = internal::clzll(low);
+                length      = lz >> 3;
+                escaped     = true;
+            }
+            if(RAPIDJSON_UNLIKELY(escaped))
+            { // some of characters is escaped
+                for(const char* pend = p + length; p != pend;)
+                {
                     *q++ = *p++;
                 }
                 break;
             }
-            vst1q_u8(reinterpret_cast<uint8_t *>(q), s);
+            vst1q_u8(reinterpret_cast<uint8_t*>(q), s);
         }
 
         is.src_ = p;
@@ -1358,14 +1690,18 @@ private:
     }
 
     // When read/write pointers are the same for insitu stream, just skip unescaped characters
-    static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
+    static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is)
+    {
         RAPIDJSON_ASSERT(is.src_ == is.dst_);
         char* p = is.src_;
 
         // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
-        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
-        for (; p != nextAligned; p++)
-            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) &
+                                                                static_cast<size_t>(~15));
+        for(; p != nextAligned; p++)
+            if(RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') ||
+               RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20))
+            {
                 is.src_ = is.dst_ = p;
                 return;
             }
@@ -1376,24 +1712,29 @@ private:
         const uint8x16_t s2 = vmovq_n_u8('\b');
         const uint8x16_t s3 = vmovq_n_u8(32);
 
-        for (;; p += 16) {
-            const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
-            uint8x16_t x = vceqq_u8(s, s0);
-            x = vorrq_u8(x, vceqq_u8(s, s1));
-            x = vorrq_u8(x, vceqq_u8(s, s2));
-            x = vorrq_u8(x, vcltq_u8(s, s3));
+        for(;; p += 16)
+        {
+            const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t*>(p));
+            uint8x16_t x       = vceqq_u8(s, s0);
+            x                  = vorrq_u8(x, vceqq_u8(s, s1));
+            x                  = vorrq_u8(x, vceqq_u8(s, s2));
+            x                  = vorrq_u8(x, vcltq_u8(s, s3));
 
-            x = vrev64q_u8(x);                     // Rev in 64
-            uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
-            uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
+            x             = vrev64q_u8(x);                              // Rev in 64
+            uint64_t low  = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
+            uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
 
-            if (low == 0) {
-                if (high != 0) {
+            if(low == 0)
+            {
+                if(high != 0)
+                {
                     uint32_t lz = internal::clzll(high);
                     p += 8 + (lz >> 3);
                     break;
                 }
-            } else {
+            }
+            else
+            {
                 uint32_t lz = internal::clzll(low);
                 p += lz >> 3;
                 break;
@@ -1404,15 +1745,16 @@ private:
     }
 #endif // RAPIDJSON_NEON
 
-    template<typename InputStream, typename StackCharacter, bool backup, bool pushOnTake>
+    template <typename InputStream, typename StackCharacter, bool backup, bool pushOnTake>
     class NumberStream;
 
-    template<typename InputStream, typename StackCharacter>
-    class NumberStream<InputStream, StackCharacter, false, false> {
-    public:
+    template <typename InputStream, typename StackCharacter>
+    class NumberStream<InputStream, StackCharacter, false, false>
+    {
+        public:
         typedef typename InputStream::Ch Ch;
 
-        NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader;  }
+        NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; }
 
         RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
         RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
@@ -1423,83 +1765,103 @@ private:
         size_t Length() { return 0; }
         const StackCharacter* Pop() { return 0; }
 
-    protected:
+        protected:
         NumberStream& operator=(const NumberStream&);
 
         InputStream& is;
     };
 
-    template<typename InputStream, typename StackCharacter>
-    class NumberStream<InputStream, StackCharacter, true, false> : public NumberStream<InputStream, StackCharacter, false, false> {
+    template <typename InputStream, typename StackCharacter>
+    class NumberStream<InputStream, StackCharacter, true, false>
+        : public NumberStream<InputStream, StackCharacter, false, false>
+    {
         typedef NumberStream<InputStream, StackCharacter, false, false> Base;
-    public:
-        NumberStream(GenericReader& reader, InputStream& s) : Base(reader, s), stackStream(reader.stack_) {}
 
-        RAPIDJSON_FORCEINLINE Ch TakePush() {
+        public:
+        NumberStream(GenericReader& reader, InputStream& s)
+            : Base(reader, s), stackStream(reader.stack_)
+        {
+        }
+
+        RAPIDJSON_FORCEINLINE Ch TakePush()
+        {
             stackStream.Put(static_cast<StackCharacter>(Base::is.Peek()));
             return Base::is.Take();
         }
 
-        RAPIDJSON_FORCEINLINE void Push(StackCharacter c) {
-            stackStream.Put(c);
-        }
+        RAPIDJSON_FORCEINLINE void Push(StackCharacter c) { stackStream.Put(c); }
 
         size_t Length() { return stackStream.Length(); }
 
-        const StackCharacter* Pop() {
+        const StackCharacter* Pop()
+        {
             stackStream.Put('\0');
             return stackStream.Pop();
         }
 
-    private:
+        private:
         StackStream<StackCharacter> stackStream;
     };
 
-    template<typename InputStream, typename StackCharacter>
-    class NumberStream<InputStream, StackCharacter, true, true> : public NumberStream<InputStream, StackCharacter, true, false> {
+    template <typename InputStream, typename StackCharacter>
+    class NumberStream<InputStream, StackCharacter, true, true>
+        : public NumberStream<InputStream, StackCharacter, true, false>
+    {
         typedef NumberStream<InputStream, StackCharacter, true, false> Base;
-    public:
+
+        public:
         NumberStream(GenericReader& reader, InputStream& s) : Base(reader, s) {}
 
         RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); }
     };
 
-    template<unsigned parseFlags, typename InputStream, typename Handler>
-    void ParseNumber(InputStream& is, Handler& handler) {
-        typedef typename internal::SelectIf<internal::BoolType<(parseFlags & kParseNumbersAsStringsFlag) != 0>, typename TargetEncoding::Ch, char>::Type NumberCharacter;
+    template <unsigned parseFlags, typename InputStream, typename Handler>
+    void ParseNumber(InputStream& is, Handler& handler)
+    {
+        typedef typename internal::SelectIf<
+            internal::BoolType<(parseFlags & kParseNumbersAsStringsFlag) != 0>,
+            typename TargetEncoding::Ch,
+            char>::Type NumberCharacter;
 
         internal::StreamLocalCopy<InputStream> copy(is);
-        NumberStream<InputStream, NumberCharacter,
-            ((parseFlags & kParseNumbersAsStringsFlag) != 0) ?
-                ((parseFlags & kParseInsituFlag) == 0) :
-                ((parseFlags & kParseFullPrecisionFlag) != 0),
-            (parseFlags & kParseNumbersAsStringsFlag) != 0 &&
-                (parseFlags & kParseInsituFlag) == 0> s(*this, copy.s);
+        NumberStream<InputStream,
+                     NumberCharacter,
+                     ((parseFlags & kParseNumbersAsStringsFlag) != 0)
+                         ? ((parseFlags & kParseInsituFlag) == 0)
+                         : ((parseFlags & kParseFullPrecisionFlag) != 0),
+                     (parseFlags & kParseNumbersAsStringsFlag) != 0 &&
+                         (parseFlags & kParseInsituFlag) == 0>
+            s(*this, copy.s);
 
         size_t startOffset = s.Tell();
-        double d = 0.0;
-        bool useNanOrInf = false;
+        double d           = 0.0;
+        bool useNanOrInf   = false;
 
         // Parse minus
         bool minus = Consume(s, '-');
 
         // Parse int: zero / ( digit1-9 *DIGIT )
-        unsigned i = 0;
-        uint64_t i64 = 0;
-        bool use64bit = false;
+        unsigned i           = 0;
+        uint64_t i64         = 0;
+        bool use64bit        = false;
         int significandDigit = 0;
-        if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) {
+        if(RAPIDJSON_UNLIKELY(s.Peek() == '0'))
+        {
             i = 0;
             s.TakePush();
         }
-        else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) {
+        else if(RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9'))
+        {
             i = static_cast<unsigned>(s.TakePush() - '0');
 
-            if (minus)
-                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
-                    if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648
-                        if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) {
-                            i64 = i;
+            if(minus)
+                while(RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9'))
+                {
+                    if(RAPIDJSON_UNLIKELY(i >= 214748364))
+                    { // 2^31 = 2147483648
+                        if(RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8'))
+                        {
+                            i64      = i;
                             use64bit = true;
                             break;
                         }
@@ -1508,10 +1870,13 @@ private:
                     significandDigit++;
                 }
             else
-                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
-                    if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295
-                        if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) {
-                            i64 = i;
+                while(RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9'))
+                {
+                    if(RAPIDJSON_UNLIKELY(i >= 429496729))
+                    { // 2^32 - 1 = 4294967295
+                        if(RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5'))
+                        {
+                            i64      = i;
                             use64bit = true;
                             break;
                         }
@@ -1521,26 +1886,36 @@ private:
                 }
         }
         // Parse NaN or Infinity here
-        else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) {
-            if (Consume(s, 'N')) {
-                if (Consume(s, 'a') && Consume(s, 'N')) {
-                    d = std::numeric_limits<double>::quiet_NaN();
+        else if((parseFlags & kParseNanAndInfFlag) &&
+                RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N')))
+        {
+            if(Consume(s, 'N'))
+            {
+                if(Consume(s, 'a') && Consume(s, 'N'))
+                {
+                    d           = std::numeric_limits<double>::quiet_NaN();
                     useNanOrInf = true;
                 }
             }
-            else if (RAPIDJSON_LIKELY(Consume(s, 'I'))) {
-                if (Consume(s, 'n') && Consume(s, 'f')) {
-                    d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity());
+            else if(RAPIDJSON_LIKELY(Consume(s, 'I')))
+            {
+                if(Consume(s, 'n') && Consume(s, 'f'))
+                {
+                    d           = (minus ? -std::numeric_limits<double>::infinity()
+                                         : std::numeric_limits<double>::infinity());
                     useNanOrInf = true;
 
-                    if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n')
-                                                                && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y')))) {
+                    if(RAPIDJSON_UNLIKELY(s.Peek() == 'i' &&
+                                          !(Consume(s, 'i') && Consume(s, 'n') && Consume(s, 'i') &&
+                                            Consume(s, 't') && Consume(s, 'y'))))
+                    {
                         RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
                     }
                 }
             }
 
-            if (RAPIDJSON_UNLIKELY(!useNanOrInf)) {
+            if(RAPIDJSON_UNLIKELY(!useNanOrInf))
+            {
                 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
             }
         }
@@ -1549,12 +1924,18 @@ private:
 
         // Parse 64bit int
         bool useDouble = false;
-        if (use64bit) {
-            if (minus)
-                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
-                     if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808
-                        if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) {
-                            d = static_cast<double>(i64);
+        if(use64bit)
+        {
+            if(minus)
+                while(RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9'))
+                {
+                    if(RAPIDJSON_UNLIKELY(
+                           i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC,
+                                                      0xCCCCCCCC))) // 2^63 = 9223372036854775808
+                        if(RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) ||
+                                            s.Peek() > '8'))
+                        {
+                            d         = static_cast<double>(i64);
                             useDouble = true;
                             break;
                         }
@@ -1562,10 +1943,15 @@ private:
                     significandDigit++;
                 }
             else
-                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
-                    if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615
-                        if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) {
-                            d = static_cast<double>(i64);
+                while(RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9'))
+                {
+                    if(RAPIDJSON_UNLIKELY(
+                           i64 >= RAPIDJSON_UINT64_C2(
+                                      0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615
+                        if(RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) ||
+                                            s.Peek() > '5'))
+                        {
+                            d         = static_cast<double>(i64);
                             useDouble = true;
                             break;
                         }
@@ -1575,8 +1961,10 @@ private:
         }
 
         // Force double for big integer
-        if (useDouble) {
-            while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+        if(useDouble)
+        {
+            while(RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9'))
+            {
                 d = d * 10 + (s.TakePush() - '0');
             }
         }
@@ -1584,25 +1972,29 @@ private:
         // Parse frac = decimal-point 1*DIGIT
         int expFrac = 0;
         size_t decimalPosition;
-        if (!useNanOrInf && Consume(s, '.')) {
+        if(!useNanOrInf && Consume(s, '.'))
+        {
             decimalPosition = s.Length();
 
-            if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9')))
+            if(RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9')))
                 RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell());
 
-            if (!useDouble) {
+            if(!useDouble)
+            {
 #if RAPIDJSON_64BIT
                 // Use i64 to store significand in 64-bit architecture
-                if (!use64bit)
+                if(!use64bit)
                     i64 = i;
 
-                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
-                    if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
+                while(RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9'))
+                {
+                    if(i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
                         break;
-                    else {
+                    else
+                    {
                         i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
                         --expFrac;
-                        if (i64 != 0)
+                        if(i64 != 0)
                             significandDigit++;
                     }
                 }
@@ -1615,11 +2007,13 @@ private:
                 useDouble = true;
             }
 
-            while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
-                if (significandDigit < 17) {
+            while(RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9'))
+            {
+                if(significandDigit < 17)
+                {
                     d = d * 10.0 + (s.TakePush() - '0');
                     --expFrac;
-                    if (RAPIDJSON_LIKELY(d > 0.0))
+                    if(RAPIDJSON_LIKELY(d > 0.0))
                         significandDigit++;
                 }
                 else
@@ -1631,21 +2025,25 @@ private:
 
         // Parse exp = e [ minus / plus ] 1*DIGIT
         int exp = 0;
-        if (!useNanOrInf && (Consume(s, 'e') || Consume(s, 'E'))) {
-            if (!useDouble) {
-                d = static_cast<double>(use64bit ? i64 : i);
+        if(!useNanOrInf && (Consume(s, 'e') || Consume(s, 'E')))
+        {
+            if(!useDouble)
+            {
+                d         = static_cast<double>(use64bit ? i64 : i);
                 useDouble = true;
             }
 
             bool expMinus = false;
-            if (Consume(s, '+'))
+            if(Consume(s, '+'))
                 ;
-            else if (Consume(s, '-'))
+            else if(Consume(s, '-'))
                 expMinus = true;
 
-            if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+            if(RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9'))
+            {
                 exp = static_cast<int>(s.Take() - '0');
-                if (expMinus) {
+                if(expMinus)
+                {
                     // (exp + expFrac) must not underflow int => we're detecting when -exp gets
                     // dangerously close to INT_MIN (a pessimistic next digit 9 would push it into
                     // underflow territory):
@@ -1655,19 +2053,24 @@ private:
                     RAPIDJSON_ASSERT(expFrac <= 0);
                     int maxExp = (expFrac + 2147483639) / 10;
 
-                    while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                    while(RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9'))
+                    {
                         exp = exp * 10 + static_cast<int>(s.Take() - '0');
-                        if (RAPIDJSON_UNLIKELY(exp > maxExp)) {
-                            while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9'))  // Consume the rest of exponent
+                        if(RAPIDJSON_UNLIKELY(exp > maxExp))
+                        {
+                            while(RAPIDJSON_UNLIKELY(
+                                s.Peek() >= '0' && s.Peek() <= '9')) // Consume the rest of exponent
                                 s.Take();
                         }
                     }
                 }
-                else {  // positive exp
+                else
+                { // positive exp
                     int maxExp = 308 - expFrac;
-                    while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                    while(RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9'))
+                    {
                         exp = exp * 10 + static_cast<int>(s.Take() - '0');
-                        if (RAPIDJSON_UNLIKELY(exp > maxExp))
+                        if(RAPIDJSON_UNLIKELY(exp > maxExp))
                             RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
                     }
                 }
@@ -1675,99 +2078,113 @@ private:
             else
                 RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell());
 
-            if (expMinus)
+            if(expMinus)
                 exp = -exp;
         }
 
         // Finish parsing, call event according to the type of number.
         bool cont = true;
 
-        if (parseFlags & kParseNumbersAsStringsFlag) {
-            if (parseFlags & kParseInsituFlag) {
-                s.Pop();  // Pop stack no matter if it will be used or not.
+        if(parseFlags & kParseNumbersAsStringsFlag)
+        {
+            if(parseFlags & kParseInsituFlag)
+            {
+                s.Pop(); // Pop stack no matter if it will be used or not.
                 typename InputStream::Ch* head = is.PutBegin();
-                const size_t length = s.Tell() - startOffset;
+                const size_t length            = s.Tell() - startOffset;
                 RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
                 // unable to insert the \0 character here, it will erase the comma after this number
-                const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
+                const typename TargetEncoding::Ch* const str =
+                    reinterpret_cast<typename TargetEncoding::Ch*>(head);
                 cont = handler.RawNumber(str, SizeType(length), false);
             }
-            else {
+            else
+            {
                 SizeType numCharsToCopy = static_cast<SizeType>(s.Length());
-                GenericStringStream<UTF8<NumberCharacter> > srcStream(s.Pop());
+                GenericStringStream<UTF8<NumberCharacter>> srcStream(s.Pop());
                 StackStream<typename TargetEncoding::Ch> dstStream(stack_);
-                while (numCharsToCopy--) {
-                    Transcoder<UTF8<typename TargetEncoding::Ch>, TargetEncoding>::Transcode(srcStream, dstStream);
+                while(numCharsToCopy--)
+                {
+                    Transcoder<UTF8<typename TargetEncoding::Ch>, TargetEncoding>::Transcode(
+                        srcStream, dstStream);
                 }
                 dstStream.Put('\0');
                 const typename TargetEncoding::Ch* str = dstStream.Pop();
                 const SizeType length = static_cast<SizeType>(dstStream.Length()) - 1;
-                cont = handler.RawNumber(str, SizeType(length), true);
+                cont                  = handler.RawNumber(str, SizeType(length), true);
             }
         }
-        else {
-           size_t length = s.Length();
-           const NumberCharacter* decimal = s.Pop();  // Pop stack no matter if it will be used or not.
+        else
+        {
+            size_t length = s.Length();
+            const NumberCharacter* decimal =
+                s.Pop(); // Pop stack no matter if it will be used or not.
 
-           if (useDouble) {
-               int p = exp + expFrac;
-               if (parseFlags & kParseFullPrecisionFlag)
-                   d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
-               else
-                   d = internal::StrtodNormalPrecision(d, p);
+            if(useDouble)
+            {
+                int p = exp + expFrac;
+                if(parseFlags & kParseFullPrecisionFlag)
+                    d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
+                else
+                    d = internal::StrtodNormalPrecision(d, p);
 
-               // Use > max, instead of == inf, to fix bogus warning -Wfloat-equal
-               if (d > (std::numeric_limits<double>::max)()) {
-                   // Overflow
-                   // TODO: internal::StrtodX should report overflow (or underflow)
-                   RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
-               }
+                // Use > max, instead of == inf, to fix bogus warning -Wfloat-equal
+                if(d > (std::numeric_limits<double>::max)())
+                {
+                    // Overflow
+                    // TODO: internal::StrtodX should report overflow (or underflow)
+                    RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
+                }
 
-               cont = handler.Double(minus ? -d : d);
-           }
-           else if (useNanOrInf) {
-               cont = handler.Double(d);
-           }
-           else {
-               if (use64bit) {
-                   if (minus)
-                       cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
-                   else
-                       cont = handler.Uint64(i64);
-               }
-               else {
-                   if (minus)
-                       cont = handler.Int(static_cast<int32_t>(~i + 1));
-                   else
-                       cont = handler.Uint(i);
-               }
-           }
+                cont = handler.Double(minus ? -d : d);
+            }
+            else if(useNanOrInf)
+            {
+                cont = handler.Double(d);
+            }
+            else
+            {
+                if(use64bit)
+                {
+                    if(minus)
+                        cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
+                    else
+                        cont = handler.Uint64(i64);
+                }
+                else
+                {
+                    if(minus)
+                        cont = handler.Int(static_cast<int32_t>(~i + 1));
+                    else
+                        cont = handler.Uint(i);
+                }
+            }
         }
-        if (RAPIDJSON_UNLIKELY(!cont))
+        if(RAPIDJSON_UNLIKELY(!cont))
             RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset);
     }
 
     // Parse any JSON value
-    template<unsigned parseFlags, typename InputStream, typename Handler>
-    void ParseValue(InputStream& is, Handler& handler) {
-        switch (is.Peek()) {
-            case 'n': ParseNull  <parseFlags>(is, handler); break;
-            case 't': ParseTrue  <parseFlags>(is, handler); break;
-            case 'f': ParseFalse <parseFlags>(is, handler); break;
-            case '"': ParseString<parseFlags>(is, handler); break;
-            case '{': ParseObject<parseFlags>(is, handler); break;
-            case '[': ParseArray <parseFlags>(is, handler); break;
-            default :
-                      ParseNumber<parseFlags>(is, handler);
-                      break;
-
+    template <unsigned parseFlags, typename InputStream, typename Handler>
+    void ParseValue(InputStream& is, Handler& handler)
+    {
+        switch(is.Peek())
+        {
+        case 'n': ParseNull<parseFlags>(is, handler); break;
+        case 't': ParseTrue<parseFlags>(is, handler); break;
+        case 'f': ParseFalse<parseFlags>(is, handler); break;
+        case '"': ParseString<parseFlags>(is, handler); break;
+        case '{': ParseObject<parseFlags>(is, handler); break;
+        case '[': ParseArray<parseFlags>(is, handler); break;
+        default: ParseNumber<parseFlags>(is, handler); break;
         }
     }
 
     // Iterative Parsing
 
     // States
-    enum IterativeParsingState {
+    enum IterativeParsingState
+    {
         IterativeParsingFinishState = 0, // sink states at top
         IterativeParsingErrorState,      // sink states at top
         IterativeParsingStartState,
@@ -1795,7 +2212,8 @@ private:
     };
 
     // Tokens
-    enum Token {
+    enum Token
+    {
         LeftBracketToken = 0,
         RightBracketToken,
 
@@ -1814,48 +2232,101 @@ private:
         kTokenCount
     };
 
-    RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) const {
+    RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) const
+    {
 
 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
 #define N NumberToken
-#define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
+#define N16 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
         // Maps from ASCII to Token
         static const unsigned char tokenMap[256] = {
             N16, // 00~0F
             N16, // 10~1F
-            N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F
-            N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F
-            N16, // 40~4F
-            N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F
-            N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F
-            N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F
-            N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF
+            N,           N,
+            StringToken, N,
+            N,           N,
+            N,           N,
+            N,           N,
+            N,           N,
+            CommaToken,  N,
+            N,           N, // 20~2F
+            N,           N,
+            N,           N,
+            N,           N,
+            N,           N,
+            N,           N,
+            ColonToken,  N,
+            N,           N,
+            N,           N, // 30~3F
+            N16,            // 40~4F
+            N,           N,
+            N,           N,
+            N,           N,
+            N,           N,
+            N,           N,
+            N,           LeftBracketToken,
+            N,           RightBracketToken,
+            N,           N, // 50~5F
+            N,           N,
+            N,           N,
+            N,           N,
+            FalseToken,  N,
+            N,           N,
+            N,           N,
+            N,           N,
+            NullToken,   N, // 60~6F
+            N,           N,
+            N,           N,
+            TrueToken,   N,
+            N,           N,
+            N,           N,
+            N,           LeftCurlyBracketToken,
+            N,           RightCurlyBracketToken,
+            N,           N, // 70~7F
+            N16,         N16,
+            N16,         N16,
+            N16,         N16,
+            N16,         N16 // 80~FF
         };
 #undef N
 #undef N16
-//!@endcond
+        //!@endcond
 
-        if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256)
+        if(sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256)
             return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]);
         else
             return NumberToken;
     }
 
-    RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) const {
+    RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state,
+                                                        Token token) const
+    {
         // current state x one lookahead token -> new state
         static const char G[cIterativeParsingStateCount][kTokenCount] = {
             // Finish(sink state)
-            {
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState
-            },
+            {IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState},
             // Error(sink state)
-            {
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState
-            },
+            {IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState},
             // Start
             {
                 IterativeParsingArrayInitialState,  // Left bracket
@@ -1872,17 +2343,17 @@ private:
             },
             // ObjectInitial
             {
-                IterativeParsingErrorState,         // Left bracket
-                IterativeParsingErrorState,         // Right bracket
-                IterativeParsingErrorState,         // Left curly bracket
-                IterativeParsingObjectFinishState,  // Right curly bracket
-                IterativeParsingErrorState,         // Comma
-                IterativeParsingErrorState,         // Colon
-                IterativeParsingMemberKeyState,     // String
-                IterativeParsingErrorState,         // False
-                IterativeParsingErrorState,         // True
-                IterativeParsingErrorState,         // Null
-                IterativeParsingErrorState          // Number
+                IterativeParsingErrorState,        // Left bracket
+                IterativeParsingErrorState,        // Right bracket
+                IterativeParsingErrorState,        // Left curly bracket
+                IterativeParsingObjectFinishState, // Right curly bracket
+                IterativeParsingErrorState,        // Comma
+                IterativeParsingErrorState,        // Colon
+                IterativeParsingMemberKeyState,    // String
+                IterativeParsingErrorState,        // False
+                IterativeParsingErrorState,        // True
+                IterativeParsingErrorState,        // Null
+                IterativeParsingErrorState         // Number
             },
             // MemberKey
             {
@@ -1900,143 +2371,170 @@ private:
             },
             // MemberValue
             {
-                IterativeParsingErrorState,             // Left bracket
-                IterativeParsingErrorState,             // Right bracket
-                IterativeParsingErrorState,             // Left curly bracket
-                IterativeParsingObjectFinishState,      // Right curly bracket
-                IterativeParsingMemberDelimiterState,   // Comma
-                IterativeParsingErrorState,             // Colon
-                IterativeParsingErrorState,             // String
-                IterativeParsingErrorState,             // False
-                IterativeParsingErrorState,             // True
-                IterativeParsingErrorState,             // Null
-                IterativeParsingErrorState              // Number
+                IterativeParsingErrorState,           // Left bracket
+                IterativeParsingErrorState,           // Right bracket
+                IterativeParsingErrorState,           // Left curly bracket
+                IterativeParsingObjectFinishState,    // Right curly bracket
+                IterativeParsingMemberDelimiterState, // Comma
+                IterativeParsingErrorState,           // Colon
+                IterativeParsingErrorState,           // String
+                IterativeParsingErrorState,           // False
+                IterativeParsingErrorState,           // True
+                IterativeParsingErrorState,           // Null
+                IterativeParsingErrorState            // Number
             },
             // ObjectFinish(sink state)
-            {
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState
-            },
+            {IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState},
             // ArrayInitial
             {
-                IterativeParsingArrayInitialState,      // Left bracket(push Element state)
-                IterativeParsingArrayFinishState,       // Right bracket
-                IterativeParsingObjectInitialState,     // Left curly bracket(push Element state)
-                IterativeParsingErrorState,             // Right curly bracket
-                IterativeParsingErrorState,             // Comma
-                IterativeParsingErrorState,             // Colon
-                IterativeParsingElementState,           // String
-                IterativeParsingElementState,           // False
-                IterativeParsingElementState,           // True
-                IterativeParsingElementState,           // Null
-                IterativeParsingElementState            // Number
+                IterativeParsingArrayInitialState,  // Left bracket(push Element state)
+                IterativeParsingArrayFinishState,   // Right bracket
+                IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
+                IterativeParsingErrorState,         // Right curly bracket
+                IterativeParsingErrorState,         // Comma
+                IterativeParsingErrorState,         // Colon
+                IterativeParsingElementState,       // String
+                IterativeParsingElementState,       // False
+                IterativeParsingElementState,       // True
+                IterativeParsingElementState,       // Null
+                IterativeParsingElementState        // Number
             },
             // Element
             {
-                IterativeParsingErrorState,             // Left bracket
-                IterativeParsingArrayFinishState,       // Right bracket
-                IterativeParsingErrorState,             // Left curly bracket
-                IterativeParsingErrorState,             // Right curly bracket
-                IterativeParsingElementDelimiterState,  // Comma
-                IterativeParsingErrorState,             // Colon
-                IterativeParsingErrorState,             // String
-                IterativeParsingErrorState,             // False
-                IterativeParsingErrorState,             // True
-                IterativeParsingErrorState,             // Null
-                IterativeParsingErrorState              // Number
+                IterativeParsingErrorState,            // Left bracket
+                IterativeParsingArrayFinishState,      // Right bracket
+                IterativeParsingErrorState,            // Left curly bracket
+                IterativeParsingErrorState,            // Right curly bracket
+                IterativeParsingElementDelimiterState, // Comma
+                IterativeParsingErrorState,            // Colon
+                IterativeParsingErrorState,            // String
+                IterativeParsingErrorState,            // False
+                IterativeParsingErrorState,            // True
+                IterativeParsingErrorState,            // Null
+                IterativeParsingErrorState             // Number
             },
             // ArrayFinish(sink state)
-            {
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState
-            },
+            {IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState},
             // Single Value (sink state)
-            {
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState
-            },
+            {IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState,
+             IterativeParsingErrorState},
             // ElementDelimiter
             {
-                IterativeParsingArrayInitialState,      // Left bracket(push Element state)
-                IterativeParsingArrayFinishState,       // Right bracket
-                IterativeParsingObjectInitialState,     // Left curly bracket(push Element state)
-                IterativeParsingErrorState,             // Right curly bracket
-                IterativeParsingErrorState,             // Comma
-                IterativeParsingErrorState,             // Colon
-                IterativeParsingElementState,           // String
-                IterativeParsingElementState,           // False
-                IterativeParsingElementState,           // True
-                IterativeParsingElementState,           // Null
-                IterativeParsingElementState            // Number
+                IterativeParsingArrayInitialState,  // Left bracket(push Element state)
+                IterativeParsingArrayFinishState,   // Right bracket
+                IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
+                IterativeParsingErrorState,         // Right curly bracket
+                IterativeParsingErrorState,         // Comma
+                IterativeParsingErrorState,         // Colon
+                IterativeParsingElementState,       // String
+                IterativeParsingElementState,       // False
+                IterativeParsingElementState,       // True
+                IterativeParsingElementState,       // Null
+                IterativeParsingElementState        // Number
             },
             // MemberDelimiter
             {
-                IterativeParsingErrorState,         // Left bracket
-                IterativeParsingErrorState,         // Right bracket
-                IterativeParsingErrorState,         // Left curly bracket
-                IterativeParsingObjectFinishState,  // Right curly bracket
-                IterativeParsingErrorState,         // Comma
-                IterativeParsingErrorState,         // Colon
-                IterativeParsingMemberKeyState,     // String
-                IterativeParsingErrorState,         // False
-                IterativeParsingErrorState,         // True
-                IterativeParsingErrorState,         // Null
-                IterativeParsingErrorState          // Number
+                IterativeParsingErrorState,        // Left bracket
+                IterativeParsingErrorState,        // Right bracket
+                IterativeParsingErrorState,        // Left curly bracket
+                IterativeParsingObjectFinishState, // Right curly bracket
+                IterativeParsingErrorState,        // Comma
+                IterativeParsingErrorState,        // Colon
+                IterativeParsingMemberKeyState,    // String
+                IterativeParsingErrorState,        // False
+                IterativeParsingErrorState,        // True
+                IterativeParsingErrorState,        // Null
+                IterativeParsingErrorState         // Number
             },
             // KeyValueDelimiter
             {
-                IterativeParsingArrayInitialState,      // Left bracket(push MemberValue state)
-                IterativeParsingErrorState,             // Right bracket
-                IterativeParsingObjectInitialState,     // Left curly bracket(push MemberValue state)
-                IterativeParsingErrorState,             // Right curly bracket
-                IterativeParsingErrorState,             // Comma
-                IterativeParsingErrorState,             // Colon
-                IterativeParsingMemberValueState,       // String
-                IterativeParsingMemberValueState,       // False
-                IterativeParsingMemberValueState,       // True
-                IterativeParsingMemberValueState,       // Null
-                IterativeParsingMemberValueState        // Number
+                IterativeParsingArrayInitialState,  // Left bracket(push MemberValue state)
+                IterativeParsingErrorState,         // Right bracket
+                IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state)
+                IterativeParsingErrorState,         // Right curly bracket
+                IterativeParsingErrorState,         // Comma
+                IterativeParsingErrorState,         // Colon
+                IterativeParsingMemberValueState,   // String
+                IterativeParsingMemberValueState,   // False
+                IterativeParsingMemberValueState,   // True
+                IterativeParsingMemberValueState,   // Null
+                IterativeParsingMemberValueState    // Number
             },
         }; // End of G
 
         return static_cast<IterativeParsingState>(G[state][token]);
     }
 
-    // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit().
-    // May return a new state on state pop.
+    // Make an advance in the token stream and state based on the candidate destination state which
+    // was returned by Transit(). May return a new state on state pop.
     template <unsigned parseFlags, typename InputStream, typename Handler>
-    RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) {
+    RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src,
+                                                        Token token,
+                                                        IterativeParsingState dst,
+                                                        InputStream& is,
+                                                        Handler& handler)
+    {
         (void)token;
 
-        switch (dst) {
-        case IterativeParsingErrorState:
-            return dst;
+        switch(dst)
+        {
+        case IterativeParsingErrorState: return dst;
 
         case IterativeParsingObjectInitialState:
-        case IterativeParsingArrayInitialState:
-        {
-            // Push the state(Element or MemeberValue) if we are nested in another array or value of member.
-            // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
+        case IterativeParsingArrayInitialState: {
+            // Push the state(Element or MemeberValue) if we are nested in another array or value of
+            // member. In this way we can get the correct state on ObjectFinish or ArrayFinish by
+            // frame pop.
             IterativeParsingState n = src;
-            if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
+            if(src == IterativeParsingArrayInitialState ||
+               src == IterativeParsingElementDelimiterState)
                 n = IterativeParsingElementState;
-            else if (src == IterativeParsingKeyValueDelimiterState)
+            else if(src == IterativeParsingKeyValueDelimiterState)
                 n = IterativeParsingMemberValueState;
             // Push current state.
             *stack_.template Push<SizeType>(1) = n;
             // Initialize and push the member/element count.
             *stack_.template Push<SizeType>(1) = 0;
             // Call handler
-            bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray();
+            bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject()
+                                                                  : handler.StartArray();
             // On handler short circuits the parsing.
-            if (!hr) {
+            if(!hr)
+            {
                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
                 return IterativeParsingErrorState;
             }
-            else {
+            else
+            {
                 is.Take();
                 return dst;
             }
@@ -2044,7 +2542,7 @@ private:
 
         case IterativeParsingMemberKeyState:
             ParseString<parseFlags>(is, handler, true);
-            if (HasParseError())
+            if(HasParseError())
                 return IterativeParsingErrorState;
             else
                 return dst;
@@ -2057,7 +2555,8 @@ private:
         case IterativeParsingMemberValueState:
             // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
             ParseValue<parseFlags>(is, handler);
-            if (HasParseError()) {
+            if(HasParseError())
+            {
                 return IterativeParsingErrorState;
             }
             return dst;
@@ -2065,7 +2564,8 @@ private:
         case IterativeParsingElementState:
             // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
             ParseValue<parseFlags>(is, handler);
-            if (HasParseError()) {
+            if(HasParseError())
+            {
                 return IterativeParsingErrorState;
             }
             return dst;
@@ -2077,61 +2577,69 @@ private:
             *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
             return dst;
 
-        case IterativeParsingObjectFinishState:
-        {
+        case IterativeParsingObjectFinishState: {
             // Transit from delimiter is only allowed when trailing commas are enabled
-            if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) {
+            if(!(parseFlags & kParseTrailingCommasFlag) &&
+               src == IterativeParsingMemberDelimiterState)
+            {
                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell());
                 return IterativeParsingErrorState;
             }
             // Get member count.
             SizeType c = *stack_.template Pop<SizeType>(1);
             // If the object is not empty, count the last member.
-            if (src == IterativeParsingMemberValueState)
+            if(src == IterativeParsingMemberValueState)
                 ++c;
             // Restore the state.
-            IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
+            IterativeParsingState n =
+                static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
             // Transit to Finish state if this is the topmost scope.
-            if (n == IterativeParsingStartState)
+            if(n == IterativeParsingStartState)
                 n = IterativeParsingFinishState;
             // Call handler
             bool hr = handler.EndObject(c);
             // On handler short circuits the parsing.
-            if (!hr) {
+            if(!hr)
+            {
                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
                 return IterativeParsingErrorState;
             }
-            else {
+            else
+            {
                 is.Take();
                 return n;
             }
         }
 
-        case IterativeParsingArrayFinishState:
-        {
+        case IterativeParsingArrayFinishState: {
             // Transit from delimiter is only allowed when trailing commas are enabled
-            if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) {
+            if(!(parseFlags & kParseTrailingCommasFlag) &&
+               src == IterativeParsingElementDelimiterState)
+            {
                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorValueInvalid, is.Tell());
                 return IterativeParsingErrorState;
             }
             // Get element count.
             SizeType c = *stack_.template Pop<SizeType>(1);
             // If the array is not empty, count the last element.
-            if (src == IterativeParsingElementState)
+            if(src == IterativeParsingElementState)
                 ++c;
             // Restore the state.
-            IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
+            IterativeParsingState n =
+                static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
             // Transit to Finish state if this is the topmost scope.
-            if (n == IterativeParsingStartState)
+            if(n == IterativeParsingStartState)
                 n = IterativeParsingFinishState;
             // Call handler
             bool hr = handler.EndArray(c);
             // On handler short circuits the parsing.
-            if (!hr) {
+            if(!hr)
+            {
                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
                 return IterativeParsingErrorState;
             }
-            else {
+            else
+            {
                 is.Take();
                 return n;
             }
@@ -2152,7 +2660,8 @@ private:
 
             // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
             ParseValue<parseFlags>(is, handler);
-            if (HasParseError()) {
+            if(HasParseError())
+            {
                 return IterativeParsingErrorState;
             }
             return IterativeParsingFinishState;
@@ -2160,48 +2669,71 @@ private:
     }
 
     template <typename InputStream>
-    void HandleError(IterativeParsingState src, InputStream& is) {
-        if (HasParseError()) {
+    void HandleError(IterativeParsingState src, InputStream& is)
+    {
+        if(HasParseError())
+        {
             // Error flag has been set.
             return;
         }
 
-        switch (src) {
-        case IterativeParsingStartState:            RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return;
-        case IterativeParsingFinishState:           RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return;
+        switch(src)
+        {
+        case IterativeParsingStartState:
+            RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell());
+            return;
+        case IterativeParsingFinishState:
+            RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell());
+            return;
         case IterativeParsingObjectInitialState:
-        case IterativeParsingMemberDelimiterState:  RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return;
-        case IterativeParsingMemberKeyState:        RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return;
-        case IterativeParsingMemberValueState:      RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return;
+        case IterativeParsingMemberDelimiterState:
+            RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
+            return;
+        case IterativeParsingMemberKeyState:
+            RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
+            return;
+        case IterativeParsingMemberValueState:
+            RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell());
+            return;
         case IterativeParsingKeyValueDelimiterState:
         case IterativeParsingArrayInitialState:
-        case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return;
-        default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return;
+        case IterativeParsingElementDelimiterState:
+            RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
+            return;
+        default:
+            RAPIDJSON_ASSERT(src == IterativeParsingElementState);
+            RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
+            return;
         }
     }
 
-    RAPIDJSON_FORCEINLINE bool IsIterativeParsingDelimiterState(IterativeParsingState s) const {
+    RAPIDJSON_FORCEINLINE bool IsIterativeParsingDelimiterState(IterativeParsingState s) const
+    {
         return s >= IterativeParsingElementDelimiterState;
     }
 
-    RAPIDJSON_FORCEINLINE bool IsIterativeParsingCompleteState(IterativeParsingState s) const {
+    RAPIDJSON_FORCEINLINE bool IsIterativeParsingCompleteState(IterativeParsingState s) const
+    {
         return s <= IterativeParsingErrorState;
     }
 
     template <unsigned parseFlags, typename InputStream, typename Handler>
-    ParseResult IterativeParse(InputStream& is, Handler& handler) {
+    ParseResult IterativeParse(InputStream& is, Handler& handler)
+    {
         parseResult_.Clear();
         ClearStackOnExit scope(*this);
         IterativeParsingState state = IterativeParsingStartState;
 
         SkipWhitespaceAndComments<parseFlags>(is);
         RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
-        while (is.Peek() != '\0') {
-            Token t = Tokenize(is.Peek());
+        while(is.Peek() != '\0')
+        {
+            Token t                 = Tokenize(is.Peek());
             IterativeParsingState n = Predict(state, t);
             IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
 
-            if (d == IterativeParsingErrorState) {
+            if(d == IterativeParsingErrorState)
+            {
                 HandleError(state, is);
                 break;
             }
@@ -2209,7 +2741,7 @@ private:
             state = d;
 
             // Do not further consume streams if a root JSON has been parsed.
-            if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
+            if((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
                 break;
 
             SkipWhitespaceAndComments<parseFlags>(is);
@@ -2217,20 +2749,22 @@ private:
         }
 
         // Handle the end of file.
-        if (state != IterativeParsingFinishState)
+        if(state != IterativeParsingFinishState)
             HandleError(state, is);
 
         return parseResult_;
     }
 
-    static const size_t kDefaultStackCapacity = 256;    //!< Default stack capacity in bytes for storing a single decoded string.
-    internal::Stack<StackAllocator> stack_;  //!< A stack for storing decoded string temporarily during non-destructive parsing.
+    static const size_t kDefaultStackCapacity =
+        256; //!< Default stack capacity in bytes for storing a single decoded string.
+    internal::Stack<StackAllocator>
+        stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing.
     ParseResult parseResult_;
     IterativeParsingState state_;
 }; // class GenericReader
 
 //! Reader with UTF8 encoding and default allocator.
-typedef GenericReader<UTF8<>, UTF8<> > Reader;
+typedef GenericReader<UTF8<>, UTF8<>> Reader;
 
 RAPIDJSON_NAMESPACE_END
 
@@ -2238,7 +2772,6 @@ RAPIDJSON_NAMESPACE_END
 RAPIDJSON_DIAG_POP
 #endif
 
-
 #ifdef __GNUC__
 RAPIDJSON_DIAG_POP
 #endif
diff --git a/include/rapidjson/schema.h b/include/rapidjson/schema.h
index f049285f4e..8a542afc66 100644
--- a/include/rapidjson/schema.h
+++ b/include/rapidjson/schema.h
@@ -26,7 +26,8 @@
 #define RAPIDJSON_SCHEMA_USE_INTERNALREGEX 1
 #endif
 
-#if !defined(RAPIDJSON_SCHEMA_USE_STDREGEX) || !(__cplusplus >=201103L || (defined(_MSC_VER) && _MSC_VER >= 1800))
+#if !defined(RAPIDJSON_SCHEMA_USE_STDREGEX) || \
+    !(__cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1800))
 #define RAPIDJSON_SCHEMA_USE_STDREGEX 0
 #endif
 
@@ -53,10 +54,10 @@ RAPIDJSON_DIAG_OFF(effc++)
 #endif
 
 #ifdef __clang__
-RAPIDJSON_DIAG_OFF(weak-vtables)
-RAPIDJSON_DIAG_OFF(exit-time-destructors)
-RAPIDJSON_DIAG_OFF(c++98-compat-pedantic)
-RAPIDJSON_DIAG_OFF(variadic-macros)
+RAPIDJSON_DIAG_OFF(weak - vtables)
+RAPIDJSON_DIAG_OFF(exit - time - destructors)
+RAPIDJSON_DIAG_OFF(c++ 98 - compat - pedantic)
+RAPIDJSON_DIAG_OFF(variadic - macros)
 #elif defined(_MSC_VER)
 RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
 #endif
@@ -70,71 +71,86 @@ RAPIDJSON_NAMESPACE_BEGIN
 
 namespace internal {
 
-inline void PrintInvalidKeywordData(const char* keyword) {
+inline void PrintInvalidKeywordData(const char* keyword)
+{
     printf("    Fail keyword: '%s'\n", keyword);
 }
 
-inline void PrintInvalidKeywordData(const wchar_t* keyword) {
+inline void PrintInvalidKeywordData(const wchar_t* keyword)
+{
     wprintf(L"    Fail keyword: '%ls'\n", keyword);
 }
 
-inline void PrintInvalidDocumentData(const char* document) {
+inline void PrintInvalidDocumentData(const char* document)
+{
     printf("    Fail document: '%s'\n", document);
 }
 
-inline void PrintInvalidDocumentData(const wchar_t* document) {
+inline void PrintInvalidDocumentData(const wchar_t* document)
+{
     wprintf(L"    Fail document: '%ls'\n", document);
 }
 
-inline void PrintValidatorPointersData(const char* s, const char* d, unsigned depth) {
+inline void PrintValidatorPointersData(const char* s, const char* d, unsigned depth)
+{
     printf("    Sch: %*s'%s'\n    Doc: %*s'%s'\n", depth * 4, " ", s, depth * 4, " ", d);
 }
 
-inline void PrintValidatorPointersData(const wchar_t* s, const wchar_t* d, unsigned depth) {
+inline void PrintValidatorPointersData(const wchar_t* s, const wchar_t* d, unsigned depth)
+{
     wprintf(L"    Sch: %*ls'%ls'\n    Doc: %*ls'%ls'\n", depth * 4, L" ", s, depth * 4, L" ", d);
 }
 
-inline void PrintSchemaIdsData(const char* base, const char* local, const char* resolved) {
+inline void PrintSchemaIdsData(const char* base, const char* local, const char* resolved)
+{
     printf("    Resolving id: Base: '%s', Local: '%s', Resolved: '%s'\n", base, local, resolved);
 }
 
-inline void PrintSchemaIdsData(const wchar_t* base, const wchar_t* local, const wchar_t* resolved) {
-    wprintf(L"    Resolving id: Base: '%ls', Local: '%ls', Resolved: '%ls'\n", base, local, resolved);
+inline void PrintSchemaIdsData(const wchar_t* base, const wchar_t* local, const wchar_t* resolved)
+{
+    wprintf(
+        L"    Resolving id: Base: '%ls', Local: '%ls', Resolved: '%ls'\n", base, local, resolved);
 }
 
-inline void PrintMethodData(const char* method) {
-    printf("%s\n", method);
-}
+inline void PrintMethodData(const char* method) { printf("%s\n", method); }
 
-inline void PrintMethodData(const char* method, bool b) {
+inline void PrintMethodData(const char* method, bool b)
+{
     printf("%s, Data: '%s'\n", method, b ? "true" : "false");
 }
 
-inline void PrintMethodData(const char* method, int64_t i) {
+inline void PrintMethodData(const char* method, int64_t i)
+{
     printf("%s, Data: '%" PRId64 "'\n", method, i);
 }
 
-inline void PrintMethodData(const char* method, uint64_t u) {
+inline void PrintMethodData(const char* method, uint64_t u)
+{
     printf("%s, Data: '%" PRIu64 "'\n", method, u);
 }
 
-inline void PrintMethodData(const char* method, double d) {
+inline void PrintMethodData(const char* method, double d)
+{
     printf("%s, Data: '%lf'\n", method, d);
 }
 
-inline void PrintMethodData(const char* method, const char* s) {
+inline void PrintMethodData(const char* method, const char* s)
+{
     printf("%s, Data: '%s'\n", method, s);
 }
 
-inline void PrintMethodData(const char* method, const wchar_t* s) {
+inline void PrintMethodData(const char* method, const wchar_t* s)
+{
     wprintf(L"%hs, Data: '%ls'\n", method, s);
 }
 
-inline void PrintMethodData(const char* method, const char* s1, const char* s2) {
+inline void PrintMethodData(const char* method, const char* s1, const char* s2)
+{
     printf("%s, Data: '%s', '%s'\n", method, s1, s2);
 }
 
-inline void PrintMethodData(const char* method, const wchar_t* s1, const wchar_t* s2) {
+inline void PrintMethodData(const char* method, const wchar_t* s1, const wchar_t* s2)
+{
     wprintf(L"%hs, Data: '%ls', '%ls'\n", method, s1, s2);
 }
 
@@ -153,13 +169,13 @@ inline void PrintMethodData(const char* method, const wchar_t* s1, const wchar_t
 ///////////////////////////////////////////////////////////////////////////////
 // RAPIDJSON_INVALID_KEYWORD_RETURN
 
-#define RAPIDJSON_INVALID_KEYWORD_RETURN(code)\
-RAPIDJSON_MULTILINEMACRO_BEGIN\
-    context.invalidCode = code;\
-    context.invalidKeyword = SchemaType::GetValidateErrorKeyword(code).GetString();\
-    RAPIDJSON_SCHEMA_PRINT(InvalidKeyword, context.invalidKeyword);\
-    return false;\
-RAPIDJSON_MULTILINEMACRO_END
+#define RAPIDJSON_INVALID_KEYWORD_RETURN(code)                                      \
+    RAPIDJSON_MULTILINEMACRO_BEGIN                                                  \
+    context.invalidCode    = code;                                                  \
+    context.invalidKeyword = SchemaType::GetValidateErrorKeyword(code).GetString(); \
+    RAPIDJSON_SCHEMA_PRINT(InvalidKeyword, context.invalidKeyword);                 \
+    return false;                                                                   \
+    RAPIDJSON_MULTILINEMACRO_END
 
 ///////////////////////////////////////////////////////////////////////////////
 // ValidateFlag
@@ -175,51 +191,64 @@ RAPIDJSON_MULTILINEMACRO_END
 #endif
 
 //! Combination of validate flags
-enum ValidateFlag {
-    kValidateNoFlags = 0,                                       //!< No flags are set.
-    kValidateContinueOnErrorFlag = 1,                           //!< Don't stop after first validation error.
-    kValidateReadFlag = 2,                                      //!< Validation is for a read semantic.
-    kValidateWriteFlag = 4,                                     //!< Validation is for a write semantic.
-    kValidateDefaultFlags = RAPIDJSON_VALIDATE_DEFAULT_FLAGS    //!< Default validate flags. Can be customized by defining RAPIDJSON_VALIDATE_DEFAULT_FLAGS
+enum ValidateFlag
+{
+    kValidateNoFlags             = 0, //!< No flags are set.
+    kValidateContinueOnErrorFlag = 1, //!< Don't stop after first validation error.
+    kValidateReadFlag            = 2, //!< Validation is for a read semantic.
+    kValidateWriteFlag           = 4, //!< Validation is for a write semantic.
+    kValidateDefaultFlags =
+        RAPIDJSON_VALIDATE_DEFAULT_FLAGS //!< Default validate flags. Can be customized by defining
+                                         //!< RAPIDJSON_VALIDATE_DEFAULT_FLAGS
 };
 
 ///////////////////////////////////////////////////////////////////////////////
 // Specification
-enum SchemaDraft {
+enum SchemaDraft
+{
     kDraftUnknown = -1,
-    kDraftNone = 0,
-    kDraft03 = 3,
-    kDraftMin = 4,                       //!< Current minimum supported draft
-    kDraft04 = 4,
-    kDraft05 = 5,
-    kDraftMax = 5,                       //!< Current maximum supported draft
-    kDraft06 = 6,
-    kDraft07 = 7,
+    kDraftNone    = 0,
+    kDraft03      = 3,
+    kDraftMin     = 4, //!< Current minimum supported draft
+    kDraft04      = 4,
+    kDraft05      = 5,
+    kDraftMax     = 5, //!< Current maximum supported draft
+    kDraft06      = 6,
+    kDraft07      = 7,
     kDraft2019_09 = 8,
     kDraft2020_12 = 9
 };
 
-enum OpenApiVersion {
+enum OpenApiVersion
+{
     kVersionUnknown = -1,
-    kVersionNone = 0,
-    kVersionMin = 2,                      //!< Current minimum supported version
-    kVersion20 = 2,
-    kVersion30 = 3,
-    kVersionMax = 3,                      //!< Current maximum supported version
-    kVersion31 = 4,
+    kVersionNone    = 0,
+    kVersionMin     = 2, //!< Current minimum supported version
+    kVersion20      = 2,
+    kVersion30      = 3,
+    kVersionMax     = 3, //!< Current maximum supported version
+    kVersion31      = 4,
 };
 
-struct Specification {
+struct Specification
+{
     Specification(SchemaDraft d) : draft(d), oapi(kVersionNone) {}
-    Specification(OpenApiVersion o) : oapi(o) {
-        if (oapi == kVersion20) draft = kDraft04;
-        else if (oapi == kVersion30) draft = kDraft05;
-        else if (oapi == kVersion31) draft = kDraft2020_12;
-        else draft = kDraft04;
+    Specification(OpenApiVersion o) : oapi(o)
+    {
+        if(oapi == kVersion20)
+            draft = kDraft04;
+        else if(oapi == kVersion30)
+            draft = kDraft05;
+        else if(oapi == kVersion31)
+            draft = kDraft2020_12;
+        else
+            draft = kDraft04;
     }
     ~Specification() {}
-    bool IsSupported() const {
-        return ((draft >= kDraftMin && draft <= kDraftMax) && ((oapi == kVersionNone) || (oapi >= kVersionMin && oapi <= kVersionMax)));
+    bool IsSupported() const
+    {
+        return ((draft >= kDraftMin && draft <= kDraftMax) &&
+                ((oapi == kVersionNone) || (oapi >= kVersionMin && oapi <= kVersionMax)));
     }
     SchemaDraft draft;
     OpenApiVersion oapi;
@@ -239,142 +268,182 @@ class Schema;
 ///////////////////////////////////////////////////////////////////////////////
 // ISchemaValidator
 
-class ISchemaValidator {
-public:
+class ISchemaValidator
+{
+    public:
     virtual ~ISchemaValidator() {}
-    virtual bool IsValid() const = 0;
+    virtual bool IsValid() const                  = 0;
     virtual void SetValidateFlags(unsigned flags) = 0;
-    virtual unsigned GetValidateFlags() const = 0;
+    virtual unsigned GetValidateFlags() const     = 0;
 };
 
 ///////////////////////////////////////////////////////////////////////////////
 // ISchemaStateFactory
 
 template <typename SchemaType>
-class ISchemaStateFactory {
-public:
+class ISchemaStateFactory
+{
+    public:
     virtual ~ISchemaStateFactory() {}
-    virtual ISchemaValidator* CreateSchemaValidator(const SchemaType&, const bool inheritContinueOnErrors) = 0;
-    virtual void DestroySchemaValidator(ISchemaValidator* validator) = 0;
-    virtual void* CreateHasher() = 0;
-    virtual uint64_t GetHashCode(void* hasher) = 0;
-    virtual void DestroryHasher(void* hasher) = 0;
-    virtual void* MallocState(size_t size) = 0;
-    virtual void FreeState(void* p) = 0;
+    virtual ISchemaValidator* CreateSchemaValidator(const SchemaType&,
+                                                    const bool inheritContinueOnErrors) = 0;
+    virtual void DestroySchemaValidator(ISchemaValidator* validator)                    = 0;
+    virtual void* CreateHasher()                                                        = 0;
+    virtual uint64_t GetHashCode(void* hasher)                                          = 0;
+    virtual void DestroryHasher(void* hasher)                                           = 0;
+    virtual void* MallocState(size_t size)                                              = 0;
+    virtual void FreeState(void* p)                                                     = 0;
 };
 
 ///////////////////////////////////////////////////////////////////////////////
 // IValidationErrorHandler
 
 template <typename SchemaType>
-class IValidationErrorHandler {
-public:
+class IValidationErrorHandler
+{
+    public:
     typedef typename SchemaType::Ch Ch;
     typedef typename SchemaType::SValue SValue;
 
     virtual ~IValidationErrorHandler() {}
 
-    virtual void NotMultipleOf(int64_t actual, const SValue& expected) = 0;
-    virtual void NotMultipleOf(uint64_t actual, const SValue& expected) = 0;
-    virtual void NotMultipleOf(double actual, const SValue& expected) = 0;
-    virtual void AboveMaximum(int64_t actual, const SValue& expected, bool exclusive) = 0;
+    virtual void NotMultipleOf(int64_t actual, const SValue& expected)                 = 0;
+    virtual void NotMultipleOf(uint64_t actual, const SValue& expected)                = 0;
+    virtual void NotMultipleOf(double actual, const SValue& expected)                  = 0;
+    virtual void AboveMaximum(int64_t actual, const SValue& expected, bool exclusive)  = 0;
     virtual void AboveMaximum(uint64_t actual, const SValue& expected, bool exclusive) = 0;
-    virtual void AboveMaximum(double actual, const SValue& expected, bool exclusive) = 0;
-    virtual void BelowMinimum(int64_t actual, const SValue& expected, bool exclusive) = 0;
+    virtual void AboveMaximum(double actual, const SValue& expected, bool exclusive)   = 0;
+    virtual void BelowMinimum(int64_t actual, const SValue& expected, bool exclusive)  = 0;
     virtual void BelowMinimum(uint64_t actual, const SValue& expected, bool exclusive) = 0;
-    virtual void BelowMinimum(double actual, const SValue& expected, bool exclusive) = 0;
+    virtual void BelowMinimum(double actual, const SValue& expected, bool exclusive)   = 0;
 
-    virtual void TooLong(const Ch* str, SizeType length, SizeType expected) = 0;
+    virtual void TooLong(const Ch* str, SizeType length, SizeType expected)  = 0;
     virtual void TooShort(const Ch* str, SizeType length, SizeType expected) = 0;
-    virtual void DoesNotMatch(const Ch* str, SizeType length) = 0;
+    virtual void DoesNotMatch(const Ch* str, SizeType length)                = 0;
 
-    virtual void DisallowedItem(SizeType index) = 0;
-    virtual void TooFewItems(SizeType actualCount, SizeType expectedCount) = 0;
+    virtual void DisallowedItem(SizeType index)                             = 0;
+    virtual void TooFewItems(SizeType actualCount, SizeType expectedCount)  = 0;
     virtual void TooManyItems(SizeType actualCount, SizeType expectedCount) = 0;
-    virtual void DuplicateItems(SizeType index1, SizeType index2) = 0;
+    virtual void DuplicateItems(SizeType index1, SizeType index2)           = 0;
 
-    virtual void TooManyProperties(SizeType actualCount, SizeType expectedCount) = 0;
-    virtual void TooFewProperties(SizeType actualCount, SizeType expectedCount) = 0;
-    virtual void StartMissingProperties() = 0;
-    virtual void AddMissingProperty(const SValue& name) = 0;
-    virtual bool EndMissingProperties() = 0;
+    virtual void TooManyProperties(SizeType actualCount, SizeType expectedCount)      = 0;
+    virtual void TooFewProperties(SizeType actualCount, SizeType expectedCount)       = 0;
+    virtual void StartMissingProperties()                                             = 0;
+    virtual void AddMissingProperty(const SValue& name)                               = 0;
+    virtual bool EndMissingProperties()                                               = 0;
     virtual void PropertyViolations(ISchemaValidator** subvalidators, SizeType count) = 0;
-    virtual void DisallowedProperty(const Ch* name, SizeType length) = 0;
+    virtual void DisallowedProperty(const Ch* name, SizeType length)                  = 0;
 
-    virtual void StartDependencyErrors() = 0;
-    virtual void StartMissingDependentProperties() = 0;
-    virtual void AddMissingDependentProperty(const SValue& targetName) = 0;
-    virtual void EndMissingDependentProperties(const SValue& sourceName) = 0;
-    virtual void AddDependencySchemaError(const SValue& souceName, ISchemaValidator* subvalidator) = 0;
-    virtual bool EndDependencyErrors() = 0;
+    virtual void StartDependencyErrors()                                  = 0;
+    virtual void StartMissingDependentProperties()                        = 0;
+    virtual void AddMissingDependentProperty(const SValue& targetName)    = 0;
+    virtual void EndMissingDependentProperties(const SValue& sourceName)  = 0;
+    virtual void AddDependencySchemaError(const SValue& souceName,
+                                          ISchemaValidator* subvalidator) = 0;
+    virtual bool EndDependencyErrors()                                    = 0;
 
-    virtual void DisallowedValue(const ValidateErrorCode code) = 0;
-    virtual void StartDisallowedType() = 0;
+    virtual void DisallowedValue(const ValidateErrorCode code)                       = 0;
+    virtual void StartDisallowedType()                                               = 0;
     virtual void AddExpectedType(const typename SchemaType::ValueType& expectedType) = 0;
     virtual void EndDisallowedType(const typename SchemaType::ValueType& actualType) = 0;
-    virtual void NotAllOf(ISchemaValidator** subvalidators, SizeType count) = 0;
-    virtual void NoneOf(ISchemaValidator** subvalidators, SizeType count) = 0;
-    virtual void NotOneOf(ISchemaValidator** subvalidators, SizeType count) = 0;
-    virtual void MultipleOneOf(SizeType index1, SizeType index2) = 0;
-    virtual void Disallowed() = 0;
-    virtual void DisallowedWhenWriting() = 0;
-    virtual void DisallowedWhenReading() = 0;
+    virtual void NotAllOf(ISchemaValidator** subvalidators, SizeType count)          = 0;
+    virtual void NoneOf(ISchemaValidator** subvalidators, SizeType count)            = 0;
+    virtual void NotOneOf(ISchemaValidator** subvalidators, SizeType count)          = 0;
+    virtual void MultipleOneOf(SizeType index1, SizeType index2)                     = 0;
+    virtual void Disallowed()                                                        = 0;
+    virtual void DisallowedWhenWriting()                                             = 0;
+    virtual void DisallowedWhenReading()                                             = 0;
 };
 
-
 ///////////////////////////////////////////////////////////////////////////////
 // Hasher
 
 // For comparison of compound value
-template<typename Encoding, typename Allocator>
-class Hasher {
-public:
+template <typename Encoding, typename Allocator>
+class Hasher
+{
+    public:
     typedef typename Encoding::Ch Ch;
 
-    Hasher(Allocator* allocator = 0, size_t stackCapacity = kDefaultSize) : stack_(allocator, stackCapacity) {}
+    Hasher(Allocator* allocator = 0, size_t stackCapacity = kDefaultSize)
+        : stack_(allocator, stackCapacity)
+    {
+    }
 
     bool Null() { return WriteType(kNullType); }
     bool Bool(bool b) { return WriteType(b ? kTrueType : kFalseType); }
-    bool Int(int i) { Number n; n.u.i = i; n.d = static_cast<double>(i); return WriteNumber(n); }
-    bool Uint(unsigned u) { Number n; n.u.u = u; n.d = static_cast<double>(u); return WriteNumber(n); }
-    bool Int64(int64_t i) { Number n; n.u.i = i; n.d = static_cast<double>(i); return WriteNumber(n); }
-    bool Uint64(uint64_t u) { Number n; n.u.u = u; n.d = static_cast<double>(u); return WriteNumber(n); }
-    bool Double(double d) {
+    bool Int(int i)
+    {
         Number n;
-        if (d < 0) n.u.i = static_cast<int64_t>(d);
-        else       n.u.u = static_cast<uint64_t>(d);
+        n.u.i = i;
+        n.d   = static_cast<double>(i);
+        return WriteNumber(n);
+    }
+    bool Uint(unsigned u)
+    {
+        Number n;
+        n.u.u = u;
+        n.d   = static_cast<double>(u);
+        return WriteNumber(n);
+    }
+    bool Int64(int64_t i)
+    {
+        Number n;
+        n.u.i = i;
+        n.d   = static_cast<double>(i);
+        return WriteNumber(n);
+    }
+    bool Uint64(uint64_t u)
+    {
+        Number n;
+        n.u.u = u;
+        n.d   = static_cast<double>(u);
+        return WriteNumber(n);
+    }
+    bool Double(double d)
+    {
+        Number n;
+        if(d < 0)
+            n.u.i = static_cast<int64_t>(d);
+        else
+            n.u.u = static_cast<uint64_t>(d);
         n.d = d;
         return WriteNumber(n);
     }
 
-    bool RawNumber(const Ch* str, SizeType len, bool) {
+    bool RawNumber(const Ch* str, SizeType len, bool)
+    {
         WriteBuffer(kNumberType, str, len * sizeof(Ch));
         return true;
     }
 
-    bool String(const Ch* str, SizeType len, bool) {
+    bool String(const Ch* str, SizeType len, bool)
+    {
         WriteBuffer(kStringType, str, len * sizeof(Ch));
         return true;
     }
 
     bool StartObject() { return true; }
     bool Key(const Ch* str, SizeType len, bool copy) { return String(str, len, copy); }
-    bool EndObject(SizeType memberCount) { 
-        uint64_t h = Hash(0, kObjectType);
+    bool EndObject(SizeType memberCount)
+    {
+        uint64_t h   = Hash(0, kObjectType);
         uint64_t* kv = stack_.template Pop<uint64_t>(memberCount * 2);
-        for (SizeType i = 0; i < memberCount; i++)
+        for(SizeType i = 0; i < memberCount; i++)
             // Issue #2205
             // Hasing the key to avoid key=value cases with bug-prone zero-value hash
-            h ^= Hash(Hash(0, kv[i * 2]), kv[i * 2 + 1]);  // Use xor to achieve member order insensitive
+            h ^= Hash(Hash(0, kv[i * 2]),
+                      kv[i * 2 + 1]); // Use xor to achieve member order insensitive
         *stack_.template Push<uint64_t>() = h;
         return true;
     }
-    
+
     bool StartArray() { return true; }
-    bool EndArray(SizeType elementCount) { 
-        uint64_t h = Hash(0, kArrayType);
+    bool EndArray(SizeType elementCount)
+    {
+        uint64_t h  = Hash(0, kArrayType);
         uint64_t* e = stack_.template Pop<uint64_t>(elementCount);
-        for (SizeType i = 0; i < elementCount; i++)
+        for(SizeType i = 0; i < elementCount; i++)
             h = Hash(h, e[i]); // Use hash to achieve element order sensitive
         *stack_.template Push<uint64_t>() = h;
         return true;
@@ -382,36 +451,41 @@ public:
 
     bool IsValid() const { return stack_.GetSize() == sizeof(uint64_t); }
 
-    uint64_t GetHashCode() const {
+    uint64_t GetHashCode() const
+    {
         RAPIDJSON_ASSERT(IsValid());
         return *stack_.template Top<uint64_t>();
     }
 
-private:
+    private:
     static const size_t kDefaultSize = 256;
-    struct Number {
-        union U {
+    struct Number
+    {
+        union U
+        {
             uint64_t u;
             int64_t i;
-        }u;
+        } u;
         double d;
     };
 
     bool WriteType(Type type) { return WriteBuffer(type, 0, 0); }
-    
+
     bool WriteNumber(const Number& n) { return WriteBuffer(kNumberType, &n, sizeof(n)); }
-    
-    bool WriteBuffer(Type type, const void* data, size_t len) {
+
+    bool WriteBuffer(Type type, const void* data, size_t len)
+    {
         // FNV-1a from http://isthe.com/chongo/tech/comp/fnv/
-        uint64_t h = Hash(RAPIDJSON_UINT64_C2(0xcbf29ce4, 0x84222325), type);
+        uint64_t h             = Hash(RAPIDJSON_UINT64_C2(0xcbf29ce4, 0x84222325), type);
         const unsigned char* d = static_cast<const unsigned char*>(data);
-        for (size_t i = 0; i < len; i++)
+        for(size_t i = 0; i < len; i++)
             h = Hash(h, d[i]);
         *stack_.template Push<uint64_t>() = h;
         return true;
     }
 
-    static uint64_t Hash(uint64_t h, uint64_t d) {
+    static uint64_t Hash(uint64_t h, uint64_t d)
+    {
         static const uint64_t kPrime = RAPIDJSON_UINT64_C2(0x00000100, 0x000001b3);
         h ^= d;
         h *= kPrime;
@@ -425,65 +499,77 @@ private:
 // SchemaValidationContext
 
 template <typename SchemaDocumentType>
-struct SchemaValidationContext {
+struct SchemaValidationContext
+{
     typedef Schema<SchemaDocumentType> SchemaType;
     typedef ISchemaStateFactory<SchemaType> SchemaValidatorFactoryType;
     typedef IValidationErrorHandler<SchemaType> ErrorHandlerType;
     typedef typename SchemaType::ValueType ValueType;
     typedef typename ValueType::Ch Ch;
 
-    enum PatternValidatorType {
+    enum PatternValidatorType
+    {
         kPatternValidatorOnly,
         kPatternValidatorWithProperty,
         kPatternValidatorWithAdditionalProperty
     };
 
-    SchemaValidationContext(SchemaValidatorFactoryType& f, ErrorHandlerType& eh, const SchemaType* s, unsigned fl = 0) :
-        factory(f),
-        error_handler(eh),
-        schema(s),
-        flags(fl),
-        valueSchema(),
-        invalidKeyword(),
-        invalidCode(),
-        hasher(),
-        arrayElementHashCodes(),
-        validators(),
-        validatorCount(),
-        patternPropertiesValidators(),
-        patternPropertiesValidatorCount(),
-        patternPropertiesSchemas(),
-        patternPropertiesSchemaCount(),
-        valuePatternValidatorType(kPatternValidatorOnly),
-        propertyExist(),
-        inArray(false),
-        valueUniqueness(false),
-        arrayUniqueness(false)
+    SchemaValidationContext(SchemaValidatorFactoryType& f,
+                            ErrorHandlerType& eh,
+                            const SchemaType* s,
+                            unsigned fl = 0)
+        : factory(f),
+          error_handler(eh),
+          schema(s),
+          flags(fl),
+          valueSchema(),
+          invalidKeyword(),
+          invalidCode(),
+          hasher(),
+          arrayElementHashCodes(),
+          validators(),
+          validatorCount(),
+          patternPropertiesValidators(),
+          patternPropertiesValidatorCount(),
+          patternPropertiesSchemas(),
+          patternPropertiesSchemaCount(),
+          valuePatternValidatorType(kPatternValidatorOnly),
+          propertyExist(),
+          inArray(false),
+          valueUniqueness(false),
+          arrayUniqueness(false)
     {
     }
 
-    ~SchemaValidationContext() {
-        if (hasher)
+    ~SchemaValidationContext()
+    {
+        if(hasher)
             factory.DestroryHasher(hasher);
-        if (validators) {
-            for (SizeType i = 0; i < validatorCount; i++) {
-                if (validators[i]) {
+        if(validators)
+        {
+            for(SizeType i = 0; i < validatorCount; i++)
+            {
+                if(validators[i])
+                {
                     factory.DestroySchemaValidator(validators[i]);
                 }
             }
             factory.FreeState(validators);
         }
-        if (patternPropertiesValidators) {
-            for (SizeType i = 0; i < patternPropertiesValidatorCount; i++) {
-                if (patternPropertiesValidators[i]) {
+        if(patternPropertiesValidators)
+        {
+            for(SizeType i = 0; i < patternPropertiesValidatorCount; i++)
+            {
+                if(patternPropertiesValidators[i])
+                {
                     factory.DestroySchemaValidator(patternPropertiesValidators[i]);
                 }
             }
             factory.FreeState(patternPropertiesValidators);
         }
-        if (patternPropertiesSchemas)
+        if(patternPropertiesSchemas)
             factory.FreeState(patternPropertiesSchemas);
-        if (propertyExist)
+        if(propertyExist)
             factory.FreeState(propertyExist);
     }
 
@@ -494,7 +580,7 @@ struct SchemaValidationContext {
     const SchemaType* valueSchema;
     const Ch* invalidKeyword;
     ValidateErrorCode invalidCode;
-    void* hasher; // Only validator access
+    void* hasher;                // Only validator access
     void* arrayElementHashCodes; // Only validator access this
     ISchemaValidator** validators;
     SizeType validatorCount;
@@ -515,8 +601,9 @@ struct SchemaValidationContext {
 // Schema
 
 template <typename SchemaDocumentType>
-class Schema {
-public:
+class Schema
+{
+    public:
     typedef typename SchemaDocumentType::ValueType ValueType;
     typedef typename SchemaDocumentType::AllocatorType AllocatorType;
     typedef typename SchemaDocumentType::PointerType PointerType;
@@ -529,47 +616,52 @@ public:
     typedef GenericUri<ValueType, AllocatorType> UriType;
     friend class GenericSchemaDocument<ValueType, AllocatorType>;
 
-    Schema(SchemaDocumentType* schemaDocument, const PointerType& p, const ValueType& value, const ValueType& document, AllocatorType* allocator, const UriType& id = UriType()) :
-        allocator_(allocator),
-        uri_(schemaDocument->GetURI(), *allocator),
-        id_(id, allocator),
-        spec_(schemaDocument->GetSpecification()),
-        pointer_(p, allocator),
-        typeless_(schemaDocument->GetTypeless()),
-        enum_(),
-        enumCount_(),
-        not_(),
-        type_((1 << kTotalSchemaType) - 1), // typeless
-        validatorCount_(),
-        notValidatorIndex_(),
-        properties_(),
-        additionalPropertiesSchema_(),
-        patternProperties_(),
-        patternPropertyCount_(),
-        propertyCount_(),
-        minProperties_(),
-        maxProperties_(SizeType(~0)),
-        additionalProperties_(true),
-        hasDependencies_(),
-        hasRequired_(),
-        hasSchemaDependencies_(),
-        additionalItemsSchema_(),
-        itemsList_(),
-        itemsTuple_(),
-        itemsTupleCount_(),
-        minItems_(),
-        maxItems_(SizeType(~0)),
-        additionalItems_(true),
-        uniqueItems_(false),
-        pattern_(),
-        minLength_(0),
-        maxLength_(~SizeType(0)),
-        exclusiveMinimum_(false),
-        exclusiveMaximum_(false),
-        defaultValueLength_(0),
-        readOnly_(false),
-        writeOnly_(false),
-        nullable_(false)
+    Schema(SchemaDocumentType* schemaDocument,
+           const PointerType& p,
+           const ValueType& value,
+           const ValueType& document,
+           AllocatorType* allocator,
+           const UriType& id = UriType())
+        : allocator_(allocator),
+          uri_(schemaDocument->GetURI(), *allocator),
+          id_(id, allocator),
+          spec_(schemaDocument->GetSpecification()),
+          pointer_(p, allocator),
+          typeless_(schemaDocument->GetTypeless()),
+          enum_(),
+          enumCount_(),
+          not_(),
+          type_((1 << kTotalSchemaType) - 1), // typeless
+          validatorCount_(),
+          notValidatorIndex_(),
+          properties_(),
+          additionalPropertiesSchema_(),
+          patternProperties_(),
+          patternPropertyCount_(),
+          propertyCount_(),
+          minProperties_(),
+          maxProperties_(SizeType(~0)),
+          additionalProperties_(true),
+          hasDependencies_(),
+          hasRequired_(),
+          hasSchemaDependencies_(),
+          additionalItemsSchema_(),
+          itemsList_(),
+          itemsTuple_(),
+          itemsTupleCount_(),
+          minItems_(),
+          maxItems_(SizeType(~0)),
+          additionalItems_(true),
+          uniqueItems_(false),
+          pattern_(),
+          minLength_(0),
+          maxLength_(~SizeType(0)),
+          exclusiveMinimum_(false),
+          exclusiveMaximum_(false),
+          defaultValueLength_(0),
+          readOnly_(false),
+          writeOnly_(false),
+          nullable_(false)
     {
         GenericStringBuffer<EncodingType> sb;
         p.StringifyUriFragment(sb);
@@ -582,41 +674,49 @@ public:
         // Early add this Schema and its $ref(s) in schemaDocument's map to avoid infinite
         // recursion (with recursive schemas), since schemaDocument->getSchema() is always
         // checked before creating a new one. Don't cache typeless_, though.
-        if (this != typeless_) {
-          typedef typename SchemaDocumentType::SchemaEntry SchemaEntry;
-          SchemaEntry *entry = schemaDocument->schemaMap_.template Push<SchemaEntry>();
-          new (entry) SchemaEntry(pointer_, this, true, allocator_);
-          schemaDocument->AddSchemaRefs(this);
+        if(this != typeless_)
+        {
+            typedef typename SchemaDocumentType::SchemaEntry SchemaEntry;
+            SchemaEntry* entry = schemaDocument->schemaMap_.template Push<SchemaEntry>();
+            new(entry) SchemaEntry(pointer_, this, true, allocator_);
+            schemaDocument->AddSchemaRefs(this);
         }
 
-        if (!value.IsObject())
+        if(!value.IsObject())
             return;
 
         // If we have an id property, resolve it with the in-scope id
         // Not supported for open api 2.0 or 3.0
-        if (spec_.oapi != kVersion20 && spec_.oapi != kVersion30)
-        if (const ValueType* v = GetMember(value, GetIdString())) {
-            if (v->IsString()) {
-                UriType local(*v, allocator);
-                id_ = local.Resolve(id_, allocator);
-                    RAPIDJSON_SCHEMA_PRINT(SchemaIds, id.GetString(), v->GetString(), id_.GetString());
+        if(spec_.oapi != kVersion20 && spec_.oapi != kVersion30)
+            if(const ValueType* v = GetMember(value, GetIdString()))
+            {
+                if(v->IsString())
+                {
+                    UriType local(*v, allocator);
+                    id_ = local.Resolve(id_, allocator);
+                    RAPIDJSON_SCHEMA_PRINT(
+                        SchemaIds, id.GetString(), v->GetString(), id_.GetString());
+                }
             }
-        }
 
-        if (const ValueType* v = GetMember(value, GetTypeString())) {
+        if(const ValueType* v = GetMember(value, GetTypeString()))
+        {
             type_ = 0;
-            if (v->IsString())
+            if(v->IsString())
                 AddType(*v);
-            else if (v->IsArray())
-                for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr)
+            else if(v->IsArray())
+                for(ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr)
                     AddType(*itr);
         }
 
-        if (const ValueType* v = GetMember(value, GetEnumString())) {
-            if (v->IsArray() && v->Size() > 0) {
+        if(const ValueType* v = GetMember(value, GetEnumString()))
+        {
+            if(v->IsArray() && v->Size() > 0)
+            {
                 enum_ = static_cast<uint64_t*>(allocator_->Malloc(sizeof(uint64_t) * v->Size()));
-                for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr) {
-                    typedef Hasher<EncodingType, MemoryPoolAllocator<AllocatorType> > EnumHasherType;
+                for(ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr)
+                {
+                    typedef Hasher<EncodingType, MemoryPoolAllocator<AllocatorType>> EnumHasherType;
                     char buffer[256u + 24];
                     MemoryPoolAllocator<AllocatorType> hasherAllocator(buffer, sizeof(buffer));
                     EnumHasherType h(&hasherAllocator, 256);
@@ -626,16 +726,19 @@ public:
             }
         }
 
-        if (schemaDocument)
+        if(schemaDocument)
             AssignIfExist(allOf_, *schemaDocument, p, value, GetAllOfString(), document);
 
         // AnyOf, OneOf, Not not supported for open api 2.0
-        if (schemaDocument && spec_.oapi != kVersion20) {
+        if(schemaDocument && spec_.oapi != kVersion20)
+        {
             AssignIfExist(anyOf_, *schemaDocument, p, value, GetAnyOfString(), document);
             AssignIfExist(oneOf_, *schemaDocument, p, value, GetOneOfString(), document);
 
-            if (const ValueType* v = GetMember(value, GetNotString())) {
-                schemaDocument->CreateSchema(&not_, p.Append(GetNotString(), allocator_), *v, document, id_);
+            if(const ValueType* v = GetMember(value, GetNotString()))
+            {
+                schemaDocument->CreateSchema(
+                    &not_, p.Append(GetNotString(), allocator_), *v, document, id_);
                 notValidatorIndex_ = validatorCount_;
                 validatorCount_++;
             }
@@ -643,126 +746,182 @@ public:
 
         // Object
 
-        const ValueType* properties = GetMember(value, GetPropertiesString());
-        const ValueType* required = GetMember(value, GetRequiredString());
+        const ValueType* properties   = GetMember(value, GetPropertiesString());
+        const ValueType* required     = GetMember(value, GetRequiredString());
         const ValueType* dependencies = GetMember(value, GetDependenciesString());
         {
             // Gather properties from properties/required/dependencies
             SValue allProperties(kArrayType);
 
-            if (properties && properties->IsObject())
-                for (ConstMemberIterator itr = properties->MemberBegin(); itr != properties->MemberEnd(); ++itr)
+            if(properties && properties->IsObject())
+                for(ConstMemberIterator itr = properties->MemberBegin();
+                    itr != properties->MemberEnd();
+                    ++itr)
                     AddUniqueElement(allProperties, itr->name);
 
-            if (required && required->IsArray())
-                for (ConstValueIterator itr = required->Begin(); itr != required->End(); ++itr)
-                    if (itr->IsString())
+            if(required && required->IsArray())
+                for(ConstValueIterator itr = required->Begin(); itr != required->End(); ++itr)
+                    if(itr->IsString())
                         AddUniqueElement(allProperties, *itr);
 
             // Dependencies not supported for open api 2.0 and 3.0
-            if (spec_.oapi != kVersion20 && spec_.oapi != kVersion30)
-            if (dependencies && dependencies->IsObject())
-                for (ConstMemberIterator itr = dependencies->MemberBegin(); itr != dependencies->MemberEnd(); ++itr) {
-                    AddUniqueElement(allProperties, itr->name);
-                    if (itr->value.IsArray())
-                        for (ConstValueIterator i = itr->value.Begin(); i != itr->value.End(); ++i)
-                            if (i->IsString())
-                                AddUniqueElement(allProperties, *i);
-                }
+            if(spec_.oapi != kVersion20 && spec_.oapi != kVersion30)
+                if(dependencies && dependencies->IsObject())
+                    for(ConstMemberIterator itr = dependencies->MemberBegin();
+                        itr != dependencies->MemberEnd();
+                        ++itr)
+                    {
+                        AddUniqueElement(allProperties, itr->name);
+                        if(itr->value.IsArray())
+                            for(ConstValueIterator i = itr->value.Begin(); i != itr->value.End();
+                                ++i)
+                                if(i->IsString())
+                                    AddUniqueElement(allProperties, *i);
+                    }
 
-            if (allProperties.Size() > 0) {
+            if(allProperties.Size() > 0)
+            {
                 propertyCount_ = allProperties.Size();
-                properties_ = static_cast<Property*>(allocator_->Malloc(sizeof(Property) * propertyCount_));
-                for (SizeType i = 0; i < propertyCount_; i++) {
-                    new (&properties_[i]) Property();
-                    properties_[i].name = allProperties[i];
+                properties_ =
+                    static_cast<Property*>(allocator_->Malloc(sizeof(Property) * propertyCount_));
+                for(SizeType i = 0; i < propertyCount_; i++)
+                {
+                    new(&properties_[i]) Property();
+                    properties_[i].name   = allProperties[i];
                     properties_[i].schema = typeless_;
                 }
             }
         }
 
-        if (properties && properties->IsObject()) {
+        if(properties && properties->IsObject())
+        {
             PointerType q = p.Append(GetPropertiesString(), allocator_);
-            for (ConstMemberIterator itr = properties->MemberBegin(); itr != properties->MemberEnd(); ++itr) {
+            for(ConstMemberIterator itr = properties->MemberBegin(); itr != properties->MemberEnd();
+                ++itr)
+            {
                 SizeType index;
-                if (FindPropertyIndex(itr->name, &index))
-                    schemaDocument->CreateSchema(&properties_[index].schema, q.Append(itr->name, allocator_), itr->value, document, id_);
+                if(FindPropertyIndex(itr->name, &index))
+                    schemaDocument->CreateSchema(&properties_[index].schema,
+                                                 q.Append(itr->name, allocator_),
+                                                 itr->value,
+                                                 document,
+                                                 id_);
             }
         }
 
         // PatternProperties not supported for open api 2.0 and 3.0
-        if (spec_.oapi != kVersion20 && spec_.oapi != kVersion30)
-        if (const ValueType* v = GetMember(value, GetPatternPropertiesString())) {
-            PointerType q = p.Append(GetPatternPropertiesString(), allocator_);
-            patternProperties_ = static_cast<PatternProperty*>(allocator_->Malloc(sizeof(PatternProperty) * v->MemberCount()));
-            patternPropertyCount_ = 0;
+        if(spec_.oapi != kVersion20 && spec_.oapi != kVersion30)
+            if(const ValueType* v = GetMember(value, GetPatternPropertiesString()))
+            {
+                PointerType q      = p.Append(GetPatternPropertiesString(), allocator_);
+                patternProperties_ = static_cast<PatternProperty*>(
+                    allocator_->Malloc(sizeof(PatternProperty) * v->MemberCount()));
+                patternPropertyCount_ = 0;
 
-            for (ConstMemberIterator itr = v->MemberBegin(); itr != v->MemberEnd(); ++itr) {
-                new (&patternProperties_[patternPropertyCount_]) PatternProperty();
-                PointerType r = q.Append(itr->name, allocator_);
-                patternProperties_[patternPropertyCount_].pattern = CreatePattern(itr->name, schemaDocument, r);
-                schemaDocument->CreateSchema(&patternProperties_[patternPropertyCount_].schema, r, itr->value, document, id_);
-                patternPropertyCount_++;
+                for(ConstMemberIterator itr = v->MemberBegin(); itr != v->MemberEnd(); ++itr)
+                {
+                    new(&patternProperties_[patternPropertyCount_]) PatternProperty();
+                    PointerType r = q.Append(itr->name, allocator_);
+                    patternProperties_[patternPropertyCount_].pattern =
+                        CreatePattern(itr->name, schemaDocument, r);
+                    schemaDocument->CreateSchema(&patternProperties_[patternPropertyCount_].schema,
+                                                 r,
+                                                 itr->value,
+                                                 document,
+                                                 id_);
+                    patternPropertyCount_++;
+                }
             }
-        }
 
-        if (required && required->IsArray())
-            for (ConstValueIterator itr = required->Begin(); itr != required->End(); ++itr)
-                if (itr->IsString()) {
+        if(required && required->IsArray())
+            for(ConstValueIterator itr = required->Begin(); itr != required->End(); ++itr)
+                if(itr->IsString())
+                {
                     SizeType index;
-                    if (FindPropertyIndex(*itr, &index)) {
+                    if(FindPropertyIndex(*itr, &index))
+                    {
                         properties_[index].required = true;
-                        hasRequired_ = true;
+                        hasRequired_                = true;
                     }
                 }
 
         // Dependencies not supported for open api 2.0 and 3.0
-        if (spec_.oapi != kVersion20 && spec_.oapi != kVersion30)
-        if (dependencies && dependencies->IsObject()) {
-            PointerType q = p.Append(GetDependenciesString(), allocator_);
-            hasDependencies_ = true;
-            for (ConstMemberIterator itr = dependencies->MemberBegin(); itr != dependencies->MemberEnd(); ++itr) {
-                SizeType sourceIndex;
-                if (FindPropertyIndex(itr->name, &sourceIndex)) {
-                    if (itr->value.IsArray()) {
-                        properties_[sourceIndex].dependencies = static_cast<bool*>(allocator_->Malloc(sizeof(bool) * propertyCount_));
-                        std::memset(properties_[sourceIndex].dependencies, 0, sizeof(bool)* propertyCount_);
-                        for (ConstValueIterator targetItr = itr->value.Begin(); targetItr != itr->value.End(); ++targetItr) {
-                            SizeType targetIndex;
-                            if (FindPropertyIndex(*targetItr, &targetIndex))
-                                properties_[sourceIndex].dependencies[targetIndex] = true;
+        if(spec_.oapi != kVersion20 && spec_.oapi != kVersion30)
+            if(dependencies && dependencies->IsObject())
+            {
+                PointerType q    = p.Append(GetDependenciesString(), allocator_);
+                hasDependencies_ = true;
+                for(ConstMemberIterator itr = dependencies->MemberBegin();
+                    itr != dependencies->MemberEnd();
+                    ++itr)
+                {
+                    SizeType sourceIndex;
+                    if(FindPropertyIndex(itr->name, &sourceIndex))
+                    {
+                        if(itr->value.IsArray())
+                        {
+                            properties_[sourceIndex].dependencies = static_cast<bool*>(
+                                allocator_->Malloc(sizeof(bool) * propertyCount_));
+                            std::memset(properties_[sourceIndex].dependencies,
+                                        0,
+                                        sizeof(bool) * propertyCount_);
+                            for(ConstValueIterator targetItr = itr->value.Begin();
+                                targetItr != itr->value.End();
+                                ++targetItr)
+                            {
+                                SizeType targetIndex;
+                                if(FindPropertyIndex(*targetItr, &targetIndex))
+                                    properties_[sourceIndex].dependencies[targetIndex] = true;
+                            }
+                        }
+                        else if(itr->value.IsObject())
+                        {
+                            hasSchemaDependencies_ = true;
+                            schemaDocument->CreateSchema(
+                                &properties_[sourceIndex].dependenciesSchema,
+                                q.Append(itr->name, allocator_),
+                                itr->value,
+                                document,
+                                id_);
+                            properties_[sourceIndex].dependenciesValidatorIndex = validatorCount_;
+                            validatorCount_++;
                         }
-                    }
-                    else if (itr->value.IsObject()) {
-                        hasSchemaDependencies_ = true;
-                        schemaDocument->CreateSchema(&properties_[sourceIndex].dependenciesSchema, q.Append(itr->name, allocator_), itr->value, document, id_);
-                        properties_[sourceIndex].dependenciesValidatorIndex = validatorCount_;
-                        validatorCount_++;
                     }
                 }
             }
-        }
 
-        if (const ValueType* v = GetMember(value, GetAdditionalPropertiesString())) {
-            if (v->IsBool())
+        if(const ValueType* v = GetMember(value, GetAdditionalPropertiesString()))
+        {
+            if(v->IsBool())
                 additionalProperties_ = v->GetBool();
-            else if (v->IsObject())
-                schemaDocument->CreateSchema(&additionalPropertiesSchema_, p.Append(GetAdditionalPropertiesString(), allocator_), *v, document, id_);
+            else if(v->IsObject())
+                schemaDocument->CreateSchema(&additionalPropertiesSchema_,
+                                             p.Append(GetAdditionalPropertiesString(), allocator_),
+                                             *v,
+                                             document,
+                                             id_);
         }
 
         AssignIfExist(minProperties_, value, GetMinPropertiesString());
         AssignIfExist(maxProperties_, value, GetMaxPropertiesString());
 
         // Array
-        if (const ValueType* v = GetMember(value, GetItemsString())) {
+        if(const ValueType* v = GetMember(value, GetItemsString()))
+        {
             PointerType q = p.Append(GetItemsString(), allocator_);
-            if (v->IsObject()) // List validation
+            if(v->IsObject()) // List validation
                 schemaDocument->CreateSchema(&itemsList_, q, *v, document, id_);
-            else if (v->IsArray()) { // Tuple validation
-                itemsTuple_ = static_cast<const Schema**>(allocator_->Malloc(sizeof(const Schema*) * v->Size()));
+            else if(v->IsArray())
+            { // Tuple validation
+                itemsTuple_ = static_cast<const Schema**>(
+                    allocator_->Malloc(sizeof(const Schema*) * v->Size()));
                 SizeType index = 0;
-                for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr, index++)
-                    schemaDocument->CreateSchema(&itemsTuple_[itemsTupleCount_++], q.Append(index, allocator_), *itr, document, id_);
+                for(ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr, index++)
+                    schemaDocument->CreateSchema(&itemsTuple_[itemsTupleCount_++],
+                                                 q.Append(index, allocator_),
+                                                 *itr,
+                                                 document,
+                                                 id_);
             }
         }
 
@@ -770,13 +929,18 @@ public:
         AssignIfExist(maxItems_, value, GetMaxItemsString());
 
         // AdditionalItems not supported for openapi 2.0 and 3.0
-        if (spec_.oapi != kVersion20 && spec_.oapi != kVersion30)
-        if (const ValueType* v = GetMember(value, GetAdditionalItemsString())) {
-            if (v->IsBool())
-                additionalItems_ = v->GetBool();
-            else if (v->IsObject())
-                schemaDocument->CreateSchema(&additionalItemsSchema_, p.Append(GetAdditionalItemsString(), allocator_), *v, document, id_);
-        }
+        if(spec_.oapi != kVersion20 && spec_.oapi != kVersion30)
+            if(const ValueType* v = GetMember(value, GetAdditionalItemsString()))
+            {
+                if(v->IsBool())
+                    additionalItems_ = v->GetBool();
+                else if(v->IsObject())
+                    schemaDocument->CreateSchema(&additionalItemsSchema_,
+                                                 p.Append(GetAdditionalItemsString(), allocator_),
+                                                 *v,
+                                                 document,
+                                                 id_);
+            }
 
         AssignIfExist(uniqueItems_, value, GetUniqueItemsString());
 
@@ -784,104 +948,106 @@ public:
         AssignIfExist(minLength_, value, GetMinLengthString());
         AssignIfExist(maxLength_, value, GetMaxLengthString());
 
-        if (const ValueType* v = GetMember(value, GetPatternString()))
+        if(const ValueType* v = GetMember(value, GetPatternString()))
             pattern_ = CreatePattern(*v, schemaDocument, p.Append(GetPatternString(), allocator_));
 
         // Number
-        if (const ValueType* v = GetMember(value, GetMinimumString()))
-            if (v->IsNumber())
+        if(const ValueType* v = GetMember(value, GetMinimumString()))
+            if(v->IsNumber())
                 minimum_.CopyFrom(*v, *allocator_);
 
-        if (const ValueType* v = GetMember(value, GetMaximumString()))
-            if (v->IsNumber())
+        if(const ValueType* v = GetMember(value, GetMaximumString()))
+            if(v->IsNumber())
                 maximum_.CopyFrom(*v, *allocator_);
 
         AssignIfExist(exclusiveMinimum_, value, GetExclusiveMinimumString());
         AssignIfExist(exclusiveMaximum_, value, GetExclusiveMaximumString());
 
-        if (const ValueType* v = GetMember(value, GetMultipleOfString()))
-            if (v->IsNumber() && v->GetDouble() > 0.0)
+        if(const ValueType* v = GetMember(value, GetMultipleOfString()))
+            if(v->IsNumber() && v->GetDouble() > 0.0)
                 multipleOf_.CopyFrom(*v, *allocator_);
 
         // Default
-        if (const ValueType* v = GetMember(value, GetDefaultValueString()))
-            if (v->IsString())
+        if(const ValueType* v = GetMember(value, GetDefaultValueString()))
+            if(v->IsString())
                 defaultValueLength_ = v->GetStringLength();
 
         // ReadOnly - open api only (until draft 7 supported)
         // WriteOnly - open api 3 only (until draft 7 supported)
         // Both can't be true
-        if (spec_.oapi != kVersionNone)
+        if(spec_.oapi != kVersionNone)
             AssignIfExist(readOnly_, value, GetReadOnlyString());
-        if (spec_.oapi >= kVersion30)
+        if(spec_.oapi >= kVersion30)
             AssignIfExist(writeOnly_, value, GetWriteOnlyString());
-        if (readOnly_ && writeOnly_)
+        if(readOnly_ && writeOnly_)
             schemaDocument->SchemaError(kSchemaErrorReadOnlyAndWriteOnly, p);
 
         // Nullable - open api 3 only
         // If true add 'null' as allowable type
-        if (spec_.oapi >= kVersion30) {
+        if(spec_.oapi >= kVersion30)
+        {
             AssignIfExist(nullable_, value, GetNullableString());
-            if (nullable_)
+            if(nullable_)
                 AddType(GetNullString());
         }
     }
 
-    ~Schema() {
+    ~Schema()
+    {
         AllocatorType::Free(enum_);
-        if (properties_) {
-            for (SizeType i = 0; i < propertyCount_; i++)
+        if(properties_)
+        {
+            for(SizeType i = 0; i < propertyCount_; i++)
                 properties_[i].~Property();
             AllocatorType::Free(properties_);
         }
-        if (patternProperties_) {
-            for (SizeType i = 0; i < patternPropertyCount_; i++)
+        if(patternProperties_)
+        {
+            for(SizeType i = 0; i < patternPropertyCount_; i++)
                 patternProperties_[i].~PatternProperty();
             AllocatorType::Free(patternProperties_);
         }
         AllocatorType::Free(itemsTuple_);
 #if RAPIDJSON_SCHEMA_HAS_REGEX
-        if (pattern_) {
+        if(pattern_)
+        {
             pattern_->~RegexType();
             AllocatorType::Free(pattern_);
         }
 #endif
     }
 
-    const SValue& GetURI() const {
-        return uri_;
-    }
+    const SValue& GetURI() const { return uri_; }
 
-    const UriType& GetId() const {
-        return id_;
-    }
+    const UriType& GetId() const { return id_; }
 
-    const Specification& GetSpecification() const {
-        return spec_;
-    }
+    const Specification& GetSpecification() const { return spec_; }
 
-    const PointerType& GetPointer() const {
-        return pointer_;
-    }
+    const PointerType& GetPointer() const { return pointer_; }
 
-    bool BeginValue(Context& context) const {
+    bool BeginValue(Context& context) const
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "Schema::BeginValue");
-        if (context.inArray) {
-            if (uniqueItems_)
+        if(context.inArray)
+        {
+            if(uniqueItems_)
                 context.valueUniqueness = true;
 
-            if (itemsList_)
+            if(itemsList_)
                 context.valueSchema = itemsList_;
-            else if (itemsTuple_) {
-                if (context.arrayElementIndex < itemsTupleCount_)
+            else if(itemsTuple_)
+            {
+                if(context.arrayElementIndex < itemsTupleCount_)
                     context.valueSchema = itemsTuple_[context.arrayElementIndex];
-                else if (additionalItemsSchema_)
+                else if(additionalItemsSchema_)
                     context.valueSchema = additionalItemsSchema_;
-                else if (additionalItems_)
+                else if(additionalItems_)
                     context.valueSchema = typeless_;
-                else {
+                else
+                {
                     context.error_handler.DisallowedItem(context.arrayElementIndex);
-                    // Must set valueSchema for when kValidateContinueOnErrorFlag is set, else reports spurious type error
+                    // Must set valueSchema for when kValidateContinueOnErrorFlag is set, else
+                    // reports spurious type error
                     context.valueSchema = typeless_;
                     // Must bump arrayElementIndex for when kValidateContinueOnErrorFlag is set
                     context.arrayElementIndex++;
@@ -896,89 +1062,112 @@ public:
         return true;
     }
 
-    RAPIDJSON_FORCEINLINE bool EndValue(Context& context) const {
+    RAPIDJSON_FORCEINLINE bool EndValue(Context& context) const
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "Schema::EndValue");
         // Only check pattern properties if we have validators
-        if (context.patternPropertiesValidatorCount > 0) {
+        if(context.patternPropertiesValidatorCount > 0)
+        {
             bool otherValid = false;
-            SizeType count = context.patternPropertiesValidatorCount;
-            if (context.objectPatternValidatorType != Context::kPatternValidatorOnly)
+            SizeType count  = context.patternPropertiesValidatorCount;
+            if(context.objectPatternValidatorType != Context::kPatternValidatorOnly)
                 otherValid = context.patternPropertiesValidators[--count]->IsValid();
 
             bool patternValid = true;
-            for (SizeType i = 0; i < count; i++)
-                if (!context.patternPropertiesValidators[i]->IsValid()) {
+            for(SizeType i = 0; i < count; i++)
+                if(!context.patternPropertiesValidators[i]->IsValid())
+                {
                     patternValid = false;
                     break;
                 }
 
-            if (context.objectPatternValidatorType == Context::kPatternValidatorOnly) {
-                if (!patternValid) {
-                    context.error_handler.PropertyViolations(context.patternPropertiesValidators, count);
+            if(context.objectPatternValidatorType == Context::kPatternValidatorOnly)
+            {
+                if(!patternValid)
+                {
+                    context.error_handler.PropertyViolations(context.patternPropertiesValidators,
+                                                             count);
                     RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorPatternProperties);
                 }
             }
-            else if (context.objectPatternValidatorType == Context::kPatternValidatorWithProperty) {
-                if (!patternValid || !otherValid) {
-                    context.error_handler.PropertyViolations(context.patternPropertiesValidators, count + 1);
+            else if(context.objectPatternValidatorType == Context::kPatternValidatorWithProperty)
+            {
+                if(!patternValid || !otherValid)
+                {
+                    context.error_handler.PropertyViolations(context.patternPropertiesValidators,
+                                                             count + 1);
                     RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorPatternProperties);
                 }
             }
-            else if (!patternValid && !otherValid) { // kPatternValidatorWithAdditionalProperty)
-                context.error_handler.PropertyViolations(context.patternPropertiesValidators, count + 1);
+            else if(!patternValid && !otherValid)
+            { // kPatternValidatorWithAdditionalProperty)
+                context.error_handler.PropertyViolations(context.patternPropertiesValidators,
+                                                         count + 1);
                 RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorPatternProperties);
             }
         }
 
         // For enums only check if we have a hasher
-        if (enum_ && context.hasher) {
+        if(enum_ && context.hasher)
+        {
             const uint64_t h = context.factory.GetHashCode(context.hasher);
-            for (SizeType i = 0; i < enumCount_; i++)
-                if (enum_[i] == h)
+            for(SizeType i = 0; i < enumCount_; i++)
+                if(enum_[i] == h)
                     goto foundEnum;
             context.error_handler.DisallowedValue(kValidateErrorEnum);
             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorEnum);
-            foundEnum:;
+        foundEnum:;
         }
 
         // Only check allOf etc if we have validators
-        if (context.validatorCount > 0) {
-            if (allOf_.schemas)
-                for (SizeType i = allOf_.begin; i < allOf_.begin + allOf_.count; i++)
-                    if (!context.validators[i]->IsValid()) {
-                        context.error_handler.NotAllOf(&context.validators[allOf_.begin], allOf_.count);
+        if(context.validatorCount > 0)
+        {
+            if(allOf_.schemas)
+                for(SizeType i = allOf_.begin; i < allOf_.begin + allOf_.count; i++)
+                    if(!context.validators[i]->IsValid())
+                    {
+                        context.error_handler.NotAllOf(&context.validators[allOf_.begin],
+                                                       allOf_.count);
                         RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorAllOf);
                     }
 
-            if (anyOf_.schemas) {
-                for (SizeType i = anyOf_.begin; i < anyOf_.begin + anyOf_.count; i++)
-                    if (context.validators[i]->IsValid())
+            if(anyOf_.schemas)
+            {
+                for(SizeType i = anyOf_.begin; i < anyOf_.begin + anyOf_.count; i++)
+                    if(context.validators[i]->IsValid())
                         goto foundAny;
                 context.error_handler.NoneOf(&context.validators[anyOf_.begin], anyOf_.count);
                 RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorAnyOf);
-                foundAny:;
+            foundAny:;
             }
 
-            if (oneOf_.schemas) {
-                bool oneValid = false;
+            if(oneOf_.schemas)
+            {
+                bool oneValid       = false;
                 SizeType firstMatch = 0;
-                for (SizeType i = oneOf_.begin; i < oneOf_.begin + oneOf_.count; i++)
-                    if (context.validators[i]->IsValid()) {
-                        if (oneValid) {
+                for(SizeType i = oneOf_.begin; i < oneOf_.begin + oneOf_.count; i++)
+                    if(context.validators[i]->IsValid())
+                    {
+                        if(oneValid)
+                        {
                             context.error_handler.MultipleOneOf(firstMatch, i - oneOf_.begin);
                             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorOneOfMatch);
-                        } else {
-                            oneValid = true;
+                        }
+                        else
+                        {
+                            oneValid   = true;
                             firstMatch = i - oneOf_.begin;
                         }
                     }
-                if (!oneValid) {
+                if(!oneValid)
+                {
                     context.error_handler.NotOneOf(&context.validators[oneOf_.begin], oneOf_.count);
                     RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorOneOf);
                 }
             }
 
-            if (not_ && context.validators[notValidatorIndex_]->IsValid()) {
+            if(not_ && context.validators[notValidatorIndex_]->IsValid())
+            {
                 context.error_handler.Disallowed();
                 RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorNot);
             }
@@ -987,91 +1176,107 @@ public:
         return true;
     }
 
-    bool Null(Context& context) const {
+    bool Null(Context& context) const
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "Schema::Null");
-        if (!(type_ & (1 << kNullSchemaType))) {
+        if(!(type_ & (1 << kNullSchemaType)))
+        {
             DisallowedType(context, GetNullString());
             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorType);
         }
         return CreateParallelValidator(context);
     }
 
-    bool Bool(Context& context, bool b) const {
+    bool Bool(Context& context, bool b) const
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "Schema::Bool", b);
-        if (!CheckBool(context, b))
+        if(!CheckBool(context, b))
             return false;
         return CreateParallelValidator(context);
     }
 
-    bool Int(Context& context, int i) const {
+    bool Int(Context& context, int i) const
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "Schema::Int", (int64_t)i);
-        if (!CheckInt(context, i))
+        if(!CheckInt(context, i))
             return false;
         return CreateParallelValidator(context);
     }
 
-    bool Uint(Context& context, unsigned u) const {
+    bool Uint(Context& context, unsigned u) const
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "Schema::Uint", (uint64_t)u);
-        if (!CheckUint(context, u))
+        if(!CheckUint(context, u))
             return false;
         return CreateParallelValidator(context);
     }
 
-    bool Int64(Context& context, int64_t i) const {
+    bool Int64(Context& context, int64_t i) const
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "Schema::Int64", i);
-        if (!CheckInt(context, i))
+        if(!CheckInt(context, i))
             return false;
         return CreateParallelValidator(context);
     }
 
-    bool Uint64(Context& context, uint64_t u) const {
+    bool Uint64(Context& context, uint64_t u) const
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "Schema::Uint64", u);
-        if (!CheckUint(context, u))
+        if(!CheckUint(context, u))
             return false;
         return CreateParallelValidator(context);
     }
 
-    bool Double(Context& context, double d) const {
+    bool Double(Context& context, double d) const
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "Schema::Double", d);
-        if (!(type_ & (1 << kNumberSchemaType))) {
+        if(!(type_ & (1 << kNumberSchemaType)))
+        {
             DisallowedType(context, GetNumberString());
             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorType);
         }
 
-        if (!minimum_.IsNull() && !CheckDoubleMinimum(context, d))
+        if(!minimum_.IsNull() && !CheckDoubleMinimum(context, d))
             return false;
 
-        if (!maximum_.IsNull() && !CheckDoubleMaximum(context, d))
+        if(!maximum_.IsNull() && !CheckDoubleMaximum(context, d))
             return false;
 
-        if (!multipleOf_.IsNull() && !CheckDoubleMultipleOf(context, d))
+        if(!multipleOf_.IsNull() && !CheckDoubleMultipleOf(context, d))
             return false;
 
         return CreateParallelValidator(context);
     }
 
-    bool String(Context& context, const Ch* str, SizeType length, bool) const {
+    bool String(Context& context, const Ch* str, SizeType length, bool) const
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "Schema::String", str);
-        if (!(type_ & (1 << kStringSchemaType))) {
+        if(!(type_ & (1 << kStringSchemaType)))
+        {
             DisallowedType(context, GetStringString());
             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorType);
         }
 
-        if (minLength_ != 0 || maxLength_ != SizeType(~0)) {
+        if(minLength_ != 0 || maxLength_ != SizeType(~0))
+        {
             SizeType count;
-            if (internal::CountStringCodePoint<EncodingType>(str, length, &count)) {
-                if (count < minLength_) {
+            if(internal::CountStringCodePoint<EncodingType>(str, length, &count))
+            {
+                if(count < minLength_)
+                {
                     context.error_handler.TooShort(str, length, minLength_);
                     RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorMinLength);
                 }
-                if (count > maxLength_) {
+                if(count > maxLength_)
+                {
                     context.error_handler.TooLong(str, length, maxLength_);
                     RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorMaxLength);
                 }
             }
         }
 
-        if (pattern_ && !IsPatternMatch(pattern_, str, length)) {
+        if(pattern_ && !IsPatternMatch(pattern_, str, length))
+        {
             context.error_handler.DoesNotMatch(str, length);
             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorPattern);
         }
@@ -1079,21 +1284,27 @@ public:
         return CreateParallelValidator(context);
     }
 
-    bool StartObject(Context& context) const {
+    bool StartObject(Context& context) const
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "Schema::StartObject");
-        if (!(type_ & (1 << kObjectSchemaType))) {
+        if(!(type_ & (1 << kObjectSchemaType)))
+        {
             DisallowedType(context, GetObjectString());
             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorType);
         }
 
-        if (hasDependencies_ || hasRequired_) {
-            context.propertyExist = static_cast<bool*>(context.factory.MallocState(sizeof(bool) * propertyCount_));
+        if(hasDependencies_ || hasRequired_)
+        {
+            context.propertyExist =
+                static_cast<bool*>(context.factory.MallocState(sizeof(bool) * propertyCount_));
             std::memset(context.propertyExist, 0, sizeof(bool) * propertyCount_);
         }
 
-        if (patternProperties_) { // pre-allocate schema array
+        if(patternProperties_)
+        {                                               // pre-allocate schema array
             SizeType count = patternPropertyCount_ + 1; // extra for valuePatternValidatorType
-            context.patternPropertiesSchemas = static_cast<const SchemaType**>(context.factory.MallocState(sizeof(const SchemaType*) * count));
+            context.patternPropertiesSchemas = static_cast<const SchemaType**>(
+                context.factory.MallocState(sizeof(const SchemaType*) * count));
             context.patternPropertiesSchemaCount = 0;
             std::memset(context.patternPropertiesSchemas, 0, sizeof(SchemaType*) * count);
         }
@@ -1101,51 +1312,66 @@ public:
         return CreateParallelValidator(context);
     }
 
-    bool Key(Context& context, const Ch* str, SizeType len, bool) const {
+    bool Key(Context& context, const Ch* str, SizeType len, bool) const
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "Schema::Key", str);
 
-        if (patternProperties_) {
+        if(patternProperties_)
+        {
             context.patternPropertiesSchemaCount = 0;
-            for (SizeType i = 0; i < patternPropertyCount_; i++)
-                if (patternProperties_[i].pattern && IsPatternMatch(patternProperties_[i].pattern, str, len)) {
-                    context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = patternProperties_[i].schema;
+            for(SizeType i = 0; i < patternPropertyCount_; i++)
+                if(patternProperties_[i].pattern &&
+                   IsPatternMatch(patternProperties_[i].pattern, str, len))
+                {
+                    context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] =
+                        patternProperties_[i].schema;
                     context.valueSchema = typeless_;
                 }
         }
 
-        SizeType index  = 0;
-        if (FindPropertyIndex(ValueType(str, len).Move(), &index)) {
-            if (context.patternPropertiesSchemaCount > 0) {
-                context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = properties_[index].schema;
-                context.valueSchema = typeless_;
+        SizeType index = 0;
+        if(FindPropertyIndex(ValueType(str, len).Move(), &index))
+        {
+            if(context.patternPropertiesSchemaCount > 0)
+            {
+                context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] =
+                    properties_[index].schema;
+                context.valueSchema               = typeless_;
                 context.valuePatternValidatorType = Context::kPatternValidatorWithProperty;
             }
             else
                 context.valueSchema = properties_[index].schema;
 
-            if (context.propertyExist)
+            if(context.propertyExist)
                 context.propertyExist[index] = true;
 
             return true;
         }
 
-        if (additionalPropertiesSchema_) {
-            if (context.patternPropertiesSchemaCount > 0) {
-                context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = additionalPropertiesSchema_;
+        if(additionalPropertiesSchema_)
+        {
+            if(context.patternPropertiesSchemaCount > 0)
+            {
+                context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] =
+                    additionalPropertiesSchema_;
                 context.valueSchema = typeless_;
-                context.valuePatternValidatorType = Context::kPatternValidatorWithAdditionalProperty;
+                context.valuePatternValidatorType =
+                    Context::kPatternValidatorWithAdditionalProperty;
             }
             else
                 context.valueSchema = additionalPropertiesSchema_;
             return true;
         }
-        else if (additionalProperties_) {
+        else if(additionalProperties_)
+        {
             context.valueSchema = typeless_;
             return true;
         }
 
-        if (context.patternPropertiesSchemaCount == 0) { // patternProperties are not additional properties
-            // Must set valueSchema for when kValidateContinueOnErrorFlag is set, else reports spurious type error
+        if(context.patternPropertiesSchemaCount == 0)
+        { // patternProperties are not additional properties
+            // Must set valueSchema for when kValidateContinueOnErrorFlag is set, else reports
+            // spurious type error
             context.valueSchema = typeless_;
             context.error_handler.DisallowedProperty(str, len);
             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorAdditionalProperties);
@@ -1154,60 +1380,75 @@ public:
         return true;
     }
 
-    bool EndObject(Context& context, SizeType memberCount) const {
+    bool EndObject(Context& context, SizeType memberCount) const
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "Schema::EndObject");
-        if (hasRequired_) {
+        if(hasRequired_)
+        {
             context.error_handler.StartMissingProperties();
-            for (SizeType index = 0; index < propertyCount_; index++)
-                if (properties_[index].required && !context.propertyExist[index])
-                    if (properties_[index].schema->defaultValueLength_ == 0 )
+            for(SizeType index = 0; index < propertyCount_; index++)
+                if(properties_[index].required && !context.propertyExist[index])
+                    if(properties_[index].schema->defaultValueLength_ == 0)
                         context.error_handler.AddMissingProperty(properties_[index].name);
-            if (context.error_handler.EndMissingProperties())
+            if(context.error_handler.EndMissingProperties())
                 RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorRequired);
         }
 
-        if (memberCount < minProperties_) {
+        if(memberCount < minProperties_)
+        {
             context.error_handler.TooFewProperties(memberCount, minProperties_);
             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorMinProperties);
         }
 
-        if (memberCount > maxProperties_) {
+        if(memberCount > maxProperties_)
+        {
             context.error_handler.TooManyProperties(memberCount, maxProperties_);
             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorMaxProperties);
         }
 
-        if (hasDependencies_) {
+        if(hasDependencies_)
+        {
             context.error_handler.StartDependencyErrors();
-            for (SizeType sourceIndex = 0; sourceIndex < propertyCount_; sourceIndex++) {
+            for(SizeType sourceIndex = 0; sourceIndex < propertyCount_; sourceIndex++)
+            {
                 const Property& source = properties_[sourceIndex];
-                if (context.propertyExist[sourceIndex]) {
-                    if (source.dependencies) {
+                if(context.propertyExist[sourceIndex])
+                {
+                    if(source.dependencies)
+                    {
                         context.error_handler.StartMissingDependentProperties();
-                        for (SizeType targetIndex = 0; targetIndex < propertyCount_; targetIndex++)
-                            if (source.dependencies[targetIndex] && !context.propertyExist[targetIndex])
-                                context.error_handler.AddMissingDependentProperty(properties_[targetIndex].name);
+                        for(SizeType targetIndex = 0; targetIndex < propertyCount_; targetIndex++)
+                            if(source.dependencies[targetIndex] &&
+                               !context.propertyExist[targetIndex])
+                                context.error_handler.AddMissingDependentProperty(
+                                    properties_[targetIndex].name);
                         context.error_handler.EndMissingDependentProperties(source.name);
                     }
-                    else if (source.dependenciesSchema) {
-                        ISchemaValidator* dependenciesValidator = context.validators[source.dependenciesValidatorIndex];
-                        if (!dependenciesValidator->IsValid())
-                            context.error_handler.AddDependencySchemaError(source.name, dependenciesValidator);
+                    else if(source.dependenciesSchema)
+                    {
+                        ISchemaValidator* dependenciesValidator =
+                            context.validators[source.dependenciesValidatorIndex];
+                        if(!dependenciesValidator->IsValid())
+                            context.error_handler.AddDependencySchemaError(source.name,
+                                                                           dependenciesValidator);
                     }
                 }
             }
-            if (context.error_handler.EndDependencyErrors())
+            if(context.error_handler.EndDependencyErrors())
                 RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorDependencies);
         }
 
         return true;
     }
 
-    bool StartArray(Context& context) const {
+    bool StartArray(Context& context) const
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "Schema::StartArray");
         context.arrayElementIndex = 0;
-        context.inArray = true;  // Ensure we note that we are in an array
+        context.inArray           = true; // Ensure we note that we are in an array
 
-        if (!(type_ & (1 << kArraySchemaType))) {
+        if(!(type_ & (1 << kArraySchemaType)))
+        {
             DisallowedType(context, GetArrayString());
             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorType);
         }
@@ -1215,16 +1456,19 @@ public:
         return CreateParallelValidator(context);
     }
 
-    bool EndArray(Context& context, SizeType elementCount) const {
+    bool EndArray(Context& context, SizeType elementCount) const
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "Schema::EndArray");
         context.inArray = false;
 
-        if (elementCount < minItems_) {
+        if(elementCount < minItems_)
+        {
             context.error_handler.TooFewItems(elementCount, minItems_);
             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorMinItems);
         }
 
-        if (elementCount > maxItems_) {
+        if(elementCount > maxItems_)
+        {
             context.error_handler.TooManyItems(elementCount, maxItems_);
             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorMaxItems);
         }
@@ -1232,53 +1476,55 @@ public:
         return true;
     }
 
-    static const ValueType& GetValidateErrorKeyword(ValidateErrorCode validateErrorCode) {
-        switch (validateErrorCode) {
-            case kValidateErrorMultipleOf:              return GetMultipleOfString();
-            case kValidateErrorMaximum:                 return GetMaximumString();
-            case kValidateErrorExclusiveMaximum:        return GetMaximumString(); // Same
-            case kValidateErrorMinimum:                 return GetMinimumString();
-            case kValidateErrorExclusiveMinimum:        return GetMinimumString(); // Same
+    static const ValueType& GetValidateErrorKeyword(ValidateErrorCode validateErrorCode)
+    {
+        switch(validateErrorCode)
+        {
+        case kValidateErrorMultipleOf: return GetMultipleOfString();
+        case kValidateErrorMaximum: return GetMaximumString();
+        case kValidateErrorExclusiveMaximum: return GetMaximumString(); // Same
+        case kValidateErrorMinimum: return GetMinimumString();
+        case kValidateErrorExclusiveMinimum: return GetMinimumString(); // Same
 
-            case kValidateErrorMaxLength:               return GetMaxLengthString();
-            case kValidateErrorMinLength:               return GetMinLengthString();
-            case kValidateErrorPattern:                 return GetPatternString();
+        case kValidateErrorMaxLength: return GetMaxLengthString();
+        case kValidateErrorMinLength: return GetMinLengthString();
+        case kValidateErrorPattern: return GetPatternString();
 
-            case kValidateErrorMaxItems:                return GetMaxItemsString();
-            case kValidateErrorMinItems:                return GetMinItemsString();
-            case kValidateErrorUniqueItems:             return GetUniqueItemsString();
-            case kValidateErrorAdditionalItems:         return GetAdditionalItemsString();
+        case kValidateErrorMaxItems: return GetMaxItemsString();
+        case kValidateErrorMinItems: return GetMinItemsString();
+        case kValidateErrorUniqueItems: return GetUniqueItemsString();
+        case kValidateErrorAdditionalItems: return GetAdditionalItemsString();
 
-            case kValidateErrorMaxProperties:           return GetMaxPropertiesString();
-            case kValidateErrorMinProperties:           return GetMinPropertiesString();
-            case kValidateErrorRequired:                return GetRequiredString();
-            case kValidateErrorAdditionalProperties:    return GetAdditionalPropertiesString();
-            case kValidateErrorPatternProperties:       return GetPatternPropertiesString();
-            case kValidateErrorDependencies:            return GetDependenciesString();
+        case kValidateErrorMaxProperties: return GetMaxPropertiesString();
+        case kValidateErrorMinProperties: return GetMinPropertiesString();
+        case kValidateErrorRequired: return GetRequiredString();
+        case kValidateErrorAdditionalProperties: return GetAdditionalPropertiesString();
+        case kValidateErrorPatternProperties: return GetPatternPropertiesString();
+        case kValidateErrorDependencies: return GetDependenciesString();
 
-            case kValidateErrorEnum:                    return GetEnumString();
-            case kValidateErrorType:                    return GetTypeString();
+        case kValidateErrorEnum: return GetEnumString();
+        case kValidateErrorType: return GetTypeString();
 
-            case kValidateErrorOneOf:                   return GetOneOfString();
-            case kValidateErrorOneOfMatch:              return GetOneOfString(); // Same
-            case kValidateErrorAllOf:                   return GetAllOfString();
-            case kValidateErrorAnyOf:                   return GetAnyOfString();
-            case kValidateErrorNot:                     return GetNotString();
+        case kValidateErrorOneOf: return GetOneOfString();
+        case kValidateErrorOneOfMatch: return GetOneOfString(); // Same
+        case kValidateErrorAllOf: return GetAllOfString();
+        case kValidateErrorAnyOf: return GetAnyOfString();
+        case kValidateErrorNot: return GetNotString();
 
-            case kValidateErrorReadOnly:                return GetReadOnlyString();
-            case kValidateErrorWriteOnly:               return GetWriteOnlyString();
+        case kValidateErrorReadOnly: return GetReadOnlyString();
+        case kValidateErrorWriteOnly: return GetWriteOnlyString();
 
-            default:                                    return GetNullString();
+        default: return GetNullString();
         }
     }
 
-
     // Generate functions for string literal according to Ch
-#define RAPIDJSON_STRING_(name, ...) \
-    static const ValueType& Get##name##String() {\
-        static const Ch s[] = { __VA_ARGS__, '\0' };\
-        static const ValueType v(s, static_cast<SizeType>(sizeof(s) / sizeof(Ch) - 1));\
-        return v;\
+#define RAPIDJSON_STRING_(name, ...)                                                    \
+    static const ValueType& Get##name##String()                                         \
+    {                                                                                   \
+        static const Ch s[] = {__VA_ARGS__, '\0'};                                      \
+        static const ValueType v(s, static_cast<SizeType>(sizeof(s) / sizeof(Ch) - 1)); \
+        return v;                                                                       \
     }
 
     RAPIDJSON_STRING_(Null, 'n', 'u', 'l', 'l')
@@ -1297,22 +1543,94 @@ public:
     RAPIDJSON_STRING_(Properties, 'p', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
     RAPIDJSON_STRING_(Required, 'r', 'e', 'q', 'u', 'i', 'r', 'e', 'd')
     RAPIDJSON_STRING_(Dependencies, 'd', 'e', 'p', 'e', 'n', 'd', 'e', 'n', 'c', 'i', 'e', 's')
-    RAPIDJSON_STRING_(PatternProperties, 'p', 'a', 't', 't', 'e', 'r', 'n', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
-    RAPIDJSON_STRING_(AdditionalProperties, 'a', 'd', 'd', 'i', 't', 'i', 'o', 'n', 'a', 'l', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
-    RAPIDJSON_STRING_(MinProperties, 'm', 'i', 'n', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
-    RAPIDJSON_STRING_(MaxProperties, 'm', 'a', 'x', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
+    RAPIDJSON_STRING_(PatternProperties,
+                      'p',
+                      'a',
+                      't',
+                      't',
+                      'e',
+                      'r',
+                      'n',
+                      'P',
+                      'r',
+                      'o',
+                      'p',
+                      'e',
+                      'r',
+                      't',
+                      'i',
+                      'e',
+                      's')
+    RAPIDJSON_STRING_(AdditionalProperties,
+                      'a',
+                      'd',
+                      'd',
+                      'i',
+                      't',
+                      'i',
+                      'o',
+                      'n',
+                      'a',
+                      'l',
+                      'P',
+                      'r',
+                      'o',
+                      'p',
+                      'e',
+                      'r',
+                      't',
+                      'i',
+                      'e',
+                      's')
+    RAPIDJSON_STRING_(
+        MinProperties, 'm', 'i', 'n', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
+    RAPIDJSON_STRING_(
+        MaxProperties, 'm', 'a', 'x', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
     RAPIDJSON_STRING_(Items, 'i', 't', 'e', 'm', 's')
     RAPIDJSON_STRING_(MinItems, 'm', 'i', 'n', 'I', 't', 'e', 'm', 's')
     RAPIDJSON_STRING_(MaxItems, 'm', 'a', 'x', 'I', 't', 'e', 'm', 's')
-    RAPIDJSON_STRING_(AdditionalItems, 'a', 'd', 'd', 'i', 't', 'i', 'o', 'n', 'a', 'l', 'I', 't', 'e', 'm', 's')
+    RAPIDJSON_STRING_(
+        AdditionalItems, 'a', 'd', 'd', 'i', 't', 'i', 'o', 'n', 'a', 'l', 'I', 't', 'e', 'm', 's')
     RAPIDJSON_STRING_(UniqueItems, 'u', 'n', 'i', 'q', 'u', 'e', 'I', 't', 'e', 'm', 's')
     RAPIDJSON_STRING_(MinLength, 'm', 'i', 'n', 'L', 'e', 'n', 'g', 't', 'h')
     RAPIDJSON_STRING_(MaxLength, 'm', 'a', 'x', 'L', 'e', 'n', 'g', 't', 'h')
     RAPIDJSON_STRING_(Pattern, 'p', 'a', 't', 't', 'e', 'r', 'n')
     RAPIDJSON_STRING_(Minimum, 'm', 'i', 'n', 'i', 'm', 'u', 'm')
     RAPIDJSON_STRING_(Maximum, 'm', 'a', 'x', 'i', 'm', 'u', 'm')
-    RAPIDJSON_STRING_(ExclusiveMinimum, 'e', 'x', 'c', 'l', 'u', 's', 'i', 'v', 'e', 'M', 'i', 'n', 'i', 'm', 'u', 'm')
-    RAPIDJSON_STRING_(ExclusiveMaximum, 'e', 'x', 'c', 'l', 'u', 's', 'i', 'v', 'e', 'M', 'a', 'x', 'i', 'm', 'u', 'm')
+    RAPIDJSON_STRING_(ExclusiveMinimum,
+                      'e',
+                      'x',
+                      'c',
+                      'l',
+                      'u',
+                      's',
+                      'i',
+                      'v',
+                      'e',
+                      'M',
+                      'i',
+                      'n',
+                      'i',
+                      'm',
+                      'u',
+                      'm')
+    RAPIDJSON_STRING_(ExclusiveMaximum,
+                      'e',
+                      'x',
+                      'c',
+                      'l',
+                      'u',
+                      's',
+                      'i',
+                      'v',
+                      'e',
+                      'M',
+                      'a',
+                      'x',
+                      'i',
+                      'm',
+                      'u',
+                      'm')
     RAPIDJSON_STRING_(MultipleOf, 'm', 'u', 'l', 't', 'i', 'p', 'l', 'e', 'O', 'f')
     RAPIDJSON_STRING_(DefaultValue, 'd', 'e', 'f', 'a', 'u', 'l', 't')
     RAPIDJSON_STRING_(Schema, '$', 's', 'c', 'h', 'e', 'm', 'a')
@@ -1326,8 +1644,9 @@ public:
 
 #undef RAPIDJSON_STRING_
 
-private:
-    enum SchemaValueType {
+    private:
+    enum SchemaValueType
+    {
         kNullSchemaType,
         kBooleanSchemaType,
         kObjectSchemaType,
@@ -1339,14 +1658,15 @@ private:
     };
 
 #if RAPIDJSON_SCHEMA_USE_INTERNALREGEX
-        typedef internal::GenericRegex<EncodingType, AllocatorType> RegexType;
+    typedef internal::GenericRegex<EncodingType, AllocatorType> RegexType;
 #elif RAPIDJSON_SCHEMA_USE_STDREGEX
-        typedef std::basic_regex<Ch> RegexType;
+    typedef std::basic_regex<Ch> RegexType;
 #else
-        typedef char RegexType;
+    typedef char RegexType;
 #endif
 
-    struct SchemaArray {
+    struct SchemaArray
+    {
         SchemaArray() : schemas(), count() {}
         ~SchemaArray() { AllocatorType::Free(schemas); }
         const SchemaType** schemas;
@@ -1355,40 +1675,54 @@ private:
     };
 
     template <typename V1, typename V2>
-    void AddUniqueElement(V1& a, const V2& v) {
-        for (typename V1::ConstValueIterator itr = a.Begin(); itr != a.End(); ++itr)
-            if (*itr == v)
+    void AddUniqueElement(V1& a, const V2& v)
+    {
+        for(typename V1::ConstValueIterator itr = a.Begin(); itr != a.End(); ++itr)
+            if(*itr == v)
                 return;
         V1 c(v, *allocator_);
         a.PushBack(c, *allocator_);
     }
 
-    static const ValueType* GetMember(const ValueType& value, const ValueType& name) {
+    static const ValueType* GetMember(const ValueType& value, const ValueType& name)
+    {
         typename ValueType::ConstMemberIterator itr = value.FindMember(name);
         return itr != value.MemberEnd() ? &(itr->value) : 0;
     }
 
-    static void AssignIfExist(bool& out, const ValueType& value, const ValueType& name) {
-        if (const ValueType* v = GetMember(value, name))
-            if (v->IsBool())
+    static void AssignIfExist(bool& out, const ValueType& value, const ValueType& name)
+    {
+        if(const ValueType* v = GetMember(value, name))
+            if(v->IsBool())
                 out = v->GetBool();
     }
 
-    static void AssignIfExist(SizeType& out, const ValueType& value, const ValueType& name) {
-        if (const ValueType* v = GetMember(value, name))
-            if (v->IsUint64() && v->GetUint64() <= SizeType(~0))
+    static void AssignIfExist(SizeType& out, const ValueType& value, const ValueType& name)
+    {
+        if(const ValueType* v = GetMember(value, name))
+            if(v->IsUint64() && v->GetUint64() <= SizeType(~0))
                 out = static_cast<SizeType>(v->GetUint64());
     }
 
-    void AssignIfExist(SchemaArray& out, SchemaDocumentType& schemaDocument, const PointerType& p, const ValueType& value, const ValueType& name, const ValueType& document) {
-        if (const ValueType* v = GetMember(value, name)) {
-            if (v->IsArray() && v->Size() > 0) {
+    void AssignIfExist(SchemaArray& out,
+                       SchemaDocumentType& schemaDocument,
+                       const PointerType& p,
+                       const ValueType& value,
+                       const ValueType& name,
+                       const ValueType& document)
+    {
+        if(const ValueType* v = GetMember(value, name))
+        {
+            if(v->IsArray() && v->Size() > 0)
+            {
                 PointerType q = p.Append(name, allocator_);
-                out.count = v->Size();
-                out.schemas = static_cast<const Schema**>(allocator_->Malloc(out.count * sizeof(const Schema*)));
-                memset(out.schemas, 0, sizeof(Schema*)* out.count);
-                for (SizeType i = 0; i < out.count; i++)
-                    schemaDocument.CreateSchema(&out.schemas[i], q.Append(i, allocator_), (*v)[i], document, id_);
+                out.count     = v->Size();
+                out.schemas   = static_cast<const Schema**>(
+                    allocator_->Malloc(out.count * sizeof(const Schema*)));
+                memset(out.schemas, 0, sizeof(Schema*) * out.count);
+                for(SizeType i = 0; i < out.count; i++)
+                    schemaDocument.CreateSchema(
+                        &out.schemas[i], q.Append(i, allocator_), (*v)[i], document, id_);
                 out.begin = validatorCount_;
                 validatorCount_ += out.count;
             }
@@ -1397,11 +1731,16 @@ private:
 
 #if RAPIDJSON_SCHEMA_USE_INTERNALREGEX
     template <typename ValueType>
-    RegexType* CreatePattern(const ValueType& value, SchemaDocumentType* sd, const PointerType& p) {
-        if (value.IsString()) {
-            RegexType* r = new (allocator_->Malloc(sizeof(RegexType))) RegexType(value.GetString(), allocator_);
-            if (!r->IsValid()) {
-                sd->SchemaErrorValue(kSchemaErrorRegexInvalid, p, value.GetString(), value.GetStringLength());
+    RegexType* CreatePattern(const ValueType& value, SchemaDocumentType* sd, const PointerType& p)
+    {
+        if(value.IsString())
+        {
+            RegexType* r =
+                new(allocator_->Malloc(sizeof(RegexType))) RegexType(value.GetString(), allocator_);
+            if(!r->IsValid())
+            {
+                sd->SchemaErrorValue(
+                    kSchemaErrorRegexInvalid, p, value.GetString(), value.GetStringLength());
                 r->~RegexType();
                 AllocatorType::Free(r);
                 r = 0;
@@ -1411,88 +1750,115 @@ private:
         return 0;
     }
 
-    static bool IsPatternMatch(const RegexType* pattern, const Ch *str, SizeType) {
+    static bool IsPatternMatch(const RegexType* pattern, const Ch* str, SizeType)
+    {
         GenericRegexSearch<RegexType> rs(*pattern);
         return rs.Search(str);
     }
 #elif RAPIDJSON_SCHEMA_USE_STDREGEX
     template <typename ValueType>
-    RegexType* CreatePattern(const ValueType& value, SchemaDocumentType* sd, const PointerType& p) {
-        if (value.IsString()) {
-            RegexType *r = static_cast<RegexType*>(allocator_->Malloc(sizeof(RegexType)));
-            try {
-                return new (r) RegexType(value.GetString(), std::size_t(value.GetStringLength()), std::regex_constants::ECMAScript);
+    RegexType* CreatePattern(const ValueType& value, SchemaDocumentType* sd, const PointerType& p)
+    {
+        if(value.IsString())
+        {
+            RegexType* r = static_cast<RegexType*>(allocator_->Malloc(sizeof(RegexType)));
+            try
+            {
+                return new(r) RegexType(value.GetString(),
+                                        std::size_t(value.GetStringLength()),
+                                        std::regex_constants::ECMAScript);
             }
-            catch (const std::regex_error& e) {
-                sd->SchemaErrorValue(kSchemaErrorRegexInvalid, p, value.GetString(), value.GetStringLength());
+            catch(const std::regex_error& e)
+            {
+                sd->SchemaErrorValue(
+                    kSchemaErrorRegexInvalid, p, value.GetString(), value.GetStringLength());
                 AllocatorType::Free(r);
             }
         }
         return 0;
     }
 
-    static bool IsPatternMatch(const RegexType* pattern, const Ch *str, SizeType length) {
+    static bool IsPatternMatch(const RegexType* pattern, const Ch* str, SizeType length)
+    {
         std::match_results<const Ch*> r;
         return std::regex_search(str, str + length, r, *pattern);
     }
 #else
     template <typename ValueType>
-    RegexType* CreatePattern(const ValueType&) {
+    RegexType* CreatePattern(const ValueType&)
+    {
         return 0;
     }
 
-    static bool IsPatternMatch(const RegexType*, const Ch *, SizeType) { return true; }
+    static bool IsPatternMatch(const RegexType*, const Ch*, SizeType) { return true; }
 #endif // RAPIDJSON_SCHEMA_USE_STDREGEX
 
-    void AddType(const ValueType& type) {
-        if      (type == GetNullString()   ) type_ |= 1 << kNullSchemaType;
-        else if (type == GetBooleanString()) type_ |= 1 << kBooleanSchemaType;
-        else if (type == GetObjectString() ) type_ |= 1 << kObjectSchemaType;
-        else if (type == GetArrayString()  ) type_ |= 1 << kArraySchemaType;
-        else if (type == GetStringString() ) type_ |= 1 << kStringSchemaType;
-        else if (type == GetIntegerString()) type_ |= 1 << kIntegerSchemaType;
-        else if (type == GetNumberString() ) type_ |= (1 << kNumberSchemaType) | (1 << kIntegerSchemaType);
+    void AddType(const ValueType& type)
+    {
+        if(type == GetNullString())
+            type_ |= 1 << kNullSchemaType;
+        else if(type == GetBooleanString())
+            type_ |= 1 << kBooleanSchemaType;
+        else if(type == GetObjectString())
+            type_ |= 1 << kObjectSchemaType;
+        else if(type == GetArrayString())
+            type_ |= 1 << kArraySchemaType;
+        else if(type == GetStringString())
+            type_ |= 1 << kStringSchemaType;
+        else if(type == GetIntegerString())
+            type_ |= 1 << kIntegerSchemaType;
+        else if(type == GetNumberString())
+            type_ |= (1 << kNumberSchemaType) | (1 << kIntegerSchemaType);
     }
 
-    // Creates parallel validators for allOf, anyOf, oneOf, not and schema dependencies, if required.
-    // Also creates a hasher for enums and array uniqueness, if required.
-    // Also a useful place to add type-independent error checks.
-    bool CreateParallelValidator(Context& context) const {
-        if (enum_ || context.arrayUniqueness)
+    // Creates parallel validators for allOf, anyOf, oneOf, not and schema dependencies, if
+    // required. Also creates a hasher for enums and array uniqueness, if required. Also a useful
+    // place to add type-independent error checks.
+    bool CreateParallelValidator(Context& context) const
+    {
+        if(enum_ || context.arrayUniqueness)
             context.hasher = context.factory.CreateHasher();
 
-        if (validatorCount_) {
+        if(validatorCount_)
+        {
             RAPIDJSON_ASSERT(context.validators == 0);
-            context.validators = static_cast<ISchemaValidator**>(context.factory.MallocState(sizeof(ISchemaValidator*) * validatorCount_));
+            context.validators = static_cast<ISchemaValidator**>(
+                context.factory.MallocState(sizeof(ISchemaValidator*) * validatorCount_));
             std::memset(context.validators, 0, sizeof(ISchemaValidator*) * validatorCount_);
             context.validatorCount = validatorCount_;
 
             // Always return after first failure for these sub-validators
-            if (allOf_.schemas)
+            if(allOf_.schemas)
                 CreateSchemaValidators(context, allOf_, false);
 
-            if (anyOf_.schemas)
+            if(anyOf_.schemas)
                 CreateSchemaValidators(context, anyOf_, false);
 
-            if (oneOf_.schemas)
+            if(oneOf_.schemas)
                 CreateSchemaValidators(context, oneOf_, false);
 
-            if (not_)
-                context.validators[notValidatorIndex_] = context.factory.CreateSchemaValidator(*not_, false);
+            if(not_)
+                context.validators[notValidatorIndex_] =
+                    context.factory.CreateSchemaValidator(*not_, false);
 
-            if (hasSchemaDependencies_) {
-                for (SizeType i = 0; i < propertyCount_; i++)
-                    if (properties_[i].dependenciesSchema)
-                        context.validators[properties_[i].dependenciesValidatorIndex] = context.factory.CreateSchemaValidator(*properties_[i].dependenciesSchema, false);
+            if(hasSchemaDependencies_)
+            {
+                for(SizeType i = 0; i < propertyCount_; i++)
+                    if(properties_[i].dependenciesSchema)
+                        context.validators[properties_[i].dependenciesValidatorIndex] =
+                            context.factory.CreateSchemaValidator(
+                                *properties_[i].dependenciesSchema, false);
             }
         }
 
         // Add any other type-independent checks here
-        if (readOnly_ && (context.flags & kValidateWriteFlag)) {
+        if(readOnly_ && (context.flags & kValidateWriteFlag))
+        {
             context.error_handler.DisallowedWhenWriting();
             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorReadOnly);
         }
-        if (writeOnly_ && (context.flags & kValidateReadFlag)) {
+        if(writeOnly_ && (context.flags & kValidateReadFlag))
+        {
             context.error_handler.DisallowedWhenReading();
             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorWriteOnly);
         }
@@ -1500,18 +1866,23 @@ private:
         return true;
     }
 
-    void CreateSchemaValidators(Context& context, const SchemaArray& schemas, const bool inheritContinueOnErrors) const {
-        for (SizeType i = 0; i < schemas.count; i++)
-            context.validators[schemas.begin + i] = context.factory.CreateSchemaValidator(*schemas.schemas[i], inheritContinueOnErrors);
+    void CreateSchemaValidators(Context& context,
+                                const SchemaArray& schemas,
+                                const bool inheritContinueOnErrors) const
+    {
+        for(SizeType i = 0; i < schemas.count; i++)
+            context.validators[schemas.begin + i] =
+                context.factory.CreateSchemaValidator(*schemas.schemas[i], inheritContinueOnErrors);
     }
 
     // O(n)
-    bool FindPropertyIndex(const ValueType& name, SizeType* outIndex) const {
-        SizeType len = name.GetStringLength();
+    bool FindPropertyIndex(const ValueType& name, SizeType* outIndex) const
+    {
+        SizeType len  = name.GetStringLength();
         const Ch* str = name.GetString();
-        for (SizeType index = 0; index < propertyCount_; index++)
-            if (properties_[index].name.GetStringLength() == len &&
-                (std::memcmp(properties_[index].name.GetString(), str, sizeof(Ch) * len) == 0))
+        for(SizeType index = 0; index < propertyCount_; index++)
+            if(properties_[index].name.GetStringLength() == len &&
+               (std::memcmp(properties_[index].name.GetString(), str, sizeof(Ch) * len) == 0))
             {
                 *outIndex = index;
                 return true;
@@ -1519,158 +1890,218 @@ private:
         return false;
     }
 
-    bool CheckBool(Context& context, bool) const {
-        if (!(type_ & (1 << kBooleanSchemaType))) {
+    bool CheckBool(Context& context, bool) const
+    {
+        if(!(type_ & (1 << kBooleanSchemaType)))
+        {
             DisallowedType(context, GetBooleanString());
             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorType);
         }
         return true;
     }
 
-    bool CheckInt(Context& context, int64_t i) const {
-        if (!(type_ & ((1 << kIntegerSchemaType) | (1 << kNumberSchemaType)))) {
+    bool CheckInt(Context& context, int64_t i) const
+    {
+        if(!(type_ & ((1 << kIntegerSchemaType) | (1 << kNumberSchemaType))))
+        {
             DisallowedType(context, GetIntegerString());
             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorType);
         }
 
-        if (!minimum_.IsNull()) {
-            if (minimum_.IsInt64()) {
-                if (exclusiveMinimum_ ? i <= minimum_.GetInt64() : i < minimum_.GetInt64()) {
+        if(!minimum_.IsNull())
+        {
+            if(minimum_.IsInt64())
+            {
+                if(exclusiveMinimum_ ? i <= minimum_.GetInt64() : i < minimum_.GetInt64())
+                {
                     context.error_handler.BelowMinimum(i, minimum_, exclusiveMinimum_);
-                    RAPIDJSON_INVALID_KEYWORD_RETURN(exclusiveMinimum_ ? kValidateErrorExclusiveMinimum : kValidateErrorMinimum);
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(
+                        exclusiveMinimum_ ? kValidateErrorExclusiveMinimum : kValidateErrorMinimum);
                 }
             }
-            else if (minimum_.IsUint64()) {
+            else if(minimum_.IsUint64())
+            {
                 context.error_handler.BelowMinimum(i, minimum_, exclusiveMinimum_);
-                RAPIDJSON_INVALID_KEYWORD_RETURN(exclusiveMinimum_ ? kValidateErrorExclusiveMinimum : kValidateErrorMinimum); // i <= max(int64_t) < minimum.GetUint64()
+                RAPIDJSON_INVALID_KEYWORD_RETURN(
+                    exclusiveMinimum_
+                        ? kValidateErrorExclusiveMinimum
+                        : kValidateErrorMinimum); // i <= max(int64_t) < minimum.GetUint64()
             }
-            else if (!CheckDoubleMinimum(context, static_cast<double>(i)))
+            else if(!CheckDoubleMinimum(context, static_cast<double>(i)))
                 return false;
         }
 
-        if (!maximum_.IsNull()) {
-            if (maximum_.IsInt64()) {
-                if (exclusiveMaximum_ ? i >= maximum_.GetInt64() : i > maximum_.GetInt64()) {
+        if(!maximum_.IsNull())
+        {
+            if(maximum_.IsInt64())
+            {
+                if(exclusiveMaximum_ ? i >= maximum_.GetInt64() : i > maximum_.GetInt64())
+                {
                     context.error_handler.AboveMaximum(i, maximum_, exclusiveMaximum_);
-                    RAPIDJSON_INVALID_KEYWORD_RETURN(exclusiveMaximum_ ? kValidateErrorExclusiveMaximum : kValidateErrorMaximum);
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(
+                        exclusiveMaximum_ ? kValidateErrorExclusiveMaximum : kValidateErrorMaximum);
                 }
             }
-            else if (maximum_.IsUint64()) { }
-                /* do nothing */ // i <= max(int64_t) < maximum_.GetUint64()
-            else if (!CheckDoubleMaximum(context, static_cast<double>(i)))
+            else if(maximum_.IsUint64()) {}
+            /* do nothing */ // i <= max(int64_t) < maximum_.GetUint64()
+            else if(!CheckDoubleMaximum(context, static_cast<double>(i)))
                 return false;
         }
 
-        if (!multipleOf_.IsNull()) {
-            if (multipleOf_.IsUint64()) {
-                if (static_cast<uint64_t>(i >= 0 ? i : -i) % multipleOf_.GetUint64() != 0) {
+        if(!multipleOf_.IsNull())
+        {
+            if(multipleOf_.IsUint64())
+            {
+                if(static_cast<uint64_t>(i >= 0 ? i : -i) % multipleOf_.GetUint64() != 0)
+                {
                     context.error_handler.NotMultipleOf(i, multipleOf_);
                     RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorMultipleOf);
                 }
             }
-            else if (!CheckDoubleMultipleOf(context, static_cast<double>(i)))
+            else if(!CheckDoubleMultipleOf(context, static_cast<double>(i)))
                 return false;
         }
 
         return true;
     }
 
-    bool CheckUint(Context& context, uint64_t i) const {
-        if (!(type_ & ((1 << kIntegerSchemaType) | (1 << kNumberSchemaType)))) {
+    bool CheckUint(Context& context, uint64_t i) const
+    {
+        if(!(type_ & ((1 << kIntegerSchemaType) | (1 << kNumberSchemaType))))
+        {
             DisallowedType(context, GetIntegerString());
             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorType);
         }
 
-        if (!minimum_.IsNull()) {
-            if (minimum_.IsUint64()) {
-                if (exclusiveMinimum_ ? i <= minimum_.GetUint64() : i < minimum_.GetUint64()) {
+        if(!minimum_.IsNull())
+        {
+            if(minimum_.IsUint64())
+            {
+                if(exclusiveMinimum_ ? i <= minimum_.GetUint64() : i < minimum_.GetUint64())
+                {
                     context.error_handler.BelowMinimum(i, minimum_, exclusiveMinimum_);
-                    RAPIDJSON_INVALID_KEYWORD_RETURN(exclusiveMinimum_ ? kValidateErrorExclusiveMinimum : kValidateErrorMinimum);
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(
+                        exclusiveMinimum_ ? kValidateErrorExclusiveMinimum : kValidateErrorMinimum);
                 }
             }
-            else if (minimum_.IsInt64())
+            else if(minimum_.IsInt64())
                 /* do nothing */; // i >= 0 > minimum.Getint64()
-            else if (!CheckDoubleMinimum(context, static_cast<double>(i)))
+            else if(!CheckDoubleMinimum(context, static_cast<double>(i)))
                 return false;
         }
 
-        if (!maximum_.IsNull()) {
-            if (maximum_.IsUint64()) {
-                if (exclusiveMaximum_ ? i >= maximum_.GetUint64() : i > maximum_.GetUint64()) {
+        if(!maximum_.IsNull())
+        {
+            if(maximum_.IsUint64())
+            {
+                if(exclusiveMaximum_ ? i >= maximum_.GetUint64() : i > maximum_.GetUint64())
+                {
                     context.error_handler.AboveMaximum(i, maximum_, exclusiveMaximum_);
-                    RAPIDJSON_INVALID_KEYWORD_RETURN(exclusiveMaximum_ ? kValidateErrorExclusiveMaximum : kValidateErrorMaximum);
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(
+                        exclusiveMaximum_ ? kValidateErrorExclusiveMaximum : kValidateErrorMaximum);
                 }
             }
-            else if (maximum_.IsInt64()) {
+            else if(maximum_.IsInt64())
+            {
                 context.error_handler.AboveMaximum(i, maximum_, exclusiveMaximum_);
-                RAPIDJSON_INVALID_KEYWORD_RETURN(exclusiveMaximum_ ? kValidateErrorExclusiveMaximum : kValidateErrorMaximum); // i >= 0 > maximum_
+                RAPIDJSON_INVALID_KEYWORD_RETURN(exclusiveMaximum_
+                                                     ? kValidateErrorExclusiveMaximum
+                                                     : kValidateErrorMaximum); // i >= 0 > maximum_
             }
-            else if (!CheckDoubleMaximum(context, static_cast<double>(i)))
+            else if(!CheckDoubleMaximum(context, static_cast<double>(i)))
                 return false;
         }
 
-        if (!multipleOf_.IsNull()) {
-            if (multipleOf_.IsUint64()) {
-                if (i % multipleOf_.GetUint64() != 0) {
+        if(!multipleOf_.IsNull())
+        {
+            if(multipleOf_.IsUint64())
+            {
+                if(i % multipleOf_.GetUint64() != 0)
+                {
                     context.error_handler.NotMultipleOf(i, multipleOf_);
                     RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorMultipleOf);
                 }
             }
-            else if (!CheckDoubleMultipleOf(context, static_cast<double>(i)))
+            else if(!CheckDoubleMultipleOf(context, static_cast<double>(i)))
                 return false;
         }
 
         return true;
     }
 
-    bool CheckDoubleMinimum(Context& context, double d) const {
-        if (exclusiveMinimum_ ? d <= minimum_.GetDouble() : d < minimum_.GetDouble()) {
+    bool CheckDoubleMinimum(Context& context, double d) const
+    {
+        if(exclusiveMinimum_ ? d <= minimum_.GetDouble() : d < minimum_.GetDouble())
+        {
             context.error_handler.BelowMinimum(d, minimum_, exclusiveMinimum_);
-            RAPIDJSON_INVALID_KEYWORD_RETURN(exclusiveMinimum_ ? kValidateErrorExclusiveMinimum : kValidateErrorMinimum);
+            RAPIDJSON_INVALID_KEYWORD_RETURN(exclusiveMinimum_ ? kValidateErrorExclusiveMinimum
+                                                               : kValidateErrorMinimum);
         }
         return true;
     }
 
-    bool CheckDoubleMaximum(Context& context, double d) const {
-        if (exclusiveMaximum_ ? d >= maximum_.GetDouble() : d > maximum_.GetDouble()) {
+    bool CheckDoubleMaximum(Context& context, double d) const
+    {
+        if(exclusiveMaximum_ ? d >= maximum_.GetDouble() : d > maximum_.GetDouble())
+        {
             context.error_handler.AboveMaximum(d, maximum_, exclusiveMaximum_);
-            RAPIDJSON_INVALID_KEYWORD_RETURN(exclusiveMaximum_ ? kValidateErrorExclusiveMaximum : kValidateErrorMaximum);
+            RAPIDJSON_INVALID_KEYWORD_RETURN(exclusiveMaximum_ ? kValidateErrorExclusiveMaximum
+                                                               : kValidateErrorMaximum);
         }
         return true;
     }
 
-    bool CheckDoubleMultipleOf(Context& context, double d) const {
+    bool CheckDoubleMultipleOf(Context& context, double d) const
+    {
         double a = std::abs(d), b = std::abs(multipleOf_.GetDouble());
-        double q = a / b;
-        double qRounded = std::floor(q + 0.5);
+        double q             = a / b;
+        double qRounded      = std::floor(q + 0.5);
         double scaledEpsilon = (q + qRounded) * std::numeric_limits<double>::epsilon();
-        double difference = std::abs(qRounded - q);
-        bool isMultiple = difference <= scaledEpsilon || difference < (std::numeric_limits<double>::min)();
-        if (!isMultiple) {
+        double difference    = std::abs(qRounded - q);
+        bool isMultiple =
+            difference <= scaledEpsilon || difference < (std::numeric_limits<double>::min)();
+        if(!isMultiple)
+        {
             context.error_handler.NotMultipleOf(d, multipleOf_);
             RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorMultipleOf);
         }
         return true;
     }
 
-    void DisallowedType(Context& context, const ValueType& actualType) const {
+    void DisallowedType(Context& context, const ValueType& actualType) const
+    {
         ErrorHandler& eh = context.error_handler;
         eh.StartDisallowedType();
 
-        if (type_ & (1 << kNullSchemaType)) eh.AddExpectedType(GetNullString());
-        if (type_ & (1 << kBooleanSchemaType)) eh.AddExpectedType(GetBooleanString());
-        if (type_ & (1 << kObjectSchemaType)) eh.AddExpectedType(GetObjectString());
-        if (type_ & (1 << kArraySchemaType)) eh.AddExpectedType(GetArrayString());
-        if (type_ & (1 << kStringSchemaType)) eh.AddExpectedType(GetStringString());
+        if(type_ & (1 << kNullSchemaType))
+            eh.AddExpectedType(GetNullString());
+        if(type_ & (1 << kBooleanSchemaType))
+            eh.AddExpectedType(GetBooleanString());
+        if(type_ & (1 << kObjectSchemaType))
+            eh.AddExpectedType(GetObjectString());
+        if(type_ & (1 << kArraySchemaType))
+            eh.AddExpectedType(GetArrayString());
+        if(type_ & (1 << kStringSchemaType))
+            eh.AddExpectedType(GetStringString());
 
-        if (type_ & (1 << kNumberSchemaType)) eh.AddExpectedType(GetNumberString());
-        else if (type_ & (1 << kIntegerSchemaType)) eh.AddExpectedType(GetIntegerString());
+        if(type_ & (1 << kNumberSchemaType))
+            eh.AddExpectedType(GetNumberString());
+        else if(type_ & (1 << kIntegerSchemaType))
+            eh.AddExpectedType(GetIntegerString());
 
         eh.EndDisallowedType(actualType);
     }
 
-    struct Property {
-        Property() : schema(), dependenciesSchema(), dependenciesValidatorIndex(), dependencies(), required(false) {}
+    struct Property
+    {
+        Property()
+            : schema(),
+              dependenciesSchema(),
+              dependenciesValidatorIndex(),
+              dependencies(),
+              required(false)
+        {
+        }
         ~Property() { AllocatorType::Free(dependencies); }
         SValue name;
         const SchemaType* schema;
@@ -1680,10 +2111,13 @@ private:
         bool required;
     };
 
-    struct PatternProperty {
+    struct PatternProperty
+    {
         PatternProperty() : schema(), pattern() {}
-        ~PatternProperty() {
-            if (pattern) {
+        ~PatternProperty()
+        {
+            if(pattern)
+            {
                 pattern->~RegexType();
                 AllocatorType::Free(pattern);
             }
@@ -1746,30 +2180,37 @@ private:
     bool nullable_;
 };
 
-template<typename Stack, typename Ch>
-struct TokenHelper {
-    RAPIDJSON_FORCEINLINE static void AppendIndexToken(Stack& documentStack, SizeType index) {
+template <typename Stack, typename Ch>
+struct TokenHelper
+{
+    RAPIDJSON_FORCEINLINE static void AppendIndexToken(Stack& documentStack, SizeType index)
+    {
         *documentStack.template Push<Ch>() = '/';
         char buffer[21];
-        size_t length = static_cast<size_t>((sizeof(SizeType) == 4 ? u32toa(index, buffer) : u64toa(index, buffer)) - buffer);
-        for (size_t i = 0; i < length; i++)
+        size_t length = static_cast<size_t>(
+            (sizeof(SizeType) == 4 ? u32toa(index, buffer) : u64toa(index, buffer)) - buffer);
+        for(size_t i = 0; i < length; i++)
             *documentStack.template Push<Ch>() = static_cast<Ch>(buffer[i]);
     }
 };
 
 // Partial specialized version for char to prevent buffer copying.
 template <typename Stack>
-struct TokenHelper<Stack, char> {
-    RAPIDJSON_FORCEINLINE static void AppendIndexToken(Stack& documentStack, SizeType index) {
-        RAPIDJSON_IF_CONSTEXPR (sizeof(SizeType) == 4) {
-            char *buffer = documentStack.template Push<char>(1 + 10); // '/' + uint
-            *buffer++ = '/';
+struct TokenHelper<Stack, char>
+{
+    RAPIDJSON_FORCEINLINE static void AppendIndexToken(Stack& documentStack, SizeType index)
+    {
+        RAPIDJSON_IF_CONSTEXPR(sizeof(SizeType) == 4)
+        {
+            char* buffer    = documentStack.template Push<char>(1 + 10); // '/' + uint
+            *buffer++       = '/';
             const char* end = internal::u32toa(index, buffer);
-             documentStack.template Pop<char>(static_cast<size_t>(10 - (end - buffer)));
+            documentStack.template Pop<char>(static_cast<size_t>(10 - (end - buffer)));
         }
-        else {
-            char *buffer = documentStack.template Push<char>(1 + 20); // '/' + uint64
-            *buffer++ = '/';
+        else
+        {
+            char* buffer    = documentStack.template Push<char>(1 + 20); // '/' + uint64
+            *buffer++       = '/';
             const char* end = internal::u64toa(index, buffer);
             documentStack.template Pop<char>(static_cast<size_t>(20 - (end - buffer)));
         }
@@ -1782,15 +2223,18 @@ struct TokenHelper<Stack, char> {
 // IGenericRemoteSchemaDocumentProvider
 
 template <typename SchemaDocumentType>
-class IGenericRemoteSchemaDocumentProvider {
-public:
+class IGenericRemoteSchemaDocumentProvider
+{
+    public:
     typedef typename SchemaDocumentType::Ch Ch;
     typedef typename SchemaDocumentType::ValueType ValueType;
     typedef typename SchemaDocumentType::AllocatorType AllocatorType;
 
     virtual ~IGenericRemoteSchemaDocumentProvider() {}
     virtual const SchemaDocumentType* GetRemoteDocument(const Ch* uri, SizeType length) = 0;
-    virtual const SchemaDocumentType* GetRemoteDocument(const GenericUri<ValueType, AllocatorType> uri, Specification& spec) {
+    virtual const SchemaDocumentType*
+    GetRemoteDocument(const GenericUri<ValueType, AllocatorType> uri, Specification& spec)
+    {
         // Default implementation just calls through for compatibility
         // Following line suppresses unused parameter warning
         (void)spec;
@@ -1812,10 +2256,12 @@ public:
     \tparam Allocator Allocator type for allocating memory of this document.
 */
 template <typename ValueT, typename Allocator = CrtAllocator>
-class GenericSchemaDocument {
-public:
+class GenericSchemaDocument
+{
+    public:
     typedef ValueT ValueType;
-    typedef IGenericRemoteSchemaDocumentProvider<GenericSchemaDocument> IRemoteSchemaDocumentProviderType;
+    typedef IGenericRemoteSchemaDocumentProvider<GenericSchemaDocument>
+        IRemoteSchemaDocumentProviderType;
     typedef Allocator AllocatorType;
     typedef typename ValueType::EncodingType EncodingType;
     typedef typename EncodingType::Ch Ch;
@@ -1835,28 +2281,32 @@ public:
         \param document A JSON document as source.
         \param uri The base URI of this schema document for purposes of violation reporting.
         \param uriLength Length of \c name, in code points.
-        \param remoteProvider An optional remote schema document provider for resolving remote reference. Can be null.
-        \param allocator An optional allocator instance for allocating memory. Can be null.
-        \param pointer An optional JSON pointer to the start of the schema document
-        \param spec Optional schema draft or OpenAPI version. Used if no specification in document. Defaults to draft-04.
+        \param remoteProvider An optional remote schema document provider for resolving remote
+       reference. Can be null. \param allocator An optional allocator instance for allocating
+       memory. Can be null. \param pointer An optional JSON pointer to the start of the schema
+       document \param spec Optional schema draft or OpenAPI version. Used if no specification in
+       document. Defaults to draft-04.
     */
-    explicit GenericSchemaDocument(const ValueType& document, const Ch* uri = 0, SizeType uriLength = 0,
-        IRemoteSchemaDocumentProviderType* remoteProvider = 0, Allocator* allocator = 0,
-        const PointerType& pointer = PointerType(), // PR #1393
-        const Specification& spec = Specification(kDraft04)) :
-        remoteProvider_(remoteProvider),
-        allocator_(allocator),
-        ownAllocator_(),
-        root_(),
-        typeless_(),
-        schemaMap_(allocator, kInitialSchemaMapSize),
-        schemaRef_(allocator, kInitialSchemaRefSize),
-        spec_(spec),
-        error_(kObjectType),
-        currentError_()
+    explicit GenericSchemaDocument(const ValueType& document,
+                                   const Ch* uri                                     = 0,
+                                   SizeType uriLength                                = 0,
+                                   IRemoteSchemaDocumentProviderType* remoteProvider = 0,
+                                   Allocator* allocator                              = 0,
+                                   const PointerType& pointer = PointerType(), // PR #1393
+                                   const Specification& spec  = Specification(kDraft04))
+        : remoteProvider_(remoteProvider),
+          allocator_(allocator),
+          ownAllocator_(),
+          root_(),
+          typeless_(),
+          schemaMap_(allocator, kInitialSchemaMapSize),
+          schemaRef_(allocator, kInitialSchemaRefSize),
+          spec_(spec),
+          error_(kObjectType),
+          currentError_()
     {
         RAPIDJSON_SCHEMA_PRINT(Method, "GenericSchemaDocument::GenericSchemaDocument");
-        if (!allocator_)
+        if(!allocator_)
             ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
 
         Ch noUri[1] = {0};
@@ -1864,7 +2314,12 @@ public:
         docId_ = UriType(uri_, allocator_);
 
         typeless_ = static_cast<SchemaType*>(allocator_->Malloc(sizeof(SchemaType)));
-        new (typeless_) SchemaType(this, PointerType(), ValueType(kObjectType).Move(), ValueType(kObjectType).Move(), allocator_, docId_);
+        new(typeless_) SchemaType(this,
+                                  PointerType(),
+                                  ValueType(kObjectType).Move(),
+                                  ValueType(kObjectType).Move(),
+                                  allocator_,
+                                  docId_);
 
         // Establish the schema draft or open api version.
         // We only ever look for '$schema' or 'swagger' or 'openapi' at the root of the document.
@@ -1874,16 +2329,22 @@ public:
         // And call HandleRefSchema() if there are $ref.
         // PR #1393 use input pointer if supplied
         root_ = typeless_;
-        if (pointer.GetTokenCount() == 0) {
+        if(pointer.GetTokenCount() == 0)
+        {
             CreateSchemaRecursive(&root_, pointer, document, document, docId_);
         }
-        else if (const ValueType* v = pointer.Get(document)) {
+        else if(const ValueType* v = pointer.Get(document))
+        {
             CreateSchema(&root_, pointer, *v, document, docId_);
         }
-        else {
+        else
+        {
             GenericStringBuffer<EncodingType> sb;
             pointer.StringifyUriFragment(sb);
-            SchemaErrorValue(kSchemaErrorStartUnknown, PointerType(), sb.GetString(), static_cast<SizeType>(sb.GetSize() / sizeof(Ch)));
+            SchemaErrorValue(kSchemaErrorStartUnknown,
+                             PointerType(),
+                             sb.GetString(),
+                             static_cast<SizeType>(sb.GetSize() / sizeof(Ch)));
         }
 
         RAPIDJSON_ASSERT(root_ != 0);
@@ -1893,33 +2354,35 @@ public:
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
     //! Move constructor in C++11
-    GenericSchemaDocument(GenericSchemaDocument&& rhs) RAPIDJSON_NOEXCEPT :
-        remoteProvider_(rhs.remoteProvider_),
-        allocator_(rhs.allocator_),
-        ownAllocator_(rhs.ownAllocator_),
-        root_(rhs.root_),
-        typeless_(rhs.typeless_),
-        schemaMap_(std::move(rhs.schemaMap_)),
-        schemaRef_(std::move(rhs.schemaRef_)),
-        uri_(std::move(rhs.uri_)),
-        docId_(std::move(rhs.docId_)),
-        spec_(rhs.spec_),
-        error_(std::move(rhs.error_)),
-        currentError_(std::move(rhs.currentError_))
+    GenericSchemaDocument(GenericSchemaDocument&& rhs) RAPIDJSON_NOEXCEPT
+        : remoteProvider_(rhs.remoteProvider_),
+          allocator_(rhs.allocator_),
+          ownAllocator_(rhs.ownAllocator_),
+          root_(rhs.root_),
+          typeless_(rhs.typeless_),
+          schemaMap_(std::move(rhs.schemaMap_)),
+          schemaRef_(std::move(rhs.schemaRef_)),
+          uri_(std::move(rhs.uri_)),
+          docId_(std::move(rhs.docId_)),
+          spec_(rhs.spec_),
+          error_(std::move(rhs.error_)),
+          currentError_(std::move(rhs.currentError_))
     {
         rhs.remoteProvider_ = 0;
-        rhs.allocator_ = 0;
-        rhs.ownAllocator_ = 0;
-        rhs.typeless_ = 0;
+        rhs.allocator_      = 0;
+        rhs.ownAllocator_   = 0;
+        rhs.typeless_       = 0;
     }
 #endif
 
     //! Destructor
-    ~GenericSchemaDocument() {
-        while (!schemaMap_.Empty())
+    ~GenericSchemaDocument()
+    {
+        while(!schemaMap_.Empty())
             schemaMap_.template Pop<SchemaEntry>(1)->~SchemaEntry();
 
-        if (typeless_) {
+        if(typeless_)
+        {
             typeless_->~SchemaType();
             Allocator::Free(typeless_);
         }
@@ -1939,16 +2402,18 @@ public:
 
     //! Static method to get the specification of any schema document
     //  Returns kDraftNone if document is silent
-    static const Specification GetSpecification(const ValueType& document) {
-      SchemaDraft draft = GetSchemaDraft(document);
-      if (draft != kDraftNone)
-        return Specification(draft);
-      else {
-        OpenApiVersion oapi = GetOpenApiVersion(document);
-        if (oapi != kVersionNone)
-          return Specification(oapi);
-      }
-      return Specification(kDraftNone);
+    static const Specification GetSpecification(const ValueType& document)
+    {
+        SchemaDraft draft = GetSchemaDraft(document);
+        if(draft != kDraftNone)
+            return Specification(draft);
+        else
+        {
+            OpenApiVersion oapi = GetOpenApiVersion(document);
+            if(oapi != kVersionNone)
+                return Specification(oapi);
+        }
+        return Specification(kDraftNone);
     }
 
     //! Get the root schema.
@@ -1958,47 +2423,63 @@ public:
     GValue& GetError() { return error_; }
     const GValue& GetError() const { return error_; }
 
-    static const StringRefType& GetSchemaErrorKeyword(SchemaErrorCode schemaErrorCode) {
-        switch (schemaErrorCode) {
-            case kSchemaErrorStartUnknown:             return GetStartUnknownString();
-            case kSchemaErrorRefPlainName:             return GetRefPlainNameString();
-            case kSchemaErrorRefInvalid:               return GetRefInvalidString();
-            case kSchemaErrorRefPointerInvalid:        return GetRefPointerInvalidString();
-            case kSchemaErrorRefUnknown:               return GetRefUnknownString();
-            case kSchemaErrorRefCyclical:              return GetRefCyclicalString();
-            case kSchemaErrorRefNoRemoteProvider:      return GetRefNoRemoteProviderString();
-            case kSchemaErrorRefNoRemoteSchema:        return GetRefNoRemoteSchemaString();
-            case kSchemaErrorRegexInvalid:             return GetRegexInvalidString();
-            case kSchemaErrorSpecUnknown:              return GetSpecUnknownString();
-            case kSchemaErrorSpecUnsupported:          return GetSpecUnsupportedString();
-            case kSchemaErrorSpecIllegal:              return GetSpecIllegalString();
-            case kSchemaErrorReadOnlyAndWriteOnly:     return GetReadOnlyAndWriteOnlyString();
-            default:                                   return GetNullString();
+    static const StringRefType& GetSchemaErrorKeyword(SchemaErrorCode schemaErrorCode)
+    {
+        switch(schemaErrorCode)
+        {
+        case kSchemaErrorStartUnknown: return GetStartUnknownString();
+        case kSchemaErrorRefPlainName: return GetRefPlainNameString();
+        case kSchemaErrorRefInvalid: return GetRefInvalidString();
+        case kSchemaErrorRefPointerInvalid: return GetRefPointerInvalidString();
+        case kSchemaErrorRefUnknown: return GetRefUnknownString();
+        case kSchemaErrorRefCyclical: return GetRefCyclicalString();
+        case kSchemaErrorRefNoRemoteProvider: return GetRefNoRemoteProviderString();
+        case kSchemaErrorRefNoRemoteSchema: return GetRefNoRemoteSchemaString();
+        case kSchemaErrorRegexInvalid: return GetRegexInvalidString();
+        case kSchemaErrorSpecUnknown: return GetSpecUnknownString();
+        case kSchemaErrorSpecUnsupported: return GetSpecUnsupportedString();
+        case kSchemaErrorSpecIllegal: return GetSpecIllegalString();
+        case kSchemaErrorReadOnlyAndWriteOnly: return GetReadOnlyAndWriteOnlyString();
+        default: return GetNullString();
         }
     }
 
     //! Default error method
-    void SchemaError(const SchemaErrorCode code, const PointerType& location) {
-      currentError_ = GValue(kObjectType);
-      AddCurrentError(code, location);
+    void SchemaError(const SchemaErrorCode code, const PointerType& location)
+    {
+        currentError_ = GValue(kObjectType);
+        AddCurrentError(code, location);
     }
 
     //! Method for error with single string value insert
-    void SchemaErrorValue(const SchemaErrorCode code, const PointerType& location, const Ch* value, SizeType length) {
-      currentError_ = GValue(kObjectType);
-      currentError_.AddMember(GetValueString(), GValue(value, length, *allocator_).Move(), *allocator_);
-      AddCurrentError(code, location);
+    void SchemaErrorValue(const SchemaErrorCode code,
+                          const PointerType& location,
+                          const Ch* value,
+                          SizeType length)
+    {
+        currentError_ = GValue(kObjectType);
+        currentError_.AddMember(
+            GetValueString(), GValue(value, length, *allocator_).Move(), *allocator_);
+        AddCurrentError(code, location);
     }
 
     //! Method for error with invalid pointer
-    void SchemaErrorPointer(const SchemaErrorCode code, const PointerType& location, const Ch* value, SizeType length, const PointerType& pointer) {
-      currentError_ = GValue(kObjectType);
-      currentError_.AddMember(GetValueString(), GValue(value, length, *allocator_).Move(), *allocator_);
-      currentError_.AddMember(GetOffsetString(), static_cast<SizeType>(pointer.GetParseErrorOffset() / sizeof(Ch)), *allocator_);
-      AddCurrentError(code, location);
+    void SchemaErrorPointer(const SchemaErrorCode code,
+                            const PointerType& location,
+                            const Ch* value,
+                            SizeType length,
+                            const PointerType& pointer)
+    {
+        currentError_ = GValue(kObjectType);
+        currentError_.AddMember(
+            GetValueString(), GValue(value, length, *allocator_).Move(), *allocator_);
+        currentError_.AddMember(GetOffsetString(),
+                                static_cast<SizeType>(pointer.GetParseErrorOffset() / sizeof(Ch)),
+                                *allocator_);
+        AddCurrentError(code, location);
     }
 
-  private:
+    private:
     //! Prohibit copying
     GenericSchemaDocument(const GenericSchemaDocument&);
     //! Prohibit assignment
@@ -2006,10 +2487,16 @@ public:
 
     typedef const PointerType* SchemaRefPtr; // PR #1393
 
-    struct SchemaEntry {
-        SchemaEntry(const PointerType& p, SchemaType* s, bool o, Allocator* allocator) : pointer(p, allocator), schema(s), owned(o) {}
-        ~SchemaEntry() {
-            if (owned) {
+    struct SchemaEntry
+    {
+        SchemaEntry(const PointerType& p, SchemaType* s, bool o, Allocator* allocator)
+            : pointer(p, allocator), schema(s), owned(o)
+        {
+        }
+        ~SchemaEntry()
+        {
+            if(owned)
+            {
                 schema->~SchemaType();
                 Allocator::Free(schema);
             }
@@ -2019,39 +2506,46 @@ public:
         bool owned;
     };
 
-    void AddErrorInstanceLocation(GValue& result, const PointerType& location) {
-      GenericStringBuffer<EncodingType> sb;
-      location.StringifyUriFragment(sb);
-      GValue instanceRef(sb.GetString(), static_cast<SizeType>(sb.GetSize() / sizeof(Ch)), *allocator_);
-      result.AddMember(GetInstanceRefString(), instanceRef, *allocator_);
+    void AddErrorInstanceLocation(GValue& result, const PointerType& location)
+    {
+        GenericStringBuffer<EncodingType> sb;
+        location.StringifyUriFragment(sb);
+        GValue instanceRef(
+            sb.GetString(), static_cast<SizeType>(sb.GetSize() / sizeof(Ch)), *allocator_);
+        result.AddMember(GetInstanceRefString(), instanceRef, *allocator_);
     }
 
-    void AddError(GValue& keyword, GValue& error) {
-      typename GValue::MemberIterator member = error_.FindMember(keyword);
-      if (member == error_.MemberEnd())
-        error_.AddMember(keyword, error, *allocator_);
-      else {
-        if (member->value.IsObject()) {
-          GValue errors(kArrayType);
-          errors.PushBack(member->value, *allocator_);
-          member->value = errors;
+    void AddError(GValue& keyword, GValue& error)
+    {
+        typename GValue::MemberIterator member = error_.FindMember(keyword);
+        if(member == error_.MemberEnd())
+            error_.AddMember(keyword, error, *allocator_);
+        else
+        {
+            if(member->value.IsObject())
+            {
+                GValue errors(kArrayType);
+                errors.PushBack(member->value, *allocator_);
+                member->value = errors;
+            }
+            member->value.PushBack(error, *allocator_);
         }
-        member->value.PushBack(error, *allocator_);
-      }
     }
 
-    void AddCurrentError(const SchemaErrorCode code, const PointerType& location) {
-      RAPIDJSON_SCHEMA_PRINT(InvalidKeyword, GetSchemaErrorKeyword(code));
-      currentError_.AddMember(GetErrorCodeString(), code, *allocator_);
-      AddErrorInstanceLocation(currentError_, location);
-      AddError(GValue(GetSchemaErrorKeyword(code)).Move(), currentError_);
+    void AddCurrentError(const SchemaErrorCode code, const PointerType& location)
+    {
+        RAPIDJSON_SCHEMA_PRINT(InvalidKeyword, GetSchemaErrorKeyword(code));
+        currentError_.AddMember(GetErrorCodeString(), code, *allocator_);
+        AddErrorInstanceLocation(currentError_, location);
+        AddError(GValue(GetSchemaErrorKeyword(code)).Move(), currentError_);
     }
 
-#define RAPIDJSON_STRING_(name, ...) \
-    static const StringRefType& Get##name##String() {\
-        static const Ch s[] = { __VA_ARGS__, '\0' };\
+#define RAPIDJSON_STRING_(name, ...)                                                        \
+    static const StringRefType& Get##name##String()                                         \
+    {                                                                                       \
+        static const Ch s[] = {__VA_ARGS__, '\0'};                                          \
         static const StringRefType v(s, static_cast<SizeType>(sizeof(s) / sizeof(Ch) - 1)); \
-        return v;\
+        return v;                                                                           \
     }
 
     RAPIDJSON_STRING_(InstanceRef, 'i', 'n', 's', 't', 'a', 'n', 'c', 'e', 'R', 'e', 'f')
@@ -2061,77 +2555,194 @@ public:
 
     RAPIDJSON_STRING_(Null, 'n', 'u', 'l', 'l')
     RAPIDJSON_STRING_(SpecUnknown, 'S', 'p', 'e', 'c', 'U', 'n', 'k', 'n', 'o', 'w', 'n')
-    RAPIDJSON_STRING_(SpecUnsupported, 'S', 'p', 'e', 'c', 'U', 'n', 's', 'u', 'p', 'p', 'o', 'r', 't', 'e', 'd')
+    RAPIDJSON_STRING_(
+        SpecUnsupported, 'S', 'p', 'e', 'c', 'U', 'n', 's', 'u', 'p', 'p', 'o', 'r', 't', 'e', 'd')
     RAPIDJSON_STRING_(SpecIllegal, 'S', 'p', 'e', 'c', 'I', 'l', 'l', 'e', 'g', 'a', 'l')
     RAPIDJSON_STRING_(StartUnknown, 'S', 't', 'a', 'r', 't', 'U', 'n', 'k', 'n', 'o', 'w', 'n')
     RAPIDJSON_STRING_(RefPlainName, 'R', 'e', 'f', 'P', 'l', 'a', 'i', 'n', 'N', 'a', 'm', 'e')
     RAPIDJSON_STRING_(RefInvalid, 'R', 'e', 'f', 'I', 'n', 'v', 'a', 'l', 'i', 'd')
-    RAPIDJSON_STRING_(RefPointerInvalid, 'R', 'e', 'f', 'P', 'o', 'i', 'n', 't', 'e', 'r', 'I', 'n', 'v', 'a', 'l', 'i', 'd')
+    RAPIDJSON_STRING_(RefPointerInvalid,
+                      'R',
+                      'e',
+                      'f',
+                      'P',
+                      'o',
+                      'i',
+                      'n',
+                      't',
+                      'e',
+                      'r',
+                      'I',
+                      'n',
+                      'v',
+                      'a',
+                      'l',
+                      'i',
+                      'd')
     RAPIDJSON_STRING_(RefUnknown, 'R', 'e', 'f', 'U', 'n', 'k', 'n', 'o', 'w', 'n')
     RAPIDJSON_STRING_(RefCyclical, 'R', 'e', 'f', 'C', 'y', 'c', 'l', 'i', 'c', 'a', 'l')
-    RAPIDJSON_STRING_(RefNoRemoteProvider, 'R', 'e', 'f', 'N', 'o', 'R', 'e', 'm', 'o', 't', 'e', 'P', 'r', 'o', 'v', 'i', 'd', 'e', 'r')
-    RAPIDJSON_STRING_(RefNoRemoteSchema, 'R', 'e', 'f', 'N', 'o', 'R', 'e', 'm', 'o', 't', 'e', 'S', 'c', 'h', 'e', 'm', 'a')
-    RAPIDJSON_STRING_(ReadOnlyAndWriteOnly, 'R', 'e', 'a', 'd', 'O', 'n', 'l', 'y', 'A', 'n', 'd', 'W', 'r', 'i', 't', 'e', 'O', 'n', 'l', 'y')
+    RAPIDJSON_STRING_(RefNoRemoteProvider,
+                      'R',
+                      'e',
+                      'f',
+                      'N',
+                      'o',
+                      'R',
+                      'e',
+                      'm',
+                      'o',
+                      't',
+                      'e',
+                      'P',
+                      'r',
+                      'o',
+                      'v',
+                      'i',
+                      'd',
+                      'e',
+                      'r')
+    RAPIDJSON_STRING_(RefNoRemoteSchema,
+                      'R',
+                      'e',
+                      'f',
+                      'N',
+                      'o',
+                      'R',
+                      'e',
+                      'm',
+                      'o',
+                      't',
+                      'e',
+                      'S',
+                      'c',
+                      'h',
+                      'e',
+                      'm',
+                      'a')
+    RAPIDJSON_STRING_(ReadOnlyAndWriteOnly,
+                      'R',
+                      'e',
+                      'a',
+                      'd',
+                      'O',
+                      'n',
+                      'l',
+                      'y',
+                      'A',
+                      'n',
+                      'd',
+                      'W',
+                      'r',
+                      'i',
+                      't',
+                      'e',
+                      'O',
+                      'n',
+                      'l',
+                      'y')
     RAPIDJSON_STRING_(RegexInvalid, 'R', 'e', 'g', 'e', 'x', 'I', 'n', 'v', 'a', 'l', 'i', 'd')
 
 #undef RAPIDJSON_STRING_
 
     // Static method to get schema draft of any schema document
-    static SchemaDraft GetSchemaDraft(const ValueType& document) {
-        static const Ch kDraft03String[] = { 'h', 't', 't', 'p', ':', '/', '/', 'j', 's', 'o', 'n', '-', 's', 'c', 'h', 'e', 'm', 'a', '.', 'o', 'r', 'g', '/', 'd', 'r', 'a', 'f', 't', '-', '0', '3', '/', 's', 'c', 'h', 'e', 'm', 'a', '#', '\0' };
-        static const Ch kDraft04String[] = { 'h', 't', 't', 'p', ':', '/', '/', 'j', 's', 'o', 'n', '-', 's', 'c', 'h', 'e', 'm', 'a', '.', 'o', 'r', 'g', '/', 'd', 'r', 'a', 'f', 't', '-', '0', '4', '/', 's', 'c', 'h', 'e', 'm', 'a', '#', '\0' };
-        static const Ch kDraft05String[] = { 'h', 't', 't', 'p', ':', '/', '/', 'j', 's', 'o', 'n', '-', 's', 'c', 'h', 'e', 'm', 'a', '.', 'o', 'r', 'g', '/', 'd', 'r', 'a', 'f', 't', '-', '0', '5', '/', 's', 'c', 'h', 'e', 'm', 'a', '#', '\0' };
-        static const Ch kDraft06String[] = { 'h', 't', 't', 'p', ':', '/', '/', 'j', 's', 'o', 'n', '-', 's', 'c', 'h', 'e', 'm', 'a', '.', 'o', 'r', 'g', '/', 'd', 'r', 'a', 'f', 't', '-', '0', '6', '/', 's', 'c', 'h', 'e', 'm', 'a', '#', '\0' };
-        static const Ch kDraft07String[] = { 'h', 't', 't', 'p', ':', '/', '/', 'j', 's', 'o', 'n', '-', 's', 'c', 'h', 'e', 'm', 'a', '.', 'o', 'r', 'g', '/', 'd', 'r', 'a', 'f', 't', '-', '0', '7', '/', 's', 'c', 'h', 'e', 'm', 'a', '#', '\0' };
-        static const Ch kDraft2019_09String[] = { 'h', 't', 't', 'p', 's', ':', '/', '/', 'j', 's', 'o', 'n', '-', 's', 'c', 'h', 'e', 'm', 'a', '.', 'o', 'r', 'g', '/', 'd', 'r', 'a', 'f', 't', '/', '2', '0', '1', '9', '-', '0', '9', '/', 's', 'c', 'h', 'e', 'm', 'a', '\0' };
-        static const Ch kDraft2020_12String[] = { 'h', 't', 't', 'p', 's', ':', '/', '/', 'j', 's', 'o', 'n', '-', 's', 'c', 'h', 'e', 'm', 'a', '.', 'o', 'r', 'g', '/', 'd', 'r', 'a', 'f', 't', '/', '2', '0', '2', '0', '-', '1', '2', '/', 's', 'c', 'h', 'e', 'm', 'a', '\0' };
+    static SchemaDraft GetSchemaDraft(const ValueType& document)
+    {
+        static const Ch kDraft03String[]      = {'h', 't', 't', 'p', ':', '/', '/', 'j', 's', 'o',
+                                                 'n', '-', 's', 'c', 'h', 'e', 'm', 'a', '.', 'o',
+                                                 'r', 'g', '/', 'd', 'r', 'a', 'f', 't', '-', '0',
+                                                 '3', '/', 's', 'c', 'h', 'e', 'm', 'a', '#', '\0'};
+        static const Ch kDraft04String[]      = {'h', 't', 't', 'p', ':', '/', '/', 'j', 's', 'o',
+                                                 'n', '-', 's', 'c', 'h', 'e', 'm', 'a', '.', 'o',
+                                                 'r', 'g', '/', 'd', 'r', 'a', 'f', 't', '-', '0',
+                                                 '4', '/', 's', 'c', 'h', 'e', 'm', 'a', '#', '\0'};
+        static const Ch kDraft05String[]      = {'h', 't', 't', 'p', ':', '/', '/', 'j', 's', 'o',
+                                                 'n', '-', 's', 'c', 'h', 'e', 'm', 'a', '.', 'o',
+                                                 'r', 'g', '/', 'd', 'r', 'a', 'f', 't', '-', '0',
+                                                 '5', '/', 's', 'c', 'h', 'e', 'm', 'a', '#', '\0'};
+        static const Ch kDraft06String[]      = {'h', 't', 't', 'p', ':', '/', '/', 'j', 's', 'o',
+                                                 'n', '-', 's', 'c', 'h', 'e', 'm', 'a', '.', 'o',
+                                                 'r', 'g', '/', 'd', 'r', 'a', 'f', 't', '-', '0',
+                                                 '6', '/', 's', 'c', 'h', 'e', 'm', 'a', '#', '\0'};
+        static const Ch kDraft07String[]      = {'h', 't', 't', 'p', ':', '/', '/', 'j', 's', 'o',
+                                                 'n', '-', 's', 'c', 'h', 'e', 'm', 'a', '.', 'o',
+                                                 'r', 'g', '/', 'd', 'r', 'a', 'f', 't', '-', '0',
+                                                 '7', '/', 's', 'c', 'h', 'e', 'm', 'a', '#', '\0'};
+        static const Ch kDraft2019_09String[] = {
+            'h', 't', 't', 'p', 's', ':', '/', '/', 'j', 's', 'o', 'n', '-', 's', 'c',
+            'h', 'e', 'm', 'a', '.', 'o', 'r', 'g', '/', 'd', 'r', 'a', 'f', 't', '/',
+            '2', '0', '1', '9', '-', '0', '9', '/', 's', 'c', 'h', 'e', 'm', 'a', '\0'};
+        static const Ch kDraft2020_12String[] = {
+            'h', 't', 't', 'p', 's', ':', '/', '/', 'j', 's', 'o', 'n', '-', 's', 'c',
+            'h', 'e', 'm', 'a', '.', 'o', 'r', 'g', '/', 'd', 'r', 'a', 'f', 't', '/',
+            '2', '0', '2', '0', '-', '1', '2', '/', 's', 'c', 'h', 'e', 'm', 'a', '\0'};
 
-        if (!document.IsObject()) {
+        if(!document.IsObject())
+        {
             return kDraftNone;
         }
 
         // Get the schema draft from the $schema keyword at the supplied location
-        typename ValueType::ConstMemberIterator itr = document.FindMember(SchemaType::GetSchemaString());
-        if (itr != document.MemberEnd()) {
-            if (!itr->value.IsString()) return kDraftUnknown;
+        typename ValueType::ConstMemberIterator itr =
+            document.FindMember(SchemaType::GetSchemaString());
+        if(itr != document.MemberEnd())
+        {
+            if(!itr->value.IsString())
+                return kDraftUnknown;
             const UriType draftUri(itr->value);
             // Check base uri for match
-            if (draftUri.Match(UriType(kDraft04String), false)) return kDraft04;
-            if (draftUri.Match(UriType(kDraft05String), false)) return kDraft05;
-            if (draftUri.Match(UriType(kDraft06String), false)) return kDraft06;
-            if (draftUri.Match(UriType(kDraft07String), false)) return kDraft07;
-            if (draftUri.Match(UriType(kDraft03String), false)) return kDraft03;
-            if (draftUri.Match(UriType(kDraft2019_09String), false)) return kDraft2019_09;
-            if (draftUri.Match(UriType(kDraft2020_12String), false)) return kDraft2020_12;
+            if(draftUri.Match(UriType(kDraft04String), false))
+                return kDraft04;
+            if(draftUri.Match(UriType(kDraft05String), false))
+                return kDraft05;
+            if(draftUri.Match(UriType(kDraft06String), false))
+                return kDraft06;
+            if(draftUri.Match(UriType(kDraft07String), false))
+                return kDraft07;
+            if(draftUri.Match(UriType(kDraft03String), false))
+                return kDraft03;
+            if(draftUri.Match(UriType(kDraft2019_09String), false))
+                return kDraft2019_09;
+            if(draftUri.Match(UriType(kDraft2020_12String), false))
+                return kDraft2020_12;
             return kDraftUnknown;
         }
         // $schema not found
         return kDraftNone;
     }
 
-
     // Get open api version of any schema document
-    static OpenApiVersion GetOpenApiVersion(const ValueType& document) {
-        static const Ch kVersion20String[] = { '2', '.', '0', '\0' };
-        static const Ch kVersion30String[] = { '3', '.', '0', '.', '\0' }; // ignore patch level
-        static const Ch kVersion31String[] = { '3', '.', '1', '.', '\0' }; // ignore patch level
-        static SizeType len = internal::StrLen<Ch>(kVersion30String);
+    static OpenApiVersion GetOpenApiVersion(const ValueType& document)
+    {
+        static const Ch kVersion20String[] = {'2', '.', '0', '\0'};
+        static const Ch kVersion30String[] = {'3', '.', '0', '.', '\0'}; // ignore patch level
+        static const Ch kVersion31String[] = {'3', '.', '1', '.', '\0'}; // ignore patch level
+        static SizeType len                = internal::StrLen<Ch>(kVersion30String);
 
-        if (!document.IsObject()) {
+        if(!document.IsObject())
+        {
             return kVersionNone;
         }
 
         // Get the open api version from the swagger / openapi keyword at the supplied location
-        typename ValueType::ConstMemberIterator itr = document.FindMember(SchemaType::GetSwaggerString());
-        if (itr == document.MemberEnd()) itr = document.FindMember(SchemaType::GetOpenApiString());
-        if (itr != document.MemberEnd()) {
-            if (!itr->value.IsString()) return kVersionUnknown;
+        typename ValueType::ConstMemberIterator itr =
+            document.FindMember(SchemaType::GetSwaggerString());
+        if(itr == document.MemberEnd())
+            itr = document.FindMember(SchemaType::GetOpenApiString());
+        if(itr != document.MemberEnd())
+        {
+            if(!itr->value.IsString())
+                return kVersionUnknown;
             const ValueType kVersion20Value(kVersion20String);
-            if (kVersion20Value == itr->value) return kVersion20; // must match 2.0 exactly
+            if(kVersion20Value == itr->value)
+                return kVersion20; // must match 2.0 exactly
             const ValueType kVersion30Value(kVersion30String);
-            if (itr->value.GetStringLength() > len && kVersion30Value == ValueType(itr->value.GetString(), len)) return kVersion30; // must match 3.0.x
+            if(itr->value.GetStringLength() > len &&
+               kVersion30Value == ValueType(itr->value.GetString(), len))
+                return kVersion30; // must match 3.0.x
             const ValueType kVersion31Value(kVersion31String);
-            if (itr->value.GetStringLength() > len && kVersion31Value == ValueType(itr->value.GetString(), len)) return kVersion31; // must match 3.1.x
+            if(itr->value.GetStringLength() > len &&
+               kVersion31Value == ValueType(itr->value.GetString(), len))
+                return kVersion31; // must match 3.1.x
             return kVersionUnknown;
         }
         // swagger or openapi not found
@@ -2139,61 +2750,82 @@ public:
     }
 
     // Get the draft of the schema or the open api version (which implies the draft).
-    // Report an error if schema draft or open api version not supported or not recognized, or both in document, and carry on.
-    void SetSchemaSpecification(const ValueType& document) {
+    // Report an error if schema draft or open api version not supported or not recognized, or both
+    // in document, and carry on.
+    void SetSchemaSpecification(const ValueType& document)
+    {
         // Look for '$schema', 'swagger' or 'openapi' keyword at document root
-        SchemaDraft docDraft = GetSchemaDraft(document);
+        SchemaDraft docDraft   = GetSchemaDraft(document);
         OpenApiVersion docOapi = GetOpenApiVersion(document);
         // Error if both in document
-        if (docDraft != kDraftNone && docOapi != kVersionNone)
-          SchemaError(kSchemaErrorSpecIllegal, PointerType());
+        if(docDraft != kDraftNone && docOapi != kVersionNone)
+            SchemaError(kSchemaErrorSpecIllegal, PointerType());
         // Use document draft or open api version if present or use spec from constructor
-        if (docDraft != kDraftNone)
+        if(docDraft != kDraftNone)
             spec_ = Specification(docDraft);
-        else if (docOapi != kVersionNone)
+        else if(docOapi != kVersionNone)
             spec_ = Specification(docOapi);
         // Error if draft or version unknown
-        if (spec_.draft == kDraftUnknown || spec_.oapi == kVersionUnknown)
-          SchemaError(kSchemaErrorSpecUnknown, PointerType());
-        else if (!spec_.IsSupported())
+        if(spec_.draft == kDraftUnknown || spec_.oapi == kVersionUnknown)
+            SchemaError(kSchemaErrorSpecUnknown, PointerType());
+        else if(!spec_.IsSupported())
             SchemaError(kSchemaErrorSpecUnsupported, PointerType());
     }
 
     // Changed by PR #1393
-    void CreateSchemaRecursive(const SchemaType** schema, const PointerType& pointer, const ValueType& v, const ValueType& document, const UriType& id) {
-        if (v.GetType() == kObjectType) {
+    void CreateSchemaRecursive(const SchemaType** schema,
+                               const PointerType& pointer,
+                               const ValueType& v,
+                               const ValueType& document,
+                               const UriType& id)
+    {
+        if(v.GetType() == kObjectType)
+        {
             UriType newid = UriType(CreateSchema(schema, pointer, v, document, id), allocator_);
 
-            for (typename ValueType::ConstMemberIterator itr = v.MemberBegin(); itr != v.MemberEnd(); ++itr)
-                CreateSchemaRecursive(0, pointer.Append(itr->name, allocator_), itr->value, document, newid);
+            for(typename ValueType::ConstMemberIterator itr = v.MemberBegin(); itr != v.MemberEnd();
+                ++itr)
+                CreateSchemaRecursive(
+                    0, pointer.Append(itr->name, allocator_), itr->value, document, newid);
         }
-        else if (v.GetType() == kArrayType)
-            for (SizeType i = 0; i < v.Size(); i++)
+        else if(v.GetType() == kArrayType)
+            for(SizeType i = 0; i < v.Size(); i++)
                 CreateSchemaRecursive(0, pointer.Append(i, allocator_), v[i], document, id);
     }
 
     // Changed by PR #1393
-    const UriType& CreateSchema(const SchemaType** schema, const PointerType& pointer, const ValueType& v, const ValueType& document, const UriType& id) {
+    const UriType& CreateSchema(const SchemaType** schema,
+                                const PointerType& pointer,
+                                const ValueType& v,
+                                const ValueType& document,
+                                const UriType& id)
+    {
         RAPIDJSON_ASSERT(pointer.IsValid());
         GenericStringBuffer<EncodingType> sb;
         pointer.StringifyUriFragment(sb);
-        RAPIDJSON_SCHEMA_PRINT(Method, "GenericSchemaDocument::CreateSchema", sb.GetString(), id.GetString());
-        if (v.IsObject()) {
-            if (const SchemaType* sc = GetSchema(pointer)) {
-                if (schema)
+        RAPIDJSON_SCHEMA_PRINT(
+            Method, "GenericSchemaDocument::CreateSchema", sb.GetString(), id.GetString());
+        if(v.IsObject())
+        {
+            if(const SchemaType* sc = GetSchema(pointer))
+            {
+                if(schema)
                     *schema = sc;
                 AddSchemaRefs(const_cast<SchemaType*>(sc));
             }
-            else if (!HandleRefSchema(pointer, schema, v, document, id)) {
+            else if(!HandleRefSchema(pointer, schema, v, document, id))
+            {
                 // The new schema constructor adds itself and its $ref(s) to schemaMap_
-                SchemaType* s = new (allocator_->Malloc(sizeof(SchemaType))) SchemaType(this, pointer, v, document, allocator_, id);
-                if (schema)
+                SchemaType* s = new(allocator_->Malloc(sizeof(SchemaType)))
+                    SchemaType(this, pointer, v, document, allocator_, id);
+                if(schema)
                     *schema = s;
                 return s->GetId();
             }
         }
-        else {
-            if (schema)
+        else
+        {
+            if(schema)
                 *schema = typeless_;
             AddSchemaRefs(typeless_);
         }
@@ -2202,116 +2834,179 @@ public:
 
     // Changed by PR #1393
     // TODO should this return a UriType& ?
-    bool HandleRefSchema(const PointerType& source, const SchemaType** schema, const ValueType& v, const ValueType& document, const UriType& id) {
+    bool HandleRefSchema(const PointerType& source,
+                         const SchemaType** schema,
+                         const ValueType& v,
+                         const ValueType& document,
+                         const UriType& id)
+    {
         typename ValueType::ConstMemberIterator itr = v.FindMember(SchemaType::GetRefString());
-        if (itr == v.MemberEnd())
+        if(itr == v.MemberEnd())
             return false;
 
         GenericStringBuffer<EncodingType> sb;
         source.StringifyUriFragment(sb);
-        RAPIDJSON_SCHEMA_PRINT(Method, "GenericSchemaDocument::HandleRefSchema", sb.GetString(), id.GetString());
+        RAPIDJSON_SCHEMA_PRINT(
+            Method, "GenericSchemaDocument::HandleRefSchema", sb.GetString(), id.GetString());
         // Resolve the source pointer to the $ref'ed schema (finally)
-        new (schemaRef_.template Push<SchemaRefPtr>()) SchemaRefPtr(&source);
+        new(schemaRef_.template Push<SchemaRefPtr>()) SchemaRefPtr(&source);
 
-        if (itr->value.IsString()) {
+        if(itr->value.IsString())
+        {
             SizeType len = itr->value.GetStringLength();
-            if (len == 0)
+            if(len == 0)
                 SchemaError(kSchemaErrorRefInvalid, source);
-            else {
+            else
+            {
                 // First resolve $ref against the in-scope id
                 UriType scopeId = UriType(id, allocator_);
-                UriType ref = UriType(itr->value, allocator_).Resolve(scopeId, allocator_);
-                RAPIDJSON_SCHEMA_PRINT(SchemaIds, id.GetString(), itr->value.GetString(), ref.GetString());
-                // See if the resolved $ref minus the fragment matches a resolved id in this document
-                // Search from the root. Returns the subschema in the document and its absolute JSON pointer.
+                UriType ref     = UriType(itr->value, allocator_).Resolve(scopeId, allocator_);
+                RAPIDJSON_SCHEMA_PRINT(
+                    SchemaIds, id.GetString(), itr->value.GetString(), ref.GetString());
+                // See if the resolved $ref minus the fragment matches a resolved id in this
+                // document Search from the root. Returns the subschema in the document and its
+                // absolute JSON pointer.
                 PointerType basePointer = PointerType();
-                const ValueType *base = FindId(document, ref, basePointer, docId_, false);
-                if (!base) {
+                const ValueType* base   = FindId(document, ref, basePointer, docId_, false);
+                if(!base)
+                {
                     // Remote reference - call the remote document provider
-                    if (!remoteProvider_)
+                    if(!remoteProvider_)
                         SchemaError(kSchemaErrorRefNoRemoteProvider, source);
-                    else {
-                        if (const GenericSchemaDocument* remoteDocument = remoteProvider_->GetRemoteDocument(ref, spec_)) {
+                    else
+                    {
+                        if(const GenericSchemaDocument* remoteDocument =
+                               remoteProvider_->GetRemoteDocument(ref, spec_))
+                        {
                             const Ch* s = ref.GetFragString();
-                            len = ref.GetFragStringLength();
-                            if (len <= 1 || s[1] == '/') {
+                            len         = ref.GetFragStringLength();
+                            if(len <= 1 || s[1] == '/')
+                            {
                                 // JSON pointer fragment, absolute in the remote schema
                                 const PointerType pointer(s, len, allocator_);
-                                if (!pointer.IsValid())
-                                    SchemaErrorPointer(kSchemaErrorRefPointerInvalid, source, s, len, pointer);
-                                else {
+                                if(!pointer.IsValid())
+                                    SchemaErrorPointer(
+                                        kSchemaErrorRefPointerInvalid, source, s, len, pointer);
+                                else
+                                {
                                     // Get the subschema
-                                    if (const SchemaType *sc = remoteDocument->GetSchema(pointer)) {
-                                        if (schema)
+                                    if(const SchemaType* sc = remoteDocument->GetSchema(pointer))
+                                    {
+                                        if(schema)
                                             *schema = sc;
-                                        AddSchemaRefs(const_cast<SchemaType *>(sc));
+                                        AddSchemaRefs(const_cast<SchemaType*>(sc));
                                         return true;
-                                    } else
-                                        SchemaErrorValue(kSchemaErrorRefUnknown, source, ref.GetString(), ref.GetStringLength());
+                                    }
+                                    else
+                                        SchemaErrorValue(kSchemaErrorRefUnknown,
+                                                         source,
+                                                         ref.GetString(),
+                                                         ref.GetStringLength());
                                 }
-                            } else
+                            }
+                            else
                                 // Plain name fragment, not allowed in remote schema
                                 SchemaErrorValue(kSchemaErrorRefPlainName, source, s, len);
-                        } else
-                          SchemaErrorValue(kSchemaErrorRefNoRemoteSchema, source, ref.GetString(), ref.GetStringLength());
+                        }
+                        else
+                            SchemaErrorValue(kSchemaErrorRefNoRemoteSchema,
+                                             source,
+                                             ref.GetString(),
+                                             ref.GetStringLength());
                     }
                 }
-                else { // Local reference
+                else
+                { // Local reference
                     const Ch* s = ref.GetFragString();
-                    len = ref.GetFragStringLength();
-                    if (len <= 1 || s[1] == '/') {
+                    len         = ref.GetFragStringLength();
+                    if(len <= 1 || s[1] == '/')
+                    {
                         // JSON pointer fragment, relative to the resolved URI
                         const PointerType relPointer(s, len, allocator_);
-                        if (!relPointer.IsValid())
-                            SchemaErrorPointer(kSchemaErrorRefPointerInvalid, source, s, len, relPointer);
-                        else {
+                        if(!relPointer.IsValid())
+                            SchemaErrorPointer(
+                                kSchemaErrorRefPointerInvalid, source, s, len, relPointer);
+                        else
+                        {
                             // Get the subschema
-                            if (const ValueType *pv = relPointer.Get(*base)) {
+                            if(const ValueType* pv = relPointer.Get(*base))
+                            {
                                 // Now get the absolute JSON pointer by adding relative to base
                                 PointerType pointer(basePointer, allocator_);
-                                for (SizeType i = 0; i < relPointer.GetTokenCount(); i++)
+                                for(SizeType i = 0; i < relPointer.GetTokenCount(); i++)
                                     pointer = pointer.Append(relPointer.GetTokens()[i], allocator_);
-                                if (IsCyclicRef(pointer))
-                                    SchemaErrorValue(kSchemaErrorRefCyclical, source, ref.GetString(), ref.GetStringLength());
-                                else {
-                                    // Call CreateSchema recursively, but first compute the in-scope id for the $ref target as we have jumped there
+                                if(IsCyclicRef(pointer))
+                                    SchemaErrorValue(kSchemaErrorRefCyclical,
+                                                     source,
+                                                     ref.GetString(),
+                                                     ref.GetStringLength());
+                                else
+                                {
+                                    // Call CreateSchema recursively, but first compute the in-scope
+                                    // id for the $ref target as we have jumped there
                                     // TODO: cache pointer <-> id mapping
                                     size_t unresolvedTokenIndex;
-                                    scopeId = pointer.GetUri(document, docId_, &unresolvedTokenIndex, allocator_);
+                                    scopeId = pointer.GetUri(
+                                        document, docId_, &unresolvedTokenIndex, allocator_);
                                     CreateSchema(schema, pointer, *pv, document, scopeId);
                                     return true;
                                 }
-                            } else
-                                SchemaErrorValue(kSchemaErrorRefUnknown, source, ref.GetString(), ref.GetStringLength());
+                            }
+                            else
+                                SchemaErrorValue(kSchemaErrorRefUnknown,
+                                                 source,
+                                                 ref.GetString(),
+                                                 ref.GetStringLength());
                         }
-                    } else {
+                    }
+                    else
+                    {
                         // Plain name fragment, relative to the resolved URI
                         // Not supported in open api 2.0 and 3.0
                         PointerType pointer(allocator_);
-                        if (spec_.oapi == kVersion20 || spec_.oapi == kVersion30)
+                        if(spec_.oapi == kVersion20 || spec_.oapi == kVersion30)
                             SchemaErrorValue(kSchemaErrorRefPlainName, source, s, len);
                         // See if the fragment matches an id in this document.
-                        // Search from the base we just established. Returns the subschema in the document and its absolute JSON pointer.
-                        else if (const ValueType *pv = FindId(*base, ref, pointer, UriType(ref.GetBaseString(), ref.GetBaseStringLength(), allocator_), true, basePointer)) {
-                            if (IsCyclicRef(pointer))
-                                SchemaErrorValue(kSchemaErrorRefCyclical, source, ref.GetString(), ref.GetStringLength());
-                            else {
-                                // Call CreateSchema recursively, but first compute the in-scope id for the $ref target as we have jumped there
+                        // Search from the base we just established. Returns the subschema in the
+                        // document and its absolute JSON pointer.
+                        else if(const ValueType* pv = FindId(*base,
+                                                             ref,
+                                                             pointer,
+                                                             UriType(ref.GetBaseString(),
+                                                                     ref.GetBaseStringLength(),
+                                                                     allocator_),
+                                                             true,
+                                                             basePointer))
+                        {
+                            if(IsCyclicRef(pointer))
+                                SchemaErrorValue(kSchemaErrorRefCyclical,
+                                                 source,
+                                                 ref.GetString(),
+                                                 ref.GetStringLength());
+                            else
+                            {
+                                // Call CreateSchema recursively, but first compute the in-scope id
+                                // for the $ref target as we have jumped there
                                 // TODO: cache pointer <-> id mapping
                                 size_t unresolvedTokenIndex;
-                                scopeId = pointer.GetUri(document, docId_, &unresolvedTokenIndex, allocator_);
+                                scopeId = pointer.GetUri(
+                                    document, docId_, &unresolvedTokenIndex, allocator_);
                                 CreateSchema(schema, pointer, *pv, document, scopeId);
                                 return true;
                             }
-                        } else
-                            SchemaErrorValue(kSchemaErrorRefUnknown, source, ref.GetString(), ref.GetStringLength());
+                        }
+                        else
+                            SchemaErrorValue(kSchemaErrorRefUnknown,
+                                             source,
+                                             ref.GetString(),
+                                             ref.GetStringLength());
                     }
                 }
             }
         }
 
         // Invalid/Unknown $ref
-        if (schema)
+        if(schema)
             *schema = typeless_;
         AddSchemaRefs(typeless_);
         return true;
@@ -2321,38 +3016,64 @@ public:
     // If full specified use all URI else ignore fragment.
     // If found, return a pointer to the subschema and its JSON pointer.
     // TODO cache pointer <-> id mapping
-    ValueType* FindId(const ValueType& doc, const UriType& finduri, PointerType& resptr, const UriType& baseuri, bool full, const PointerType& here = PointerType()) const {
-        SizeType i = 0;
+    ValueType* FindId(const ValueType& doc,
+                      const UriType& finduri,
+                      PointerType& resptr,
+                      const UriType& baseuri,
+                      bool full,
+                      const PointerType& here = PointerType()) const
+    {
+        SizeType i        = 0;
         ValueType* resval = 0;
-        UriType tempuri = UriType(finduri, allocator_);
-        UriType localuri = UriType(baseuri, allocator_);
-        if (doc.GetType() == kObjectType) {
+        UriType tempuri   = UriType(finduri, allocator_);
+        UriType localuri  = UriType(baseuri, allocator_);
+        if(doc.GetType() == kObjectType)
+        {
             // Establish the base URI of this object
             typename ValueType::ConstMemberIterator m = doc.FindMember(SchemaType::GetIdString());
-            if (m != doc.MemberEnd() && m->value.GetType() == kStringType) {
+            if(m != doc.MemberEnd() && m->value.GetType() == kStringType)
+            {
                 localuri = UriType(m->value, allocator_).Resolve(baseuri, allocator_);
             }
             // See if it matches
-            if (localuri.Match(finduri, full)) {
-                RAPIDJSON_SCHEMA_PRINT(Method, "GenericSchemaDocument::FindId (match)", full ? localuri.GetString() : localuri.GetBaseString());
-                resval = const_cast<ValueType *>(&doc);
+            if(localuri.Match(finduri, full))
+            {
+                RAPIDJSON_SCHEMA_PRINT(Method,
+                                       "GenericSchemaDocument::FindId (match)",
+                                       full ? localuri.GetString() : localuri.GetBaseString());
+                resval = const_cast<ValueType*>(&doc);
                 resptr = here;
                 return resval;
             }
             // No match, continue looking
-            for (m = doc.MemberBegin(); m != doc.MemberEnd(); ++m) {
-                if (m->value.GetType() == kObjectType || m->value.GetType() == kArrayType) {
-                    resval = FindId(m->value, finduri, resptr, localuri, full, here.Append(m->name.GetString(), m->name.GetStringLength(), allocator_));
+            for(m = doc.MemberBegin(); m != doc.MemberEnd(); ++m)
+            {
+                if(m->value.GetType() == kObjectType || m->value.GetType() == kArrayType)
+                {
+                    resval = FindId(
+                        m->value,
+                        finduri,
+                        resptr,
+                        localuri,
+                        full,
+                        here.Append(m->name.GetString(), m->name.GetStringLength(), allocator_));
                 }
-                if (resval) break;
+                if(resval)
+                    break;
             }
-        } else if (doc.GetType() == kArrayType) {
+        }
+        else if(doc.GetType() == kArrayType)
+        {
             // Continue looking
-            for (typename ValueType::ConstValueIterator v = doc.Begin(); v != doc.End(); ++v) {
-                if (v->GetType() == kObjectType || v->GetType() == kArrayType) {
-                    resval = FindId(*v, finduri, resptr, localuri, full, here.Append(i, allocator_));
+            for(typename ValueType::ConstValueIterator v = doc.Begin(); v != doc.End(); ++v)
+            {
+                if(v->GetType() == kObjectType || v->GetType() == kArrayType)
+                {
+                    resval =
+                        FindId(*v, finduri, resptr, localuri, full, here.Append(i, allocator_));
                 }
-                if (resval) break;
+                if(resval)
+                    break;
                 i++;
             }
         }
@@ -2360,33 +3081,44 @@ public:
     }
 
     // Added by PR #1393
-    void AddSchemaRefs(SchemaType* schema) {
+    void AddSchemaRefs(SchemaType* schema)
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "GenericSchemaDocument::AddSchemaRefs");
-        while (!schemaRef_.Empty()) {
-            SchemaRefPtr *ref = schemaRef_.template Pop<SchemaRefPtr>(1);
-            SchemaEntry *entry = schemaMap_.template Push<SchemaEntry>();
-            new (entry) SchemaEntry(**ref, schema, false, allocator_);
+        while(!schemaRef_.Empty())
+        {
+            SchemaRefPtr* ref  = schemaRef_.template Pop<SchemaRefPtr>(1);
+            SchemaEntry* entry = schemaMap_.template Push<SchemaEntry>();
+            new(entry) SchemaEntry(**ref, schema, false, allocator_);
         }
     }
 
     // Added by PR #1393
-    bool IsCyclicRef(const PointerType& pointer) const {
-        for (const SchemaRefPtr* ref = schemaRef_.template Bottom<SchemaRefPtr>(); ref != schemaRef_.template End<SchemaRefPtr>(); ++ref)
-            if (pointer == **ref)
+    bool IsCyclicRef(const PointerType& pointer) const
+    {
+        for(const SchemaRefPtr* ref = schemaRef_.template Bottom<SchemaRefPtr>();
+            ref != schemaRef_.template End<SchemaRefPtr>();
+            ++ref)
+            if(pointer == **ref)
                 return true;
         return false;
     }
 
-    const SchemaType* GetSchema(const PointerType& pointer) const {
-        for (const SchemaEntry* target = schemaMap_.template Bottom<SchemaEntry>(); target != schemaMap_.template End<SchemaEntry>(); ++target)
-            if (pointer == target->pointer)
+    const SchemaType* GetSchema(const PointerType& pointer) const
+    {
+        for(const SchemaEntry* target = schemaMap_.template Bottom<SchemaEntry>();
+            target != schemaMap_.template End<SchemaEntry>();
+            ++target)
+            if(pointer == target->pointer)
                 return target->schema;
         return 0;
     }
 
-    PointerType GetPointer(const SchemaType* schema) const {
-        for (const SchemaEntry* target = schemaMap_.template Bottom<SchemaEntry>(); target != schemaMap_.template End<SchemaEntry>(); ++target)
-            if (schema == target->schema)
+    PointerType GetPointer(const SchemaType* schema) const
+    {
+        for(const SchemaEntry* target = schemaMap_.template Bottom<SchemaEntry>();
+            target != schemaMap_.template End<SchemaEntry>();
+            ++target)
+            if(schema == target->schema)
                 return target->pointer;
         return PointerType();
     }
@@ -2397,13 +3129,13 @@ public:
     static const size_t kInitialSchemaRefSize = 64;
 
     IRemoteSchemaDocumentProviderType* remoteProvider_;
-    Allocator *allocator_;
-    Allocator *ownAllocator_;
-    const SchemaType* root_;                //!< Root schema.
+    Allocator* allocator_;
+    Allocator* ownAllocator_;
+    const SchemaType* root_; //!< Root schema.
     SchemaType* typeless_;
-    internal::Stack<Allocator> schemaMap_;  // Stores created Pointer -> Schemas
-    internal::Stack<Allocator> schemaRef_;  // Stores Pointer(s) from $ref(s) until resolved
-    GValue uri_;                            // Schema document URI
+    internal::Stack<Allocator> schemaMap_; // Stores created Pointer -> Schemas
+    internal::Stack<Allocator> schemaRef_; // Stores Pointer(s) from $ref(s) until resolved
+    GValue uri_;                           // Schema document URI
     UriType docId_;
     Specification spec_;
     GValue error_;
@@ -2430,15 +3162,16 @@ typedef IGenericRemoteSchemaDocumentProvider<SchemaDocument> IRemoteSchemaDocume
     \tparam OutputHandler Type of output handler. Default handler does nothing.
     \tparam StateAllocator Allocator for storing the internal validation states.
 */
-template <
-    typename SchemaDocumentType,
-    typename OutputHandler = BaseReaderHandler<typename SchemaDocumentType::SchemaType::EncodingType>,
-    typename StateAllocator = CrtAllocator>
-class GenericSchemaValidator :
-    public internal::ISchemaStateFactory<typename SchemaDocumentType::SchemaType>, 
-    public internal::ISchemaValidator,
-    public internal::IValidationErrorHandler<typename SchemaDocumentType::SchemaType> {
-public:
+template <typename SchemaDocumentType,
+          typename OutputHandler =
+              BaseReaderHandler<typename SchemaDocumentType::SchemaType::EncodingType>,
+          typename StateAllocator = CrtAllocator>
+class GenericSchemaValidator
+    : public internal::ISchemaStateFactory<typename SchemaDocumentType::SchemaType>,
+      public internal::ISchemaValidator,
+      public internal::IValidationErrorHandler<typename SchemaDocumentType::SchemaType>
+{
+    public:
     typedef typename SchemaDocumentType::SchemaType SchemaType;
     typedef typename SchemaDocumentType::PointerType PointerType;
     typedef typename SchemaType::EncodingType EncodingType;
@@ -2454,25 +3187,23 @@ public:
         \param schemaStackCapacity Optional initial capacity of schema path stack.
         \param documentStackCapacity Optional initial capacity of document path stack.
     */
-    GenericSchemaValidator(
-        const SchemaDocumentType& schemaDocument,
-        StateAllocator* allocator = 0, 
-        size_t schemaStackCapacity = kDefaultSchemaStackCapacity,
-        size_t documentStackCapacity = kDefaultDocumentStackCapacity)
-        :
-        schemaDocument_(&schemaDocument),
-        root_(schemaDocument.GetRoot()),
-        stateAllocator_(allocator),
-        ownStateAllocator_(0),
-        schemaStack_(allocator, schemaStackCapacity),
-        documentStack_(allocator, documentStackCapacity),
-        outputHandler_(0),
-        error_(kObjectType),
-        currentError_(),
-        missingDependents_(),
-        valid_(true),
-        flags_(kValidateDefaultFlags),
-        depth_(0)
+    GenericSchemaValidator(const SchemaDocumentType& schemaDocument,
+                           StateAllocator* allocator    = 0,
+                           size_t schemaStackCapacity   = kDefaultSchemaStackCapacity,
+                           size_t documentStackCapacity = kDefaultDocumentStackCapacity)
+        : schemaDocument_(&schemaDocument),
+          root_(schemaDocument.GetRoot()),
+          stateAllocator_(allocator),
+          ownStateAllocator_(0),
+          schemaStack_(allocator, schemaStackCapacity),
+          documentStack_(allocator, documentStackCapacity),
+          outputHandler_(0),
+          error_(kObjectType),
+          currentError_(),
+          missingDependents_(),
+          valid_(true),
+          flags_(kValidateDefaultFlags),
+          depth_(0)
     {
         RAPIDJSON_SCHEMA_PRINT(Method, "GenericSchemaValidator::GenericSchemaValidator");
     }
@@ -2484,46 +3215,48 @@ public:
         \param schemaStackCapacity Optional initial capacity of schema path stack.
         \param documentStackCapacity Optional initial capacity of document path stack.
     */
-    GenericSchemaValidator(
-        const SchemaDocumentType& schemaDocument,
-        OutputHandler& outputHandler,
-        StateAllocator* allocator = 0, 
-        size_t schemaStackCapacity = kDefaultSchemaStackCapacity,
-        size_t documentStackCapacity = kDefaultDocumentStackCapacity)
-        :
-        schemaDocument_(&schemaDocument),
-        root_(schemaDocument.GetRoot()),
-        stateAllocator_(allocator),
-        ownStateAllocator_(0),
-        schemaStack_(allocator, schemaStackCapacity),
-        documentStack_(allocator, documentStackCapacity),
-        outputHandler_(&outputHandler),
-        error_(kObjectType),
-        currentError_(),
-        missingDependents_(),
-        valid_(true),
-        flags_(kValidateDefaultFlags),
-        depth_(0)
+    GenericSchemaValidator(const SchemaDocumentType& schemaDocument,
+                           OutputHandler& outputHandler,
+                           StateAllocator* allocator    = 0,
+                           size_t schemaStackCapacity   = kDefaultSchemaStackCapacity,
+                           size_t documentStackCapacity = kDefaultDocumentStackCapacity)
+        : schemaDocument_(&schemaDocument),
+          root_(schemaDocument.GetRoot()),
+          stateAllocator_(allocator),
+          ownStateAllocator_(0),
+          schemaStack_(allocator, schemaStackCapacity),
+          documentStack_(allocator, documentStackCapacity),
+          outputHandler_(&outputHandler),
+          error_(kObjectType),
+          currentError_(),
+          missingDependents_(),
+          valid_(true),
+          flags_(kValidateDefaultFlags),
+          depth_(0)
     {
-        RAPIDJSON_SCHEMA_PRINT(Method, "GenericSchemaValidator::GenericSchemaValidator (output handler)");
+        RAPIDJSON_SCHEMA_PRINT(Method,
+                               "GenericSchemaValidator::GenericSchemaValidator (output handler)");
     }
 
     //! Destructor.
-    ~GenericSchemaValidator() {
+    ~GenericSchemaValidator()
+    {
         Reset();
         RAPIDJSON_DELETE(ownStateAllocator_);
     }
 
     //! Reset the internal states.
-    void Reset() {
-        while (!schemaStack_.Empty())
+    void Reset()
+    {
+        while(!schemaStack_.Empty())
             PopSchema();
         documentStack_.Clear();
         ResetError();
     }
 
     //! Reset the error state.
-    void ResetError() {
+    void ResetError()
+    {
         error_.SetObject();
         currentError_.SetNull();
         missingDependents_.SetNull();
@@ -2531,16 +3264,15 @@ public:
     }
 
     //! Implementation of ISchemaValidator
-    void SetValidateFlags(unsigned flags) {
-        flags_ = flags;
-    }
-    virtual unsigned GetValidateFlags() const {
-        return flags_;
-    }
+    void SetValidateFlags(unsigned flags) { flags_ = flags; }
+    virtual unsigned GetValidateFlags() const { return flags_; }
 
-    virtual bool IsValid() const {
-        if (!valid_) return false;
-        if (GetContinueOnErrors() && !error_.ObjectEmpty()) return false;
+    virtual bool IsValid() const
+    {
+        if(!valid_)
+            return false;
+        if(GetContinueOnErrors() && !error_.ObjectEmpty())
+            return false;
         return true;
     }
     //! End of Implementation of ISchemaValidator
@@ -2551,99 +3283,143 @@ public:
 
     //! Gets the JSON pointer pointed to the invalid schema.
     //  If reporting all errors, the stack will be empty.
-    PointerType GetInvalidSchemaPointer() const {
+    PointerType GetInvalidSchemaPointer() const
+    {
         return schemaStack_.Empty() ? PointerType() : CurrentSchema().GetPointer();
     }
 
     //! Gets the keyword of invalid schema.
     //  If reporting all errors, the stack will be empty, so return "errors".
-    const Ch* GetInvalidSchemaKeyword() const {
-        if (!schemaStack_.Empty()) return CurrentContext().invalidKeyword;
-        if (GetContinueOnErrors() && !error_.ObjectEmpty()) return static_cast<const Ch*>(GetErrorsString());
+    const Ch* GetInvalidSchemaKeyword() const
+    {
+        if(!schemaStack_.Empty())
+            return CurrentContext().invalidKeyword;
+        if(GetContinueOnErrors() && !error_.ObjectEmpty())
+            return static_cast<const Ch*>(GetErrorsString());
         return 0;
     }
 
     //! Gets the error code of invalid schema.
     //  If reporting all errors, the stack will be empty, so return kValidateErrors.
-    ValidateErrorCode GetInvalidSchemaCode() const {
-        if (!schemaStack_.Empty()) return CurrentContext().invalidCode;
-        if (GetContinueOnErrors() && !error_.ObjectEmpty()) return kValidateErrors;
+    ValidateErrorCode GetInvalidSchemaCode() const
+    {
+        if(!schemaStack_.Empty())
+            return CurrentContext().invalidCode;
+        if(GetContinueOnErrors() && !error_.ObjectEmpty())
+            return kValidateErrors;
         return kValidateErrorNone;
     }
 
     //! Gets the JSON pointer pointed to the invalid value.
     //  If reporting all errors, the stack will be empty.
-    PointerType GetInvalidDocumentPointer() const {
-        if (documentStack_.Empty()) {
+    PointerType GetInvalidDocumentPointer() const
+    {
+        if(documentStack_.Empty())
+        {
             return PointerType();
         }
-        else {
-            return PointerType(documentStack_.template Bottom<Ch>(), documentStack_.GetSize() / sizeof(Ch));
+        else
+        {
+            return PointerType(documentStack_.template Bottom<Ch>(),
+                               documentStack_.GetSize() / sizeof(Ch));
         }
     }
 
-    void NotMultipleOf(int64_t actual, const SValue& expected) {
+    void NotMultipleOf(int64_t actual, const SValue& expected)
+    {
         AddNumberError(kValidateErrorMultipleOf, ValueType(actual).Move(), expected);
     }
-    void NotMultipleOf(uint64_t actual, const SValue& expected) {
+    void NotMultipleOf(uint64_t actual, const SValue& expected)
+    {
         AddNumberError(kValidateErrorMultipleOf, ValueType(actual).Move(), expected);
     }
-    void NotMultipleOf(double actual, const SValue& expected) {
+    void NotMultipleOf(double actual, const SValue& expected)
+    {
         AddNumberError(kValidateErrorMultipleOf, ValueType(actual).Move(), expected);
     }
-    void AboveMaximum(int64_t actual, const SValue& expected, bool exclusive) {
-        AddNumberError(exclusive ? kValidateErrorExclusiveMaximum : kValidateErrorMaximum, ValueType(actual).Move(), expected,
-            exclusive ? &SchemaType::GetExclusiveMaximumString : 0);
+    void AboveMaximum(int64_t actual, const SValue& expected, bool exclusive)
+    {
+        AddNumberError(exclusive ? kValidateErrorExclusiveMaximum : kValidateErrorMaximum,
+                       ValueType(actual).Move(),
+                       expected,
+                       exclusive ? &SchemaType::GetExclusiveMaximumString : 0);
     }
-    void AboveMaximum(uint64_t actual, const SValue& expected, bool exclusive) {
-        AddNumberError(exclusive ? kValidateErrorExclusiveMaximum : kValidateErrorMaximum, ValueType(actual).Move(), expected,
-            exclusive ? &SchemaType::GetExclusiveMaximumString : 0);
+    void AboveMaximum(uint64_t actual, const SValue& expected, bool exclusive)
+    {
+        AddNumberError(exclusive ? kValidateErrorExclusiveMaximum : kValidateErrorMaximum,
+                       ValueType(actual).Move(),
+                       expected,
+                       exclusive ? &SchemaType::GetExclusiveMaximumString : 0);
     }
-    void AboveMaximum(double actual, const SValue& expected, bool exclusive) {
-        AddNumberError(exclusive ? kValidateErrorExclusiveMaximum : kValidateErrorMaximum, ValueType(actual).Move(), expected,
-            exclusive ? &SchemaType::GetExclusiveMaximumString : 0);
+    void AboveMaximum(double actual, const SValue& expected, bool exclusive)
+    {
+        AddNumberError(exclusive ? kValidateErrorExclusiveMaximum : kValidateErrorMaximum,
+                       ValueType(actual).Move(),
+                       expected,
+                       exclusive ? &SchemaType::GetExclusiveMaximumString : 0);
     }
-    void BelowMinimum(int64_t actual, const SValue& expected, bool exclusive) {
-        AddNumberError(exclusive ? kValidateErrorExclusiveMinimum : kValidateErrorMinimum, ValueType(actual).Move(), expected,
-            exclusive ? &SchemaType::GetExclusiveMinimumString : 0);
+    void BelowMinimum(int64_t actual, const SValue& expected, bool exclusive)
+    {
+        AddNumberError(exclusive ? kValidateErrorExclusiveMinimum : kValidateErrorMinimum,
+                       ValueType(actual).Move(),
+                       expected,
+                       exclusive ? &SchemaType::GetExclusiveMinimumString : 0);
     }
-    void BelowMinimum(uint64_t actual, const SValue& expected, bool exclusive) {
-        AddNumberError(exclusive ? kValidateErrorExclusiveMinimum : kValidateErrorMinimum, ValueType(actual).Move(), expected,
-            exclusive ? &SchemaType::GetExclusiveMinimumString : 0);
+    void BelowMinimum(uint64_t actual, const SValue& expected, bool exclusive)
+    {
+        AddNumberError(exclusive ? kValidateErrorExclusiveMinimum : kValidateErrorMinimum,
+                       ValueType(actual).Move(),
+                       expected,
+                       exclusive ? &SchemaType::GetExclusiveMinimumString : 0);
     }
-    void BelowMinimum(double actual, const SValue& expected, bool exclusive) {
-        AddNumberError(exclusive ? kValidateErrorExclusiveMinimum : kValidateErrorMinimum, ValueType(actual).Move(), expected,
-            exclusive ? &SchemaType::GetExclusiveMinimumString : 0);
+    void BelowMinimum(double actual, const SValue& expected, bool exclusive)
+    {
+        AddNumberError(exclusive ? kValidateErrorExclusiveMinimum : kValidateErrorMinimum,
+                       ValueType(actual).Move(),
+                       expected,
+                       exclusive ? &SchemaType::GetExclusiveMinimumString : 0);
     }
 
-    void TooLong(const Ch* str, SizeType length, SizeType expected) {
+    void TooLong(const Ch* str, SizeType length, SizeType expected)
+    {
         AddNumberError(kValidateErrorMaxLength,
-            ValueType(str, length, GetStateAllocator()).Move(), SValue(expected).Move());
+                       ValueType(str, length, GetStateAllocator()).Move(),
+                       SValue(expected).Move());
     }
-    void TooShort(const Ch* str, SizeType length, SizeType expected) {
+    void TooShort(const Ch* str, SizeType length, SizeType expected)
+    {
         AddNumberError(kValidateErrorMinLength,
-            ValueType(str, length, GetStateAllocator()).Move(), SValue(expected).Move());
+                       ValueType(str, length, GetStateAllocator()).Move(),
+                       SValue(expected).Move());
     }
-    void DoesNotMatch(const Ch* str, SizeType length) {
+    void DoesNotMatch(const Ch* str, SizeType length)
+    {
         currentError_.SetObject();
-        currentError_.AddMember(GetActualString(), ValueType(str, length, GetStateAllocator()).Move(), GetStateAllocator());
+        currentError_.AddMember(GetActualString(),
+                                ValueType(str, length, GetStateAllocator()).Move(),
+                                GetStateAllocator());
         AddCurrentError(kValidateErrorPattern);
     }
 
-    void DisallowedItem(SizeType index) {
+    void DisallowedItem(SizeType index)
+    {
         currentError_.SetObject();
-        currentError_.AddMember(GetDisallowedString(), ValueType(index).Move(), GetStateAllocator());
+        currentError_.AddMember(
+            GetDisallowedString(), ValueType(index).Move(), GetStateAllocator());
         AddCurrentError(kValidateErrorAdditionalItems, true);
     }
-    void TooFewItems(SizeType actualCount, SizeType expectedCount) {
-        AddNumberError(kValidateErrorMinItems,
-            ValueType(actualCount).Move(), SValue(expectedCount).Move());
+    void TooFewItems(SizeType actualCount, SizeType expectedCount)
+    {
+        AddNumberError(
+            kValidateErrorMinItems, ValueType(actualCount).Move(), SValue(expectedCount).Move());
     }
-    void TooManyItems(SizeType actualCount, SizeType expectedCount) {
-        AddNumberError(kValidateErrorMaxItems,
-            ValueType(actualCount).Move(), SValue(expectedCount).Move());
+    void TooManyItems(SizeType actualCount, SizeType expectedCount)
+    {
+        AddNumberError(
+            kValidateErrorMaxItems, ValueType(actualCount).Move(), SValue(expectedCount).Move());
     }
-    void DuplicateItems(SizeType index1, SizeType index2) {
+    void DuplicateItems(SizeType index1, SizeType index2)
+    {
         ValueType duplicates(kArrayType);
         duplicates.PushBack(index1, GetStateAllocator());
         duplicates.PushBack(index2, GetStateAllocator());
@@ -2652,22 +3428,26 @@ public:
         AddCurrentError(kValidateErrorUniqueItems, true);
     }
 
-    void TooManyProperties(SizeType actualCount, SizeType expectedCount) {
+    void TooManyProperties(SizeType actualCount, SizeType expectedCount)
+    {
         AddNumberError(kValidateErrorMaxProperties,
-            ValueType(actualCount).Move(), SValue(expectedCount).Move());
+                       ValueType(actualCount).Move(),
+                       SValue(expectedCount).Move());
     }
-    void TooFewProperties(SizeType actualCount, SizeType expectedCount) {
+    void TooFewProperties(SizeType actualCount, SizeType expectedCount)
+    {
         AddNumberError(kValidateErrorMinProperties,
-            ValueType(actualCount).Move(), SValue(expectedCount).Move());
+                       ValueType(actualCount).Move(),
+                       SValue(expectedCount).Move());
     }
-    void StartMissingProperties() {
-        currentError_.SetArray();
-    }
-    void AddMissingProperty(const SValue& name) {
+    void StartMissingProperties() { currentError_.SetArray(); }
+    void AddMissingProperty(const SValue& name)
+    {
         currentError_.PushBack(ValueType(name, GetStateAllocator()).Move(), GetStateAllocator());
     }
-    bool EndMissingProperties() {
-        if (currentError_.Empty())
+    bool EndMissingProperties()
+    {
+        if(currentError_.Empty())
             return false;
         ValueType error(kObjectType);
         error.AddMember(GetMissingString(), currentError_, GetStateAllocator());
@@ -2675,27 +3455,31 @@ public:
         AddCurrentError(kValidateErrorRequired);
         return true;
     }
-    void PropertyViolations(ISchemaValidator** subvalidators, SizeType count) {
-        for (SizeType i = 0; i < count; ++i)
+    void PropertyViolations(ISchemaValidator** subvalidators, SizeType count)
+    {
+        for(SizeType i = 0; i < count; ++i)
             MergeError(static_cast<GenericSchemaValidator*>(subvalidators[i])->GetError());
     }
-    void DisallowedProperty(const Ch* name, SizeType length) {
+    void DisallowedProperty(const Ch* name, SizeType length)
+    {
         currentError_.SetObject();
-        currentError_.AddMember(GetDisallowedString(), ValueType(name, length, GetStateAllocator()).Move(), GetStateAllocator());
+        currentError_.AddMember(GetDisallowedString(),
+                                ValueType(name, length, GetStateAllocator()).Move(),
+                                GetStateAllocator());
         AddCurrentError(kValidateErrorAdditionalProperties, true);
     }
 
-    void StartDependencyErrors() {
-        currentError_.SetObject();
+    void StartDependencyErrors() { currentError_.SetObject(); }
+    void StartMissingDependentProperties() { missingDependents_.SetArray(); }
+    void AddMissingDependentProperty(const SValue& targetName)
+    {
+        missingDependents_.PushBack(ValueType(targetName, GetStateAllocator()).Move(),
+                                    GetStateAllocator());
     }
-    void StartMissingDependentProperties() {
-        missingDependents_.SetArray();
-    }
-    void AddMissingDependentProperty(const SValue& targetName) {
-        missingDependents_.PushBack(ValueType(targetName, GetStateAllocator()).Move(), GetStateAllocator());
-    }
-    void EndMissingDependentProperties(const SValue& sourceName) {
-        if (!missingDependents_.Empty()) {
+    void EndMissingDependentProperties(const SValue& sourceName)
+    {
+        if(!missingDependents_.Empty())
+        {
             // Create equivalent 'required' error
             ValueType error(kObjectType);
             ValidateErrorCode code = kValidateErrorRequired;
@@ -2703,19 +3487,31 @@ public:
             AddErrorCode(error, code);
             AddErrorInstanceLocation(error, false);
             // When appending to a pointer ensure its allocator is used
-            PointerType schemaRef = GetInvalidSchemaPointer().Append(SchemaType::GetValidateErrorKeyword(kValidateErrorDependencies), &GetInvalidSchemaPointer().GetAllocator());
-            AddErrorSchemaLocation(error, schemaRef.Append(sourceName.GetString(), sourceName.GetStringLength(), &GetInvalidSchemaPointer().GetAllocator()));
+            PointerType schemaRef = GetInvalidSchemaPointer().Append(
+                SchemaType::GetValidateErrorKeyword(kValidateErrorDependencies),
+                &GetInvalidSchemaPointer().GetAllocator());
+            AddErrorSchemaLocation(error,
+                                   schemaRef.Append(sourceName.GetString(),
+                                                    sourceName.GetStringLength(),
+                                                    &GetInvalidSchemaPointer().GetAllocator()));
             ValueType wrapper(kObjectType);
-            wrapper.AddMember(ValueType(SchemaType::GetValidateErrorKeyword(code), GetStateAllocator()).Move(), error, GetStateAllocator());
-            currentError_.AddMember(ValueType(sourceName, GetStateAllocator()).Move(), wrapper, GetStateAllocator());
+            wrapper.AddMember(
+                ValueType(SchemaType::GetValidateErrorKeyword(code), GetStateAllocator()).Move(),
+                error,
+                GetStateAllocator());
+            currentError_.AddMember(
+                ValueType(sourceName, GetStateAllocator()).Move(), wrapper, GetStateAllocator());
         }
     }
-    void AddDependencySchemaError(const SValue& sourceName, ISchemaValidator* subvalidator) {
+    void AddDependencySchemaError(const SValue& sourceName, ISchemaValidator* subvalidator)
+    {
         currentError_.AddMember(ValueType(sourceName, GetStateAllocator()).Move(),
-            static_cast<GenericSchemaValidator*>(subvalidator)->GetError(), GetStateAllocator());
+                                static_cast<GenericSchemaValidator*>(subvalidator)->GetError(),
+                                GetStateAllocator());
     }
-    bool EndDependencyErrors() {
-        if (currentError_.ObjectEmpty())
+    bool EndDependencyErrors()
+    {
+        if(currentError_.ObjectEmpty())
             return false;
         ValueType error(kObjectType);
         error.AddMember(GetErrorsString(), currentError_, GetStateAllocator());
@@ -2724,37 +3520,46 @@ public:
         return true;
     }
 
-    void DisallowedValue(const ValidateErrorCode code = kValidateErrorEnum) {
+    void DisallowedValue(const ValidateErrorCode code = kValidateErrorEnum)
+    {
         currentError_.SetObject();
         AddCurrentError(code);
     }
-    void StartDisallowedType() {
-        currentError_.SetArray();
+    void StartDisallowedType() { currentError_.SetArray(); }
+    void AddExpectedType(const typename SchemaType::ValueType& expectedType)
+    {
+        currentError_.PushBack(ValueType(expectedType, GetStateAllocator()).Move(),
+                               GetStateAllocator());
     }
-    void AddExpectedType(const typename SchemaType::ValueType& expectedType) {
-        currentError_.PushBack(ValueType(expectedType, GetStateAllocator()).Move(), GetStateAllocator());
-    }
-    void EndDisallowedType(const typename SchemaType::ValueType& actualType) {
+    void EndDisallowedType(const typename SchemaType::ValueType& actualType)
+    {
         ValueType error(kObjectType);
         error.AddMember(GetExpectedString(), currentError_, GetStateAllocator());
-        error.AddMember(GetActualString(), ValueType(actualType, GetStateAllocator()).Move(), GetStateAllocator());
+        error.AddMember(GetActualString(),
+                        ValueType(actualType, GetStateAllocator()).Move(),
+                        GetStateAllocator());
         currentError_ = error;
         AddCurrentError(kValidateErrorType);
     }
-    void NotAllOf(ISchemaValidator** subvalidators, SizeType count) {
-        // Treat allOf like oneOf and anyOf to match https://rapidjson.org/md_doc_schema.html#allOf-anyOf-oneOf
+    void NotAllOf(ISchemaValidator** subvalidators, SizeType count)
+    {
+        // Treat allOf like oneOf and anyOf to match
+        // https://rapidjson.org/md_doc_schema.html#allOf-anyOf-oneOf
         AddErrorArray(kValidateErrorAllOf, subvalidators, count);
-        //for (SizeType i = 0; i < count; ++i) {
-        //    MergeError(static_cast<GenericSchemaValidator*>(subvalidators[i])->GetError());
-        //}
+        // for (SizeType i = 0; i < count; ++i) {
+        //     MergeError(static_cast<GenericSchemaValidator*>(subvalidators[i])->GetError());
+        // }
     }
-    void NoneOf(ISchemaValidator** subvalidators, SizeType count) {
+    void NoneOf(ISchemaValidator** subvalidators, SizeType count)
+    {
         AddErrorArray(kValidateErrorAnyOf, subvalidators, count);
     }
-    void NotOneOf(ISchemaValidator** subvalidators, SizeType count) {
+    void NotOneOf(ISchemaValidator** subvalidators, SizeType count)
+    {
         AddErrorArray(kValidateErrorOneOf, subvalidators, count);
     }
-    void MultipleOneOf(SizeType index1, SizeType index2) {
+    void MultipleOneOf(SizeType index1, SizeType index2)
+    {
         ValueType matches(kArrayType);
         matches.PushBack(index1, GetStateAllocator());
         matches.PushBack(index2, GetStateAllocator());
@@ -2762,24 +3567,28 @@ public:
         currentError_.AddMember(GetMatchesString(), matches, GetStateAllocator());
         AddCurrentError(kValidateErrorOneOfMatch);
     }
-    void Disallowed() {
+    void Disallowed()
+    {
         currentError_.SetObject();
         AddCurrentError(kValidateErrorNot);
     }
-    void DisallowedWhenWriting() {
+    void DisallowedWhenWriting()
+    {
         currentError_.SetObject();
         AddCurrentError(kValidateErrorReadOnly);
     }
-    void DisallowedWhenReading() {
+    void DisallowedWhenReading()
+    {
         currentError_.SetObject();
         AddCurrentError(kValidateErrorWriteOnly);
     }
 
-#define RAPIDJSON_STRING_(name, ...) \
-    static const StringRefType& Get##name##String() {\
-        static const Ch s[] = { __VA_ARGS__, '\0' };\
+#define RAPIDJSON_STRING_(name, ...)                                                        \
+    static const StringRefType& Get##name##String()                                         \
+    {                                                                                       \
+        static const Ch s[] = {__VA_ARGS__, '\0'};                                          \
         static const StringRefType v(s, static_cast<SizeType>(sizeof(s) / sizeof(Ch) - 1)); \
-        return v;\
+        return v;                                                                           \
     }
 
     RAPIDJSON_STRING_(InstanceRef, 'i', 'n', 's', 't', 'a', 'n', 'c', 'e', 'R', 'e', 'f')
@@ -2796,62 +3605,80 @@ public:
 
 #undef RAPIDJSON_STRING_
 
-#define RAPIDJSON_SCHEMA_HANDLE_BEGIN_(method, arg1)\
-    if (!valid_) return false; \
-    if ((!BeginValue() && !GetContinueOnErrors()) || (!CurrentSchema().method arg1 && !GetContinueOnErrors())) {\
-        *documentStack_.template Push<Ch>() = '\0';\
-        documentStack_.template Pop<Ch>(1);\
-        RAPIDJSON_SCHEMA_PRINT(InvalidDocument, documentStack_.template Bottom<Ch>());\
-        valid_ = false;\
-        return valid_;\
+#define RAPIDJSON_SCHEMA_HANDLE_BEGIN_(method, arg1)                                   \
+    if(!valid_)                                                                        \
+        return false;                                                                  \
+    if((!BeginValue() && !GetContinueOnErrors()) ||                                    \
+       (!CurrentSchema().method arg1 && !GetContinueOnErrors()))                       \
+    {                                                                                  \
+        *documentStack_.template Push<Ch>() = '\0';                                    \
+        documentStack_.template Pop<Ch>(1);                                            \
+        RAPIDJSON_SCHEMA_PRINT(InvalidDocument, documentStack_.template Bottom<Ch>()); \
+        valid_ = false;                                                                \
+        return valid_;                                                                 \
     }
 
-#define RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(method, arg2)\
-    for (Context* context = schemaStack_.template Bottom<Context>(); context != schemaStack_.template End<Context>(); context++) {\
-        if (context->hasher)\
-            static_cast<HasherType*>(context->hasher)->method arg2;\
-        if (context->validators)\
-            for (SizeType i_ = 0; i_ < context->validatorCount; i_++)\
-                static_cast<GenericSchemaValidator*>(context->validators[i_])->method arg2;\
-        if (context->patternPropertiesValidators)\
-            for (SizeType i_ = 0; i_ < context->patternPropertiesValidatorCount; i_++)\
-                static_cast<GenericSchemaValidator*>(context->patternPropertiesValidators[i_])->method arg2;\
+#define RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(method, arg2)                                        \
+    for(Context* context = schemaStack_.template Bottom<Context>();                            \
+        context != schemaStack_.template End<Context>();                                       \
+        context++)                                                                             \
+    {                                                                                          \
+        if(context->hasher)                                                                    \
+            static_cast<HasherType*>(context->hasher)->method arg2;                            \
+        if(context->validators)                                                                \
+            for(SizeType i_ = 0; i_ < context->validatorCount; i_++)                           \
+                static_cast<GenericSchemaValidator*>(context->validators[i_])->method arg2;    \
+        if(context->patternPropertiesValidators)                                               \
+            for(SizeType i_ = 0; i_ < context->patternPropertiesValidatorCount; i_++)          \
+                static_cast<GenericSchemaValidator*>(context->patternPropertiesValidators[i_]) \
+                    ->method arg2;                                                             \
     }
 
-#define RAPIDJSON_SCHEMA_HANDLE_END_(method, arg2)\
-    valid_ = (EndValue() || GetContinueOnErrors()) && (!outputHandler_ || outputHandler_->method arg2);\
+#define RAPIDJSON_SCHEMA_HANDLE_END_(method, arg2)                                                 \
+    valid_ =                                                                                       \
+        (EndValue() || GetContinueOnErrors()) && (!outputHandler_ || outputHandler_->method arg2); \
     return valid_;
 
 #define RAPIDJSON_SCHEMA_HANDLE_VALUE_(method, arg1, arg2) \
-    RAPIDJSON_SCHEMA_HANDLE_BEGIN_   (method, arg1);\
-    RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(method, arg2);\
-    RAPIDJSON_SCHEMA_HANDLE_END_     (method, arg2)
+    RAPIDJSON_SCHEMA_HANDLE_BEGIN_(method, arg1);          \
+    RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(method, arg2);       \
+    RAPIDJSON_SCHEMA_HANDLE_END_(method, arg2)
 
-    bool Null()             { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Null,   (CurrentContext()), ( )); }
-    bool Bool(bool b)       { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Bool,   (CurrentContext(), b), (b)); }
-    bool Int(int i)         { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Int,    (CurrentContext(), i), (i)); }
-    bool Uint(unsigned u)   { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Uint,   (CurrentContext(), u), (u)); }
-    bool Int64(int64_t i)   { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Int64,  (CurrentContext(), i), (i)); }
+    bool Null() { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Null, (CurrentContext()), ()); }
+    bool Bool(bool b) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Bool, (CurrentContext(), b), (b)); }
+    bool Int(int i) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Int, (CurrentContext(), i), (i)); }
+    bool Uint(unsigned u) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Uint, (CurrentContext(), u), (u)); }
+    bool Int64(int64_t i) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Int64, (CurrentContext(), i), (i)); }
     bool Uint64(uint64_t u) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Uint64, (CurrentContext(), u), (u)); }
-    bool Double(double d)   { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Double, (CurrentContext(), d), (d)); }
+    bool Double(double d) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Double, (CurrentContext(), d), (d)); }
     bool RawNumber(const Ch* str, SizeType length, bool copy)
-                                    { RAPIDJSON_SCHEMA_HANDLE_VALUE_(String, (CurrentContext(), str, length, copy), (str, length, copy)); }
+    {
+        RAPIDJSON_SCHEMA_HANDLE_VALUE_(
+            String, (CurrentContext(), str, length, copy), (str, length, copy));
+    }
     bool String(const Ch* str, SizeType length, bool copy)
-                                    { RAPIDJSON_SCHEMA_HANDLE_VALUE_(String, (CurrentContext(), str, length, copy), (str, length, copy)); }
+    {
+        RAPIDJSON_SCHEMA_HANDLE_VALUE_(
+            String, (CurrentContext(), str, length, copy), (str, length, copy));
+    }
 
-    bool StartObject() {
+    bool StartObject()
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "GenericSchemaValidator::StartObject");
         RAPIDJSON_SCHEMA_HANDLE_BEGIN_(StartObject, (CurrentContext()));
         RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(StartObject, ());
         valid_ = !outputHandler_ || outputHandler_->StartObject();
         return valid_;
     }
-    
-    bool Key(const Ch* str, SizeType len, bool copy) {
+
+    bool Key(const Ch* str, SizeType len, bool copy)
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "GenericSchemaValidator::Key", str);
-        if (!valid_) return false;
+        if(!valid_)
+            return false;
         AppendToken(str, len);
-        if (!CurrentSchema().Key(CurrentContext(), str, len, copy) && !GetContinueOnErrors()) {
+        if(!CurrentSchema().Key(CurrentContext(), str, len, copy) && !GetContinueOnErrors())
+        {
             valid_ = false;
             return valid_;
         }
@@ -2859,31 +3686,38 @@ public:
         valid_ = !outputHandler_ || outputHandler_->Key(str, len, copy);
         return valid_;
     }
-    
-    bool EndObject(SizeType memberCount) {
+
+    bool EndObject(SizeType memberCount)
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "GenericSchemaValidator::EndObject");
-        if (!valid_) return false;
+        if(!valid_)
+            return false;
         RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(EndObject, (memberCount));
-        if (!CurrentSchema().EndObject(CurrentContext(), memberCount) && !GetContinueOnErrors()) { 
-            valid_ = false; 
-            return valid_; 
+        if(!CurrentSchema().EndObject(CurrentContext(), memberCount) && !GetContinueOnErrors())
+        {
+            valid_ = false;
+            return valid_;
         }
         RAPIDJSON_SCHEMA_HANDLE_END_(EndObject, (memberCount));
     }
 
-    bool StartArray() {
+    bool StartArray()
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "GenericSchemaValidator::StartArray");
         RAPIDJSON_SCHEMA_HANDLE_BEGIN_(StartArray, (CurrentContext()));
         RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(StartArray, ());
         valid_ = !outputHandler_ || outputHandler_->StartArray();
         return valid_;
     }
-    
-    bool EndArray(SizeType elementCount) {
+
+    bool EndArray(SizeType elementCount)
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "GenericSchemaValidator::EndArray");
-        if (!valid_) return false;
+        if(!valid_)
+            return false;
         RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(EndArray, (elementCount));
-        if (!CurrentSchema().EndArray(CurrentContext(), elementCount) && !GetContinueOnErrors()) {
+        if(!CurrentSchema().EndArray(CurrentContext(), elementCount) && !GetContinueOnErrors())
+        {
             valid_ = false;
             return valid_;
         }
@@ -2895,114 +3729,130 @@ public:
 #undef RAPIDJSON_SCHEMA_HANDLE_VALUE_
 
     // Implementation of ISchemaStateFactory<SchemaType>
-    virtual ISchemaValidator* CreateSchemaValidator(const SchemaType& root, const bool inheritContinueOnErrors) {
+    virtual ISchemaValidator* CreateSchemaValidator(const SchemaType& root,
+                                                    const bool inheritContinueOnErrors)
+    {
         *documentStack_.template Push<Ch>() = '\0';
         documentStack_.template Pop<Ch>(1);
-        ISchemaValidator* sv = new (GetStateAllocator().Malloc(sizeof(GenericSchemaValidator))) GenericSchemaValidator(*schemaDocument_, root, documentStack_.template Bottom<char>(), documentStack_.GetSize(),
-        depth_ + 1,
-        &GetStateAllocator());
-        sv->SetValidateFlags(inheritContinueOnErrors ? GetValidateFlags() : GetValidateFlags() & ~static_cast<unsigned>(kValidateContinueOnErrorFlag));
+        ISchemaValidator* sv = new(GetStateAllocator().Malloc(sizeof(GenericSchemaValidator)))
+            GenericSchemaValidator(*schemaDocument_,
+                                   root,
+                                   documentStack_.template Bottom<char>(),
+                                   documentStack_.GetSize(),
+                                   depth_ + 1,
+                                   &GetStateAllocator());
+        sv->SetValidateFlags(inheritContinueOnErrors
+                                 ? GetValidateFlags()
+                                 : GetValidateFlags() &
+                                       ~static_cast<unsigned>(kValidateContinueOnErrorFlag));
         return sv;
     }
 
-    virtual void DestroySchemaValidator(ISchemaValidator* validator) {
+    virtual void DestroySchemaValidator(ISchemaValidator* validator)
+    {
         GenericSchemaValidator* v = static_cast<GenericSchemaValidator*>(validator);
         v->~GenericSchemaValidator();
         StateAllocator::Free(v);
     }
 
-    virtual void* CreateHasher() {
-        return new (GetStateAllocator().Malloc(sizeof(HasherType))) HasherType(&GetStateAllocator());
+    virtual void* CreateHasher()
+    {
+        return new(GetStateAllocator().Malloc(sizeof(HasherType))) HasherType(&GetStateAllocator());
     }
 
-    virtual uint64_t GetHashCode(void* hasher) {
+    virtual uint64_t GetHashCode(void* hasher)
+    {
         return static_cast<HasherType*>(hasher)->GetHashCode();
     }
 
-    virtual void DestroryHasher(void* hasher) {
+    virtual void DestroryHasher(void* hasher)
+    {
         HasherType* h = static_cast<HasherType*>(hasher);
         h->~HasherType();
         StateAllocator::Free(h);
     }
 
-    virtual void* MallocState(size_t size) {
-        return GetStateAllocator().Malloc(size);
-    }
+    virtual void* MallocState(size_t size) { return GetStateAllocator().Malloc(size); }
 
-    virtual void FreeState(void* p) {
-        StateAllocator::Free(p);
-    }
+    virtual void FreeState(void* p) { StateAllocator::Free(p); }
     // End of implementation of ISchemaStateFactory<SchemaType>
 
-private:
+    private:
     typedef typename SchemaType::Context Context;
     typedef GenericValue<UTF8<>, StateAllocator> HashCodeArray;
     typedef internal::Hasher<EncodingType, StateAllocator> HasherType;
 
-    GenericSchemaValidator( 
-        const SchemaDocumentType& schemaDocument,
-        const SchemaType& root,
-        const char* basePath, size_t basePathSize,
-        unsigned depth,
-        StateAllocator* allocator = 0,
-        size_t schemaStackCapacity = kDefaultSchemaStackCapacity,
-        size_t documentStackCapacity = kDefaultDocumentStackCapacity)
-        :
-        schemaDocument_(&schemaDocument),
-        root_(root),
-        stateAllocator_(allocator),
-        ownStateAllocator_(0),
-        schemaStack_(allocator, schemaStackCapacity),
-        documentStack_(allocator, documentStackCapacity),
-        outputHandler_(0),
-        error_(kObjectType),
-        currentError_(),
-        missingDependents_(),
-        valid_(true),
-        flags_(kValidateDefaultFlags),
-        depth_(depth)
+    GenericSchemaValidator(const SchemaDocumentType& schemaDocument,
+                           const SchemaType& root,
+                           const char* basePath,
+                           size_t basePathSize,
+                           unsigned depth,
+                           StateAllocator* allocator    = 0,
+                           size_t schemaStackCapacity   = kDefaultSchemaStackCapacity,
+                           size_t documentStackCapacity = kDefaultDocumentStackCapacity)
+        : schemaDocument_(&schemaDocument),
+          root_(root),
+          stateAllocator_(allocator),
+          ownStateAllocator_(0),
+          schemaStack_(allocator, schemaStackCapacity),
+          documentStack_(allocator, documentStackCapacity),
+          outputHandler_(0),
+          error_(kObjectType),
+          currentError_(),
+          missingDependents_(),
+          valid_(true),
+          flags_(kValidateDefaultFlags),
+          depth_(depth)
     {
-        RAPIDJSON_SCHEMA_PRINT(Method, "GenericSchemaValidator::GenericSchemaValidator (internal)", basePath && basePathSize ? basePath : "");
-        if (basePath && basePathSize)
+        RAPIDJSON_SCHEMA_PRINT(Method,
+                               "GenericSchemaValidator::GenericSchemaValidator (internal)",
+                               basePath && basePathSize ? basePath : "");
+        if(basePath && basePathSize)
             memcpy(documentStack_.template Push<char>(basePathSize), basePath, basePathSize);
     }
 
-    StateAllocator& GetStateAllocator() {
-        if (!stateAllocator_)
+    StateAllocator& GetStateAllocator()
+    {
+        if(!stateAllocator_)
             stateAllocator_ = ownStateAllocator_ = RAPIDJSON_NEW(StateAllocator)();
         return *stateAllocator_;
     }
 
-    bool GetContinueOnErrors() const {
-        return flags_ & kValidateContinueOnErrorFlag;
-    }
+    bool GetContinueOnErrors() const { return flags_ & kValidateContinueOnErrorFlag; }
 
-    bool BeginValue() {
+    bool BeginValue()
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "GenericSchemaValidator::BeginValue");
-        if (schemaStack_.Empty())
+        if(schemaStack_.Empty())
             PushSchema(root_);
-        else {
-            if (CurrentContext().inArray)
-                internal::TokenHelper<internal::Stack<StateAllocator>, Ch>::AppendIndexToken(documentStack_, CurrentContext().arrayElementIndex);
+        else
+        {
+            if(CurrentContext().inArray)
+                internal::TokenHelper<internal::Stack<StateAllocator>, Ch>::AppendIndexToken(
+                    documentStack_, CurrentContext().arrayElementIndex);
 
-            if (!CurrentSchema().BeginValue(CurrentContext()) && !GetContinueOnErrors())
+            if(!CurrentSchema().BeginValue(CurrentContext()) && !GetContinueOnErrors())
                 return false;
 
-            SizeType count = CurrentContext().patternPropertiesSchemaCount;
+            SizeType count        = CurrentContext().patternPropertiesSchemaCount;
             const SchemaType** sa = CurrentContext().patternPropertiesSchemas;
-            typename Context::PatternValidatorType patternValidatorType = CurrentContext().valuePatternValidatorType;
+            typename Context::PatternValidatorType patternValidatorType =
+                CurrentContext().valuePatternValidatorType;
             bool valueUniqueness = CurrentContext().valueUniqueness;
             RAPIDJSON_ASSERT(CurrentContext().valueSchema);
             PushSchema(*CurrentContext().valueSchema);
 
-            if (count > 0) {
+            if(count > 0)
+            {
                 CurrentContext().objectPatternValidatorType = patternValidatorType;
-                ISchemaValidator**& va = CurrentContext().patternPropertiesValidators;
+                ISchemaValidator**& va   = CurrentContext().patternPropertiesValidators;
                 SizeType& validatorCount = CurrentContext().patternPropertiesValidatorCount;
-                va = static_cast<ISchemaValidator**>(MallocState(sizeof(ISchemaValidator*) * count));
+                va =
+                    static_cast<ISchemaValidator**>(MallocState(sizeof(ISchemaValidator*) * count));
                 std::memset(va, 0, sizeof(ISchemaValidator*) * count);
-                for (SizeType i = 0; i < count; i++)
-                    va[validatorCount++] = CreateSchemaValidator(*sa[i], true);  // Inherit continueOnError
+                for(SizeType i = 0; i < count; i++)
+                    va[validatorCount++] =
+                        CreateSchemaValidator(*sa[i], true); // Inherit continueOnError
             }
 
             CurrentContext().arrayUniqueness = valueUniqueness;
@@ -3010,35 +3860,48 @@ private:
         return true;
     }
 
-    bool EndValue() {
+    bool EndValue()
+    {
         RAPIDJSON_SCHEMA_PRINT(Method, "GenericSchemaValidator::EndValue");
-        if (!CurrentSchema().EndValue(CurrentContext()) && !GetContinueOnErrors())
+        if(!CurrentSchema().EndValue(CurrentContext()) && !GetContinueOnErrors())
             return false;
 
         GenericStringBuffer<EncodingType> sb;
         schemaDocument_->GetPointer(&CurrentSchema()).StringifyUriFragment(sb);
         *documentStack_.template Push<Ch>() = '\0';
         documentStack_.template Pop<Ch>(1);
-        RAPIDJSON_SCHEMA_PRINT(ValidatorPointers, sb.GetString(), documentStack_.template Bottom<Ch>(), depth_);
+        RAPIDJSON_SCHEMA_PRINT(
+            ValidatorPointers, sb.GetString(), documentStack_.template Bottom<Ch>(), depth_);
         void* hasher = CurrentContext().hasher;
-        uint64_t h = hasher && CurrentContext().arrayUniqueness ? static_cast<HasherType*>(hasher)->GetHashCode() : 0;
-        
+        uint64_t h   = hasher && CurrentContext().arrayUniqueness
+                           ? static_cast<HasherType*>(hasher)->GetHashCode()
+                           : 0;
+
         PopSchema();
 
-        if (!schemaStack_.Empty()) {
+        if(!schemaStack_.Empty())
+        {
             Context& context = CurrentContext();
             // Only check uniqueness if there is a hasher
-            if (hasher && context.valueUniqueness) {
+            if(hasher && context.valueUniqueness)
+            {
                 HashCodeArray* a = static_cast<HashCodeArray*>(context.arrayElementHashCodes);
-                if (!a)
-                    CurrentContext().arrayElementHashCodes = a = new (GetStateAllocator().Malloc(sizeof(HashCodeArray))) HashCodeArray(kArrayType);
-                for (typename HashCodeArray::ConstValueIterator itr = a->Begin(); itr != a->End(); ++itr)
-                    if (itr->GetUint64() == h) {
+                if(!a)
+                    CurrentContext().arrayElementHashCodes = a =
+                        new(GetStateAllocator().Malloc(sizeof(HashCodeArray)))
+                            HashCodeArray(kArrayType);
+                for(typename HashCodeArray::ConstValueIterator itr = a->Begin(); itr != a->End();
+                    ++itr)
+                    if(itr->GetUint64() == h)
+                    {
                         DuplicateItems(static_cast<SizeType>(itr - a->Begin()), a->Size());
                         // Cleanup before returning if continuing
-                        if (GetContinueOnErrors()) {
+                        if(GetContinueOnErrors())
+                        {
                             a->PushBack(h, GetStateAllocator());
-                            while (!documentStack_.Empty() && *documentStack_.template Pop<Ch>(1) != '/');
+                            while(!documentStack_.Empty() &&
+                                  *documentStack_.template Pop<Ch>(1) != '/')
+                                ;
                         }
                         RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorUniqueItems);
                     }
@@ -3047,21 +3910,26 @@ private:
         }
 
         // Remove the last token of document pointer
-        while (!documentStack_.Empty() && *documentStack_.template Pop<Ch>(1) != '/')
+        while(!documentStack_.Empty() && *documentStack_.template Pop<Ch>(1) != '/')
             ;
 
         return true;
     }
 
-    void AppendToken(const Ch* str, SizeType len) {
-        documentStack_.template Reserve<Ch>(1 + len * 2); // worst case all characters are escaped as two characters
+    void AppendToken(const Ch* str, SizeType len)
+    {
+        documentStack_.template Reserve<Ch>(
+            1 + len * 2); // worst case all characters are escaped as two characters
         *documentStack_.template PushUnsafe<Ch>() = '/';
-        for (SizeType i = 0; i < len; i++) {
-            if (str[i] == '~') {
+        for(SizeType i = 0; i < len; i++)
+        {
+            if(str[i] == '~')
+            {
                 *documentStack_.template PushUnsafe<Ch>() = '~';
                 *documentStack_.template PushUnsafe<Ch>() = '0';
             }
-            else if (str[i] == '/') {
+            else if(str[i] == '/')
+            {
                 *documentStack_.template PushUnsafe<Ch>() = '~';
                 *documentStack_.template PushUnsafe<Ch>() = '1';
             }
@@ -3070,49 +3938,64 @@ private:
         }
     }
 
-    RAPIDJSON_FORCEINLINE void PushSchema(const SchemaType& schema) { new (schemaStack_.template Push<Context>()) Context(*this, *this, &schema, flags_); }
-    
-    RAPIDJSON_FORCEINLINE void PopSchema() {
+    RAPIDJSON_FORCEINLINE void PushSchema(const SchemaType& schema)
+    {
+        new(schemaStack_.template Push<Context>()) Context(*this, *this, &schema, flags_);
+    }
+
+    RAPIDJSON_FORCEINLINE void PopSchema()
+    {
         Context* c = schemaStack_.template Pop<Context>(1);
-        if (HashCodeArray* a = static_cast<HashCodeArray*>(c->arrayElementHashCodes)) {
+        if(HashCodeArray* a = static_cast<HashCodeArray*>(c->arrayElementHashCodes))
+        {
             a->~HashCodeArray();
             StateAllocator::Free(a);
         }
         c->~Context();
     }
 
-    void AddErrorInstanceLocation(ValueType& result, bool parent) {
+    void AddErrorInstanceLocation(ValueType& result, bool parent)
+    {
         GenericStringBuffer<EncodingType> sb;
         PointerType instancePointer = GetInvalidDocumentPointer();
         ((parent && instancePointer.GetTokenCount() > 0)
-         ? PointerType(instancePointer.GetTokens(), instancePointer.GetTokenCount() - 1)
-         : instancePointer).StringifyUriFragment(sb);
-        ValueType instanceRef(sb.GetString(), static_cast<SizeType>(sb.GetSize() / sizeof(Ch)),
-                              GetStateAllocator());
+             ? PointerType(instancePointer.GetTokens(), instancePointer.GetTokenCount() - 1)
+             : instancePointer)
+            .StringifyUriFragment(sb);
+        ValueType instanceRef(
+            sb.GetString(), static_cast<SizeType>(sb.GetSize() / sizeof(Ch)), GetStateAllocator());
         result.AddMember(GetInstanceRefString(), instanceRef, GetStateAllocator());
     }
 
-    void AddErrorSchemaLocation(ValueType& result, PointerType schema = PointerType()) {
+    void AddErrorSchemaLocation(ValueType& result, PointerType schema = PointerType())
+    {
         GenericStringBuffer<EncodingType> sb;
         SizeType len = CurrentSchema().GetURI().GetStringLength();
-        if (len) memcpy(sb.Push(len), CurrentSchema().GetURI().GetString(), len * sizeof(Ch));
-        if (schema.GetTokenCount()) schema.StringifyUriFragment(sb);
-        else GetInvalidSchemaPointer().StringifyUriFragment(sb);
-        ValueType schemaRef(sb.GetString(), static_cast<SizeType>(sb.GetSize() / sizeof(Ch)),
-            GetStateAllocator());
+        if(len)
+            memcpy(sb.Push(len), CurrentSchema().GetURI().GetString(), len * sizeof(Ch));
+        if(schema.GetTokenCount())
+            schema.StringifyUriFragment(sb);
+        else
+            GetInvalidSchemaPointer().StringifyUriFragment(sb);
+        ValueType schemaRef(
+            sb.GetString(), static_cast<SizeType>(sb.GetSize() / sizeof(Ch)), GetStateAllocator());
         result.AddMember(GetSchemaRefString(), schemaRef, GetStateAllocator());
     }
 
-    void AddErrorCode(ValueType& result, const ValidateErrorCode code) {
+    void AddErrorCode(ValueType& result, const ValidateErrorCode code)
+    {
         result.AddMember(GetErrorCodeString(), code, GetStateAllocator());
     }
 
-    void AddError(ValueType& keyword, ValueType& error) {
+    void AddError(ValueType& keyword, ValueType& error)
+    {
         typename ValueType::MemberIterator member = error_.FindMember(keyword);
-        if (member == error_.MemberEnd())
+        if(member == error_.MemberEnd())
             error_.AddMember(keyword, error, GetStateAllocator());
-        else {
-            if (member->value.IsObject()) {
+        else
+        {
+            if(member->value.IsObject())
+            {
                 ValueType errors(kArrayType);
                 errors.PushBack(member->value, GetStateAllocator());
                 member->value = errors;
@@ -3121,51 +4004,71 @@ private:
         }
     }
 
-    void AddCurrentError(const ValidateErrorCode code, bool parent = false) {
+    void AddCurrentError(const ValidateErrorCode code, bool parent = false)
+    {
         AddErrorCode(currentError_, code);
         AddErrorInstanceLocation(currentError_, parent);
         AddErrorSchemaLocation(currentError_);
-        AddError(ValueType(SchemaType::GetValidateErrorKeyword(code), GetStateAllocator(), false).Move(), currentError_);
+        AddError(
+            ValueType(SchemaType::GetValidateErrorKeyword(code), GetStateAllocator(), false).Move(),
+            currentError_);
     }
 
-    void MergeError(ValueType& other) {
-        for (typename ValueType::MemberIterator it = other.MemberBegin(), end = other.MemberEnd(); it != end; ++it) {
+    void MergeError(ValueType& other)
+    {
+        for(typename ValueType::MemberIterator it = other.MemberBegin(), end = other.MemberEnd();
+            it != end;
+            ++it)
+        {
             AddError(it->name, it->value);
         }
     }
 
-    void AddNumberError(const ValidateErrorCode code, ValueType& actual, const SValue& expected,
-        const typename SchemaType::ValueType& (*exclusive)() = 0) {
+    void AddNumberError(const ValidateErrorCode code,
+                        ValueType& actual,
+                        const SValue& expected,
+                        const typename SchemaType::ValueType& (*exclusive)() = 0)
+    {
         currentError_.SetObject();
         currentError_.AddMember(GetActualString(), actual, GetStateAllocator());
-        currentError_.AddMember(GetExpectedString(), ValueType(expected, GetStateAllocator()).Move(), GetStateAllocator());
-        if (exclusive)
-            currentError_.AddMember(ValueType(exclusive(), GetStateAllocator()).Move(), true, GetStateAllocator());
+        currentError_.AddMember(GetExpectedString(),
+                                ValueType(expected, GetStateAllocator()).Move(),
+                                GetStateAllocator());
+        if(exclusive)
+            currentError_.AddMember(
+                ValueType(exclusive(), GetStateAllocator()).Move(), true, GetStateAllocator());
         AddCurrentError(code);
     }
 
-    void AddErrorArray(const ValidateErrorCode code,
-        ISchemaValidator** subvalidators, SizeType count) {
+    void
+    AddErrorArray(const ValidateErrorCode code, ISchemaValidator** subvalidators, SizeType count)
+    {
         ValueType errors(kArrayType);
-        for (SizeType i = 0; i < count; ++i)
-            errors.PushBack(static_cast<GenericSchemaValidator*>(subvalidators[i])->GetError(), GetStateAllocator());
+        for(SizeType i = 0; i < count; ++i)
+            errors.PushBack(static_cast<GenericSchemaValidator*>(subvalidators[i])->GetError(),
+                            GetStateAllocator());
         currentError_.SetObject();
         currentError_.AddMember(GetErrorsString(), errors, GetStateAllocator());
         AddCurrentError(code);
     }
 
-    const SchemaType& CurrentSchema() const { return *schemaStack_.template Top<Context>()->schema; }
+    const SchemaType& CurrentSchema() const
+    {
+        return *schemaStack_.template Top<Context>()->schema;
+    }
     Context& CurrentContext() { return *schemaStack_.template Top<Context>(); }
     const Context& CurrentContext() const { return *schemaStack_.template Top<Context>(); }
 
-    static const size_t kDefaultSchemaStackCapacity = 1024;
+    static const size_t kDefaultSchemaStackCapacity   = 1024;
     static const size_t kDefaultDocumentStackCapacity = 256;
     const SchemaDocumentType* schemaDocument_;
     const SchemaType& root_;
     StateAllocator* stateAllocator_;
     StateAllocator* ownStateAllocator_;
-    internal::Stack<StateAllocator> schemaStack_;    //!< stack to store the current path of schema (BaseSchemaType *)
-    internal::Stack<StateAllocator> documentStack_;  //!< stack to store the current path of validating document (Ch)
+    internal::Stack<StateAllocator>
+        schemaStack_; //!< stack to store the current path of schema (BaseSchemaType *)
+    internal::Stack<StateAllocator>
+        documentStack_; //!< stack to store the current path of validating document (Ch)
     OutputHandler* outputHandler_;
     ValueType error_;
     ValueType currentError_;
@@ -3190,14 +4093,14 @@ typedef GenericSchemaValidator<SchemaDocument> SchemaValidator;
     \tparam SchemaDocumentType Type of schema document.
     \tparam StackAllocator Allocator type for stack.
 */
-template <
-    unsigned parseFlags,
-    typename InputStream,
-    typename SourceEncoding,
-    typename SchemaDocumentType = SchemaDocument,
-    typename StackAllocator = CrtAllocator>
-class SchemaValidatingReader {
-public:
+template <unsigned parseFlags,
+          typename InputStream,
+          typename SourceEncoding,
+          typename SchemaDocumentType = SchemaDocument,
+          typename StackAllocator     = CrtAllocator>
+class SchemaValidatingReader
+{
+    public:
     typedef typename SchemaDocumentType::PointerType PointerType;
     typedef typename InputStream::Ch Ch;
     typedef GenericValue<SourceEncoding, StackAllocator> ValueType;
@@ -3207,25 +4110,37 @@ public:
         \param is Input stream.
         \param sd Schema document.
     */
-    SchemaValidatingReader(InputStream& is, const SchemaDocumentType& sd) : is_(is), sd_(sd), invalidSchemaKeyword_(), invalidSchemaCode_(kValidateErrorNone), error_(kObjectType), isValid_(true) {}
+    SchemaValidatingReader(InputStream& is, const SchemaDocumentType& sd)
+        : is_(is),
+          sd_(sd),
+          invalidSchemaKeyword_(),
+          invalidSchemaCode_(kValidateErrorNone),
+          error_(kObjectType),
+          isValid_(true)
+    {
+    }
 
     template <typename Handler>
-    bool operator()(Handler& handler) {
-        GenericReader<SourceEncoding, typename SchemaDocumentType::EncodingType, StackAllocator> reader;
+    bool operator()(Handler& handler)
+    {
+        GenericReader<SourceEncoding, typename SchemaDocumentType::EncodingType, StackAllocator>
+            reader;
         GenericSchemaValidator<SchemaDocumentType, Handler> validator(sd_, handler);
         parseResult_ = reader.template Parse<parseFlags>(is_, validator);
 
         isValid_ = validator.IsValid();
-        if (isValid_) {
-            invalidSchemaPointer_ = PointerType();
-            invalidSchemaKeyword_ = 0;
+        if(isValid_)
+        {
+            invalidSchemaPointer_   = PointerType();
+            invalidSchemaKeyword_   = 0;
             invalidDocumentPointer_ = PointerType();
             error_.SetObject();
         }
-        else {
-            invalidSchemaPointer_ = validator.GetInvalidSchemaPointer();
-            invalidSchemaKeyword_ = validator.GetInvalidSchemaKeyword();
-            invalidSchemaCode_ = validator.GetInvalidSchemaCode();
+        else
+        {
+            invalidSchemaPointer_   = validator.GetInvalidSchemaPointer();
+            invalidSchemaKeyword_   = validator.GetInvalidSchemaKeyword();
+            invalidSchemaCode_      = validator.GetInvalidSchemaCode();
             invalidDocumentPointer_ = validator.GetInvalidDocumentPointer();
             error_.CopyFrom(validator.GetError(), allocator_);
         }
@@ -3241,7 +4156,7 @@ public:
     const ValueType& GetError() const { return error_; }
     ValidateErrorCode GetInvalidSchemaCode() const { return invalidSchemaCode_; }
 
-private:
+    private:
     InputStream& is_;
     const SchemaDocumentType& sd_;
 
diff --git a/include/rapidjson/stream.h b/include/rapidjson/stream.h
index 1fd70915c5..3839489914 100644
--- a/include/rapidjson/stream.h
+++ b/include/rapidjson/stream.h
@@ -69,34 +69,41 @@ concept Stream {
     For custom stream, this type can be specialized for other configuration.
     See TEST(Reader, CustomStringStream) in readertest.cpp for example.
 */
-template<typename Stream>
-struct StreamTraits {
+template <typename Stream>
+struct StreamTraits
+{
     //! Whether to make local copy of stream for optimization during parsing.
     /*!
         By default, for safety, streams do not use local copy optimization.
         Stream that can be copied fast should specialize this, like StreamTraits<StringStream>.
     */
-    enum { copyOptimization = 0 };
+    enum
+    {
+        copyOptimization = 0
+    };
 };
 
 //! Reserve n characters for writing to a stream.
-template<typename Stream>
-inline void PutReserve(Stream& stream, size_t count) {
+template <typename Stream>
+inline void PutReserve(Stream& stream, size_t count)
+{
     (void)stream;
     (void)count;
 }
 
 //! Write character to a stream, presuming buffer is reserved.
-template<typename Stream>
-inline void PutUnsafe(Stream& stream, typename Stream::Ch c) {
+template <typename Stream>
+inline void PutUnsafe(Stream& stream, typename Stream::Ch c)
+{
     stream.Put(c);
 }
 
 //! Put N copies of a character to a stream.
-template<typename Stream, typename Ch>
-inline void PutN(Stream& stream, Ch c, size_t n) {
+template <typename Stream, typename Ch>
+inline void PutN(Stream& stream, Ch c, size_t n)
+{
     PutReserve(stream, n);
-    for (size_t i = 0; i < n; i++)
+    for(size_t i = 0; i < n; i++)
         PutUnsafe(stream, c);
 }
 
@@ -111,15 +118,16 @@ inline void PutN(Stream& stream, Ch c, size_t n) {
 
 #if defined(_MSC_VER) && _MSC_VER <= 1800
 RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(4702)  // unreachable code
-RAPIDJSON_DIAG_OFF(4512)  // assignment operator could not be generated
+RAPIDJSON_DIAG_OFF(4702) // unreachable code
+RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
 #endif
 
-template <typename InputStream, typename Encoding = UTF8<> >
-class GenericStreamWrapper {
-public:
+template <typename InputStream, typename Encoding = UTF8<>>
+class GenericStreamWrapper
+{
+    public:
     typedef typename Encoding::Ch Ch;
-    GenericStreamWrapper(InputStream& is): is_(is) {}
+    GenericStreamWrapper(InputStream& is) : is_(is) {}
 
     Ch Peek() const { return is_.Peek(); }
     Ch Take() { return is_.Take(); }
@@ -136,7 +144,7 @@ public:
     UTFType GetType() const { return is_.GetType(); }
     bool HasBOM() const { return is_.HasBOM(); }
 
-protected:
+    protected:
     InputStream& is_;
 };
 
@@ -149,33 +157,46 @@ RAPIDJSON_DIAG_POP
 
 //! Read-only string stream.
 /*! \note implements Stream concept
-*/
+ */
 template <typename Encoding>
-struct GenericStringStream {
+struct GenericStringStream
+{
     typedef typename Encoding::Ch Ch;
 
-    GenericStringStream(const Ch *src) : src_(src), head_(src) {}
+    GenericStringStream(const Ch* src) : src_(src), head_(src) {}
 
     Ch Peek() const { return *src_; }
     Ch Take() { return *src_++; }
     size_t Tell() const { return static_cast<size_t>(src_ - head_); }
 
-    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+    Ch* PutBegin()
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
     void Put(Ch) { RAPIDJSON_ASSERT(false); }
     void Flush() { RAPIDJSON_ASSERT(false); }
-    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+    size_t PutEnd(Ch*)
+    {
+        RAPIDJSON_ASSERT(false);
+        return 0;
+    }
 
-    const Ch* src_;     //!< Current read position.
-    const Ch* head_;    //!< Original head of the string.
+    const Ch* src_;  //!< Current read position.
+    const Ch* head_; //!< Original head of the string.
 };
 
 template <typename Encoding>
-struct StreamTraits<GenericStringStream<Encoding> > {
-    enum { copyOptimization = 1 };
+struct StreamTraits<GenericStringStream<Encoding>>
+{
+    enum
+    {
+        copyOptimization = 1
+    };
 };
 
 //! String stream with UTF8 encoding.
-typedef GenericStringStream<UTF8<> > StringStream;
+typedef GenericStringStream<UTF8<>> StringStream;
 
 ///////////////////////////////////////////////////////////////////////////////
 // InsituStringStream
@@ -185,10 +206,11 @@ typedef GenericStringStream<UTF8<> > StringStream;
     \note implements Stream concept
 */
 template <typename Encoding>
-struct GenericInsituStringStream {
+struct GenericInsituStringStream
+{
     typedef typename Encoding::Ch Ch;
 
-    GenericInsituStringStream(Ch *src) : src_(src), dst_(0), head_(src) {}
+    GenericInsituStringStream(Ch* src) : src_(src), dst_(0), head_(src) {}
 
     // Read
     Ch Peek() { return *src_; }
@@ -196,13 +218,22 @@ struct GenericInsituStringStream {
     size_t Tell() { return static_cast<size_t>(src_ - head_); }
 
     // Write
-    void Put(Ch c) { RAPIDJSON_ASSERT(dst_ != 0); *dst_++ = c; }
+    void Put(Ch c)
+    {
+        RAPIDJSON_ASSERT(dst_ != 0);
+        *dst_++ = c;
+    }
 
     Ch* PutBegin() { return dst_ = src_; }
     size_t PutEnd(Ch* begin) { return static_cast<size_t>(dst_ - begin); }
     void Flush() {}
 
-    Ch* Push(size_t count) { Ch* begin = dst_; dst_ += count; return begin; }
+    Ch* Push(size_t count)
+    {
+        Ch* begin = dst_;
+        dst_ += count;
+        return begin;
+    }
     void Pop(size_t count) { dst_ -= count; }
 
     Ch* src_;
@@ -211,12 +242,16 @@ struct GenericInsituStringStream {
 };
 
 template <typename Encoding>
-struct StreamTraits<GenericInsituStringStream<Encoding> > {
-    enum { copyOptimization = 1 };
+struct StreamTraits<GenericInsituStringStream<Encoding>>
+{
+    enum
+    {
+        copyOptimization = 1
+    };
 };
 
 //! Insitu string stream with UTF8 encoding.
-typedef GenericInsituStringStream<UTF8<> > InsituStringStream;
+typedef GenericInsituStringStream<UTF8<>> InsituStringStream;
 
 RAPIDJSON_NAMESPACE_END
 
diff --git a/include/rapidjson/stringbuffer.h b/include/rapidjson/stringbuffer.h
index 82ad3ca6bb..163d68840b 100644
--- a/include/rapidjson/stringbuffer.h
+++ b/include/rapidjson/stringbuffer.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_STRINGBUFFER_H_
@@ -26,7 +26,7 @@
 
 #if defined(__clang__)
 RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(c++98-compat)
+RAPIDJSON_DIAG_OFF(c++ 98 - compat)
 #endif
 
 RAPIDJSON_NAMESPACE_BEGIN
@@ -38,16 +38,21 @@ RAPIDJSON_NAMESPACE_BEGIN
     \note implements Stream concept
 */
 template <typename Encoding, typename Allocator = CrtAllocator>
-class GenericStringBuffer {
-public:
+class GenericStringBuffer
+{
+    public:
     typedef typename Encoding::Ch Ch;
 
-    GenericStringBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {}
+    GenericStringBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity)
+        : stack_(allocator, capacity)
+    {
+    }
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
     GenericStringBuffer(GenericStringBuffer&& rhs) : stack_(std::move(rhs.stack_)) {}
-    GenericStringBuffer& operator=(GenericStringBuffer&& rhs) {
-        if (&rhs != this)
+    GenericStringBuffer& operator=(GenericStringBuffer&& rhs)
+    {
+        if(&rhs != this)
             stack_ = std::move(rhs.stack_);
         return *this;
     }
@@ -58,7 +63,8 @@ public:
     void Flush() {}
 
     void Clear() { stack_.Clear(); }
-    void ShrinkToFit() {
+    void ShrinkToFit()
+    {
         // Push and pop a null terminator. This is safe.
         *stack_.template Push<Ch>() = '\0';
         stack_.ShrinkToFit();
@@ -70,7 +76,8 @@ public:
     Ch* PushUnsafe(size_t count) { return stack_.template PushUnsafe<Ch>(count); }
     void Pop(size_t count) { stack_.template Pop<Ch>(count); }
 
-    const Ch* GetString() const {
+    const Ch* GetString() const
+    {
         // Push and pop a null terminator. This is safe.
         *stack_.template Push<Ch>() = '\0';
         stack_.template Pop<Ch>(1);
@@ -87,28 +94,31 @@ public:
     static const size_t kDefaultCapacity = 256;
     mutable internal::Stack<Allocator> stack_;
 
-private:
+    private:
     // Prohibit copy constructor & assignment operator.
     GenericStringBuffer(const GenericStringBuffer&);
     GenericStringBuffer& operator=(const GenericStringBuffer&);
 };
 
 //! String buffer with UTF8 encoding
-typedef GenericStringBuffer<UTF8<> > StringBuffer;
+typedef GenericStringBuffer<UTF8<>> StringBuffer;
 
-template<typename Encoding, typename Allocator>
-inline void PutReserve(GenericStringBuffer<Encoding, Allocator>& stream, size_t count) {
+template <typename Encoding, typename Allocator>
+inline void PutReserve(GenericStringBuffer<Encoding, Allocator>& stream, size_t count)
+{
     stream.Reserve(count);
 }
 
-template<typename Encoding, typename Allocator>
-inline void PutUnsafe(GenericStringBuffer<Encoding, Allocator>& stream, typename Encoding::Ch c) {
+template <typename Encoding, typename Allocator>
+inline void PutUnsafe(GenericStringBuffer<Encoding, Allocator>& stream, typename Encoding::Ch c)
+{
     stream.PutUnsafe(c);
 }
 
 //! Implement specialized version of PutN() with memset() for better performance.
-template<>
-inline void PutN(GenericStringBuffer<UTF8<> >& stream, char c, size_t n) {
+template <>
+inline void PutN(GenericStringBuffer<UTF8<>>& stream, char c, size_t n)
+{
     std::memset(stream.stack_.Push<char>(n), c, n * sizeof(c));
 }
 
diff --git a/include/rapidjson/uri.h b/include/rapidjson/uri.h
index f93e508a4f..cd00488548 100644
--- a/include/rapidjson/uri.h
+++ b/include/rapidjson/uri.h
@@ -19,7 +19,7 @@
 
 #if defined(__clang__)
 RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(c++98-compat)
+RAPIDJSON_DIAG_OFF(c++ 98 - compat)
 #elif defined(_MSC_VER)
 RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
 #endif
@@ -29,66 +29,141 @@ RAPIDJSON_NAMESPACE_BEGIN
 ///////////////////////////////////////////////////////////////////////////////
 // GenericUri
 
-template <typename ValueType, typename Allocator=CrtAllocator>
-class GenericUri {
-public:
+template <typename ValueType, typename Allocator = CrtAllocator>
+class GenericUri
+{
+    public:
     typedef typename ValueType::Ch Ch;
 #if RAPIDJSON_HAS_STDSTRING
     typedef std::basic_string<Ch> String;
 #endif
 
     //! Constructors
-    GenericUri(Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() {
+    GenericUri(Allocator* allocator = 0)
+        : uri_(),
+          base_(),
+          scheme_(),
+          auth_(),
+          path_(),
+          query_(),
+          frag_(),
+          allocator_(allocator),
+          ownAllocator_()
+    {
     }
 
-    GenericUri(const Ch* uri, SizeType len, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() {
+    GenericUri(const Ch* uri, SizeType len, Allocator* allocator = 0)
+        : uri_(),
+          base_(),
+          scheme_(),
+          auth_(),
+          path_(),
+          query_(),
+          frag_(),
+          allocator_(allocator),
+          ownAllocator_()
+    {
         Parse(uri, len);
     }
 
-    GenericUri(const Ch* uri, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() {
+    GenericUri(const Ch* uri, Allocator* allocator = 0)
+        : uri_(),
+          base_(),
+          scheme_(),
+          auth_(),
+          path_(),
+          query_(),
+          frag_(),
+          allocator_(allocator),
+          ownAllocator_()
+    {
         Parse(uri, internal::StrLen<Ch>(uri));
     }
 
     // Use with specializations of GenericValue
-    template<typename T> GenericUri(const T& uri, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() {
+    template <typename T>
+    GenericUri(const T& uri, Allocator* allocator = 0)
+        : uri_(),
+          base_(),
+          scheme_(),
+          auth_(),
+          path_(),
+          query_(),
+          frag_(),
+          allocator_(allocator),
+          ownAllocator_()
+    {
         const Ch* u = uri.template Get<const Ch*>(); // TypeHelper from document.h
         Parse(u, internal::StrLen<Ch>(u));
     }
 
 #if RAPIDJSON_HAS_STDSTRING
-    GenericUri(const String& uri, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() {
+    GenericUri(const String& uri, Allocator* allocator = 0)
+        : uri_(),
+          base_(),
+          scheme_(),
+          auth_(),
+          path_(),
+          query_(),
+          frag_(),
+          allocator_(allocator),
+          ownAllocator_()
+    {
         Parse(uri.c_str(), internal::StrLen<Ch>(uri.c_str()));
     }
 #endif
 
     //! Copy constructor
-    GenericUri(const GenericUri& rhs) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(), ownAllocator_() {
+    GenericUri(const GenericUri& rhs)
+        : uri_(),
+          base_(),
+          scheme_(),
+          auth_(),
+          path_(),
+          query_(),
+          frag_(),
+          allocator_(),
+          ownAllocator_()
+    {
         *this = rhs;
     }
 
     //! Copy constructor
-    GenericUri(const GenericUri& rhs, Allocator* allocator) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() {
+    GenericUri(const GenericUri& rhs, Allocator* allocator)
+        : uri_(),
+          base_(),
+          scheme_(),
+          auth_(),
+          path_(),
+          query_(),
+          frag_(),
+          allocator_(allocator),
+          ownAllocator_()
+    {
         *this = rhs;
     }
 
     //! Destructor.
-    ~GenericUri() {
+    ~GenericUri()
+    {
         Free();
         RAPIDJSON_DELETE(ownAllocator_);
     }
 
     //! Assignment operator
-    GenericUri& operator=(const GenericUri& rhs) {
-        if (this != &rhs) {
+    GenericUri& operator=(const GenericUri& rhs)
+    {
+        if(this != &rhs)
+        {
             // Do not delete ownAllocator
             Free();
             Allocate(rhs.GetStringLength());
-            auth_ = CopyPart(scheme_, rhs.scheme_, rhs.GetSchemeStringLength());
-            path_ = CopyPart(auth_, rhs.auth_, rhs.GetAuthStringLength());
+            auth_  = CopyPart(scheme_, rhs.scheme_, rhs.GetSchemeStringLength());
+            path_  = CopyPart(auth_, rhs.auth_, rhs.GetAuthStringLength());
             query_ = CopyPart(path_, rhs.path_, rhs.GetPathStringLength());
-            frag_ = CopyPart(query_, rhs.query_, rhs.GetQueryStringLength());
-            base_ = CopyPart(frag_, rhs.frag_, rhs.GetFragStringLength());
-            uri_ = CopyPart(base_, rhs.base_, rhs.GetBaseStringLength());
+            frag_  = CopyPart(query_, rhs.query_, rhs.GetQueryStringLength());
+            base_  = CopyPart(frag_, rhs.frag_, rhs.GetFragStringLength());
+            uri_   = CopyPart(base_, rhs.base_, rhs.GetBaseStringLength());
             CopyPart(uri_, rhs.uri_, rhs.GetStringLength());
         }
         return *this;
@@ -96,7 +171,9 @@ public:
 
     //! Getters
     // Use with specializations of GenericValue
-    template<typename T> void Get(T& uri, Allocator& allocator) {
+    template <typename T>
+    void Get(T& uri, Allocator& allocator)
+    {
         uri.template Set<const Ch*>(this->GetString(), allocator); // TypeHelper from document.h
     }
 
@@ -105,7 +182,10 @@ public:
     const Ch* GetBaseString() const { return base_; }
     SizeType GetBaseStringLength() const { return base_ == 0 ? 0 : internal::StrLen<Ch>(base_); }
     const Ch* GetSchemeString() const { return scheme_; }
-    SizeType GetSchemeStringLength() const { return scheme_ == 0 ? 0 : internal::StrLen<Ch>(scheme_); }
+    SizeType GetSchemeStringLength() const
+    {
+        return scheme_ == 0 ? 0 : internal::StrLen<Ch>(scheme_);
+    }
     const Ch* GetAuthString() const { return auth_; }
     SizeType GetAuthStringLength() const { return auth_ == 0 ? 0 : internal::StrLen<Ch>(auth_); }
     const Ch* GetPathString() const { return path_; }
@@ -116,36 +196,59 @@ public:
     SizeType GetFragStringLength() const { return frag_ == 0 ? 0 : internal::StrLen<Ch>(frag_); }
 
 #if RAPIDJSON_HAS_STDSTRING
-    static String Get(const GenericUri& uri) { return String(uri.GetString(), uri.GetStringLength()); }
-    static String GetBase(const GenericUri& uri) { return String(uri.GetBaseString(), uri.GetBaseStringLength()); }
-    static String GetScheme(const GenericUri& uri) { return String(uri.GetSchemeString(), uri.GetSchemeStringLength()); }
-    static String GetAuth(const GenericUri& uri) { return String(uri.GetAuthString(), uri.GetAuthStringLength()); }
-    static String GetPath(const GenericUri& uri) { return String(uri.GetPathString(), uri.GetPathStringLength()); }
-    static String GetQuery(const GenericUri& uri) { return String(uri.GetQueryString(), uri.GetQueryStringLength()); }
-    static String GetFrag(const GenericUri& uri) { return String(uri.GetFragString(), uri.GetFragStringLength()); }
+    static String Get(const GenericUri& uri)
+    {
+        return String(uri.GetString(), uri.GetStringLength());
+    }
+    static String GetBase(const GenericUri& uri)
+    {
+        return String(uri.GetBaseString(), uri.GetBaseStringLength());
+    }
+    static String GetScheme(const GenericUri& uri)
+    {
+        return String(uri.GetSchemeString(), uri.GetSchemeStringLength());
+    }
+    static String GetAuth(const GenericUri& uri)
+    {
+        return String(uri.GetAuthString(), uri.GetAuthStringLength());
+    }
+    static String GetPath(const GenericUri& uri)
+    {
+        return String(uri.GetPathString(), uri.GetPathStringLength());
+    }
+    static String GetQuery(const GenericUri& uri)
+    {
+        return String(uri.GetQueryString(), uri.GetQueryStringLength());
+    }
+    static String GetFrag(const GenericUri& uri)
+    {
+        return String(uri.GetFragString(), uri.GetFragStringLength());
+    }
 #endif
 
     //! Equality operators
-    bool operator==(const GenericUri& rhs) const {
-        return Match(rhs, true);
-    }
+    bool operator==(const GenericUri& rhs) const { return Match(rhs, true); }
 
-    bool operator!=(const GenericUri& rhs) const {
-        return !Match(rhs, true);
-    }
+    bool operator!=(const GenericUri& rhs) const { return !Match(rhs, true); }
 
-    bool Match(const GenericUri& uri, bool full = true) const {
+    bool Match(const GenericUri& uri, bool full = true) const
+    {
         Ch* s1;
         Ch* s2;
-        if (full) {
+        if(full)
+        {
             s1 = uri_;
             s2 = uri.uri_;
-        } else {
+        }
+        else
+        {
             s1 = base_;
             s2 = uri.base_;
         }
-        if (s1 == s2) return true;
-        if (s1 == 0 || s2 == 0) return false;
+        if(s1 == s2)
+            return true;
+        if(s1 == 0 || s2 == 0)
+            return false;
         return internal::StrCmp<Ch>(s1, s2) == 0;
     }
 
@@ -153,56 +256,80 @@ public:
     // See https://tools.ietf.org/html/rfc3986
     // Use for resolving an id or $ref with an in-scope id.
     // Returns a new GenericUri for the resolved URI.
-    GenericUri Resolve(const GenericUri& baseuri, Allocator* allocator = 0) {
+    GenericUri Resolve(const GenericUri& baseuri, Allocator* allocator = 0)
+    {
         GenericUri resuri;
         resuri.allocator_ = allocator;
         // Ensure enough space for combining paths
         resuri.Allocate(GetStringLength() + baseuri.GetStringLength() + 1); // + 1 for joining slash
 
-        if (!(GetSchemeStringLength() == 0)) {
+        if(!(GetSchemeStringLength() == 0))
+        {
             // Use all of this URI
-            resuri.auth_ = CopyPart(resuri.scheme_, scheme_, GetSchemeStringLength());
-            resuri.path_ = CopyPart(resuri.auth_, auth_, GetAuthStringLength());
+            resuri.auth_  = CopyPart(resuri.scheme_, scheme_, GetSchemeStringLength());
+            resuri.path_  = CopyPart(resuri.auth_, auth_, GetAuthStringLength());
             resuri.query_ = CopyPart(resuri.path_, path_, GetPathStringLength());
-            resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength());
+            resuri.frag_  = CopyPart(resuri.query_, query_, GetQueryStringLength());
             resuri.RemoveDotSegments();
-        } else {
+        }
+        else
+        {
             // Use the base scheme
-            resuri.auth_ = CopyPart(resuri.scheme_, baseuri.scheme_, baseuri.GetSchemeStringLength());
-            if (!(GetAuthStringLength() == 0)) {
+            resuri.auth_ =
+                CopyPart(resuri.scheme_, baseuri.scheme_, baseuri.GetSchemeStringLength());
+            if(!(GetAuthStringLength() == 0))
+            {
                 // Use this auth, path, query
-                resuri.path_ = CopyPart(resuri.auth_, auth_, GetAuthStringLength());
+                resuri.path_  = CopyPart(resuri.auth_, auth_, GetAuthStringLength());
                 resuri.query_ = CopyPart(resuri.path_, path_, GetPathStringLength());
-                resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength());
+                resuri.frag_  = CopyPart(resuri.query_, query_, GetQueryStringLength());
                 resuri.RemoveDotSegments();
-            } else {
+            }
+            else
+            {
                 // Use the base auth
                 resuri.path_ = CopyPart(resuri.auth_, baseuri.auth_, baseuri.GetAuthStringLength());
-                if (GetPathStringLength() == 0) {
+                if(GetPathStringLength() == 0)
+                {
                     // Use the base path
-                    resuri.query_ = CopyPart(resuri.path_, baseuri.path_, baseuri.GetPathStringLength());
-                    if (GetQueryStringLength() == 0) {
+                    resuri.query_ =
+                        CopyPart(resuri.path_, baseuri.path_, baseuri.GetPathStringLength());
+                    if(GetQueryStringLength() == 0)
+                    {
                         // Use the base query
-                        resuri.frag_ = CopyPart(resuri.query_, baseuri.query_, baseuri.GetQueryStringLength());
-                    } else {
+                        resuri.frag_ =
+                            CopyPart(resuri.query_, baseuri.query_, baseuri.GetQueryStringLength());
+                    }
+                    else
+                    {
                         // Use this query
                         resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength());
                     }
-                } else {
-                    if (path_[0] == '/') {
+                }
+                else
+                {
+                    if(path_[0] == '/')
+                    {
                         // Absolute path - use all of this path
                         resuri.query_ = CopyPart(resuri.path_, path_, GetPathStringLength());
                         resuri.RemoveDotSegments();
-                    } else {
-                        // Relative path - append this path to base path after base path's last slash
+                    }
+                    else
+                    {
+                        // Relative path - append this path to base path after base path's last
+                        // slash
                         size_t pos = 0;
-                        if (!(baseuri.GetAuthStringLength() == 0) && baseuri.GetPathStringLength() == 0) {
+                        if(!(baseuri.GetAuthStringLength() == 0) &&
+                           baseuri.GetPathStringLength() == 0)
+                        {
                             resuri.path_[pos] = '/';
                             pos++;
                         }
                         size_t lastslashpos = baseuri.GetPathStringLength();
-                        while (lastslashpos > 0) {
-                            if (baseuri.path_[lastslashpos - 1] == '/') break;
+                        while(lastslashpos > 0)
+                        {
+                            if(baseuri.path_[lastslashpos - 1] == '/')
+                                break;
                             lastslashpos--;
                         }
                         std::memcpy(&resuri.path_[pos], baseuri.path_, lastslashpos * sizeof(Ch));
@@ -228,74 +355,87 @@ public:
     //! Get the allocator of this GenericUri.
     Allocator& GetAllocator() { return *allocator_; }
 
-private:
+    private:
     // Allocate memory for a URI
     // Returns total amount allocated
-    std::size_t Allocate(std::size_t len) {
+    std::size_t Allocate(std::size_t len)
+    {
         // Create own allocator if user did not supply.
-        if (!allocator_)
-            ownAllocator_ =  allocator_ = RAPIDJSON_NEW(Allocator)();
+        if(!allocator_)
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
 
-        // Allocate one block containing each part of the URI (5) plus base plus full URI, all null terminated.
-        // Order: scheme, auth, path, query, frag, base, uri
-        // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
+        // Allocate one block containing each part of the URI (5) plus base plus full URI, all null
+        // terminated. Order: scheme, auth, path, query, frag, base, uri Note need to set,
+        // increment, assign in 3 stages to avoid compiler warning bug.
         size_t total = (3 * len + 7) * sizeof(Ch);
-        scheme_ = static_cast<Ch*>(allocator_->Malloc(total));
-        *scheme_ = '\0';
-        auth_ = scheme_;
+        scheme_      = static_cast<Ch*>(allocator_->Malloc(total));
+        *scheme_     = '\0';
+        auth_        = scheme_;
         auth_++;
         *auth_ = '\0';
-        path_ = auth_;
+        path_  = auth_;
         path_++;
         *path_ = '\0';
         query_ = path_;
         query_++;
         *query_ = '\0';
-        frag_ = query_;
+        frag_   = query_;
         frag_++;
         *frag_ = '\0';
-        base_ = frag_;
+        base_  = frag_;
         base_++;
         *base_ = '\0';
-        uri_ = base_;
+        uri_   = base_;
         uri_++;
         *uri_ = '\0';
         return total;
     }
 
     // Free memory for a URI
-    void Free() {
-        if (scheme_) {
+    void Free()
+    {
+        if(scheme_)
+        {
             Allocator::Free(scheme_);
             scheme_ = 0;
         }
     }
 
     // Parse a URI into constituent scheme, authority, path, query, & fragment parts
-    // Supports URIs that match regex ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? as per
-    // https://tools.ietf.org/html/rfc3986
-    void Parse(const Ch* uri, std::size_t len) {
+    // Supports URIs that match regex ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? as
+    // per https://tools.ietf.org/html/rfc3986
+    void Parse(const Ch* uri, std::size_t len)
+    {
         std::size_t start = 0, pos1 = 0, pos2 = 0;
         Allocate(len);
 
         // Look for scheme ([^:/?#]+):)?
-        if (start < len) {
-            while (pos1 < len) {
-                if (uri[pos1] == ':') break;
+        if(start < len)
+        {
+            while(pos1 < len)
+            {
+                if(uri[pos1] == ':')
+                    break;
                 pos1++;
             }
-            if (pos1 != len) {
-                while (pos2 < len) {
-                    if (uri[pos2] == '/') break;
-                    if (uri[pos2] == '?') break;
-                    if (uri[pos2] == '#') break;
+            if(pos1 != len)
+            {
+                while(pos2 < len)
+                {
+                    if(uri[pos2] == '/')
+                        break;
+                    if(uri[pos2] == '?')
+                        break;
+                    if(uri[pos2] == '#')
+                        break;
                     pos2++;
                 }
-                if (pos1 < pos2) {
+                if(pos1 < pos2)
+                {
                     pos1++;
                     std::memcpy(scheme_, &uri[start], pos1 * sizeof(Ch));
                     scheme_[pos1] = '\0';
-                    start = pos1;
+                    start         = pos1;
                 }
             }
         }
@@ -304,35 +444,45 @@ private:
         auth_ = scheme_ + GetSchemeStringLength();
         auth_++;
         *auth_ = '\0';
-        if (start < len - 1 && uri[start] == '/' && uri[start + 1] == '/') {
+        if(start < len - 1 && uri[start] == '/' && uri[start + 1] == '/')
+        {
             pos2 = start + 2;
-            while (pos2 < len) {
-                if (uri[pos2] == '/') break;
-                if (uri[pos2] == '?') break;
-                if (uri[pos2] == '#') break;
+            while(pos2 < len)
+            {
+                if(uri[pos2] == '/')
+                    break;
+                if(uri[pos2] == '?')
+                    break;
+                if(uri[pos2] == '#')
+                    break;
                 pos2++;
             }
             std::memcpy(auth_, &uri[start], (pos2 - start) * sizeof(Ch));
             auth_[pos2 - start] = '\0';
-            start = pos2;
+            start               = pos2;
         }
         // Look for path ([^?#]*)
         // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
         path_ = auth_ + GetAuthStringLength();
         path_++;
         *path_ = '\0';
-        if (start < len) {
+        if(start < len)
+        {
             pos2 = start;
-            while (pos2 < len) {
-                if (uri[pos2] == '?') break;
-                if (uri[pos2] == '#') break;
+            while(pos2 < len)
+            {
+                if(uri[pos2] == '?')
+                    break;
+                if(uri[pos2] == '#')
+                    break;
                 pos2++;
             }
-            if (start != pos2) {
+            if(start != pos2)
+            {
                 std::memcpy(path_, &uri[start], (pos2 - start) * sizeof(Ch));
                 path_[pos2 - start] = '\0';
-                if (path_[0] == '/')
-                    RemoveDotSegments();   // absolute path - normalize
+                if(path_[0] == '/')
+                    RemoveDotSegments(); // absolute path - normalize
                 start = pos2;
             }
         }
@@ -341,16 +491,20 @@ private:
         query_ = path_ + GetPathStringLength();
         query_++;
         *query_ = '\0';
-        if (start < len && uri[start] == '?') {
+        if(start < len && uri[start] == '?')
+        {
             pos2 = start + 1;
-            while (pos2 < len) {
-                if (uri[pos2] == '#') break;
+            while(pos2 < len)
+            {
+                if(uri[pos2] == '#')
+                    break;
                 pos2++;
             }
-            if (start != pos2) {
+            if(start != pos2)
+            {
                 std::memcpy(query_, &uri[start], (pos2 - start) * sizeof(Ch));
                 query_[pos2 - start] = '\0';
-                start = pos2;
+                start                = pos2;
             }
         }
         // Look for fragment (#(.*))?
@@ -358,7 +512,8 @@ private:
         frag_ = query_ + GetQueryStringLength();
         frag_++;
         *frag_ = '\0';
-        if (start < len && uri[start] == '#') {
+        if(start < len && uri[start] == '#')
+        {
             std::memcpy(frag_, &uri[start], (len - start) * sizeof(Ch));
             frag_[len - start] = '\0';
         }
@@ -371,36 +526,39 @@ private:
     }
 
     // Reconstitute base
-    void SetBase() {
+    void SetBase()
+    {
         Ch* next = base_;
         std::memcpy(next, scheme_, GetSchemeStringLength() * sizeof(Ch));
-        next+= GetSchemeStringLength();
+        next += GetSchemeStringLength();
         std::memcpy(next, auth_, GetAuthStringLength() * sizeof(Ch));
-        next+= GetAuthStringLength();
+        next += GetAuthStringLength();
         std::memcpy(next, path_, GetPathStringLength() * sizeof(Ch));
-        next+= GetPathStringLength();
+        next += GetPathStringLength();
         std::memcpy(next, query_, GetQueryStringLength() * sizeof(Ch));
-        next+= GetQueryStringLength();
+        next += GetQueryStringLength();
         *next = '\0';
     }
 
     // Reconstitute uri
-    void SetUri() {
+    void SetUri()
+    {
         Ch* next = uri_;
         std::memcpy(next, base_, GetBaseStringLength() * sizeof(Ch));
-        next+= GetBaseStringLength();
+        next += GetBaseStringLength();
         std::memcpy(next, frag_, GetFragStringLength() * sizeof(Ch));
-        next+= GetFragStringLength();
+        next += GetFragStringLength();
         *next = '\0';
     }
 
     // Copy a part from one GenericUri to another
     // Return the pointer to the next part to be copied to
-    Ch* CopyPart(Ch* to, Ch* from, std::size_t len) {
+    Ch* CopyPart(Ch* to, Ch* from, std::size_t len)
+    {
         RAPIDJSON_ASSERT(to != 0);
         RAPIDJSON_ASSERT(from != 0);
         std::memcpy(to, from, len * sizeof(Ch));
-        to[len] = '\0';
+        to[len]  = '\0';
         Ch* next = to + len + 1;
         return next;
     }
@@ -408,45 +566,58 @@ private:
     // Remove . and .. segments from the path_ member.
     // https://tools.ietf.org/html/rfc3986
     // This is done in place as we are only removing segments.
-    void RemoveDotSegments() {
+    void RemoveDotSegments()
+    {
         std::size_t pathlen = GetPathStringLength();
-        std::size_t pathpos = 0;  // Position in path_
-        std::size_t newpos = 0;   // Position in new path_
+        std::size_t pathpos = 0; // Position in path_
+        std::size_t newpos  = 0; // Position in new path_
 
         // Loop through each segment in original path_
-        while (pathpos < pathlen) {
+        while(pathpos < pathlen)
+        {
             // Get next segment, bounded by '/' or end
             size_t slashpos = 0;
-            while ((pathpos + slashpos) < pathlen) {
-                if (path_[pathpos + slashpos] == '/') break;
+            while((pathpos + slashpos) < pathlen)
+            {
+                if(path_[pathpos + slashpos] == '/')
+                    break;
                 slashpos++;
             }
             // Check for .. and . segments
-            if (slashpos == 2 && path_[pathpos] == '.' && path_[pathpos + 1] == '.') {
+            if(slashpos == 2 && path_[pathpos] == '.' && path_[pathpos + 1] == '.')
+            {
                 // Backup a .. segment in the new path_
                 // We expect to find a previously added slash at the end or nothing
                 RAPIDJSON_ASSERT(newpos == 0 || path_[newpos - 1] == '/');
                 size_t lastslashpos = newpos;
                 // Make sure we don't go beyond the start segment
-                if (lastslashpos > 1) {
+                if(lastslashpos > 1)
+                {
                     // Find the next to last slash and back up to it
                     lastslashpos--;
-                    while (lastslashpos > 0) {
-                        if (path_[lastslashpos - 1] == '/') break;
+                    while(lastslashpos > 0)
+                    {
+                        if(path_[lastslashpos - 1] == '/')
+                            break;
                         lastslashpos--;
                     }
                     // Set the new path_ position
                     newpos = lastslashpos;
                 }
-            } else if (slashpos == 1 && path_[pathpos] == '.') {
+            }
+            else if(slashpos == 1 && path_[pathpos] == '.')
+            {
                 // Discard . segment, leaves new path_ unchanged
-            } else {
+            }
+            else
+            {
                 // Move any other kind of segment to the new path_
                 RAPIDJSON_ASSERT(newpos <= pathpos);
                 std::memmove(&path_[newpos], &path_[pathpos], slashpos * sizeof(Ch));
                 newpos += slashpos;
                 // Add slash if not at end
-                if ((pathpos + slashpos) < pathlen) {
+                if((pathpos + slashpos) < pathlen)
+                {
                     path_[newpos] = '/';
                     newpos++;
                 }
@@ -465,8 +636,9 @@ private:
     Ch* query_;  // Includes the ?
     Ch* frag_;   // Includes the #
 
-    Allocator* allocator_;      //!< The current allocator. It is either user-supplied or equal to ownAllocator_.
-    Allocator* ownAllocator_;   //!< Allocator owned by this Uri.
+    Allocator* allocator_;    //!< The current allocator. It is either user-supplied or equal to
+                              //!< ownAllocator_.
+    Allocator* ownAllocator_; //!< Allocator owned by this Uri.
 };
 
 //! GenericUri for Value (UTF-8, default allocator).
diff --git a/include/rapidjson/writer.h b/include/rapidjson/writer.h
index 632e02ce74..634060020d 100644
--- a/include/rapidjson/writer.h
+++ b/include/rapidjson/writer.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_WRITER_H_
@@ -23,7 +23,7 @@
 #include "internal/dtoa.h"
 #include "internal/itoa.h"
 #include "stringbuffer.h"
-#include <new>      // placement new
+#include <new> // placement new
 
 #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
 #include <intrin.h>
@@ -40,8 +40,8 @@
 #ifdef __clang__
 RAPIDJSON_DIAG_PUSH
 RAPIDJSON_DIAG_OFF(padded)
-RAPIDJSON_DIAG_OFF(unreachable-code)
-RAPIDJSON_DIAG_OFF(c++98-compat)
+RAPIDJSON_DIAG_OFF(unreachable - code)
+RAPIDJSON_DIAG_OFF(c++ 98 - compat)
 #elif defined(_MSC_VER)
 RAPIDJSON_DIAG_PUSH
 RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
@@ -52,7 +52,7 @@ RAPIDJSON_NAMESPACE_BEGIN
 ///////////////////////////////////////////////////////////////////////////////
 // WriteFlag
 
-/*! \def RAPIDJSON_WRITE_DEFAULT_FLAGS 
+/*! \def RAPIDJSON_WRITE_DEFAULT_FLAGS
     \ingroup RAPIDJSON_CONFIG
     \brief User-defined kWriteDefaultFlags definition.
 
@@ -63,12 +63,15 @@ RAPIDJSON_NAMESPACE_BEGIN
 #endif
 
 //! Combination of writeFlags
-enum WriteFlag {
-    kWriteNoFlags = 0,              //!< No flags are set.
+enum WriteFlag
+{
+    kWriteNoFlags              = 0, //!< No flags are set.
     kWriteValidateEncodingFlag = 1, //!< Validate encoding of JSON strings.
-    kWriteNanAndInfFlag = 2,        //!< Allow writing of Infinity, -Infinity and NaN.
-    kWriteNanAndInfNullFlag = 4,    //!< Allow writing of Infinity, -Infinity and NaN as null.
-    kWriteDefaultFlags = RAPIDJSON_WRITE_DEFAULT_FLAGS  //!< Default write flags. Can be customized by defining RAPIDJSON_WRITE_DEFAULT_FLAGS
+    kWriteNanAndInfFlag        = 2, //!< Allow writing of Infinity, -Infinity and NaN.
+    kWriteNanAndInfNullFlag    = 4, //!< Allow writing of Infinity, -Infinity and NaN as null.
+    kWriteDefaultFlags =
+        RAPIDJSON_WRITE_DEFAULT_FLAGS //!< Default write flags. Can be customized by defining
+                                      //!< RAPIDJSON_WRITE_DEFAULT_FLAGS
 };
 
 //! JSON writer
@@ -77,7 +80,7 @@ enum WriteFlag {
 
     User may programmatically calls the functions of a writer to generate JSON text.
 
-    On the other side, a writer can also be passed to objects that generates events, 
+    On the other side, a writer can also be passed to objects that generates events,
 
     for example Reader::Parse() and Document::Accept().
 
@@ -87,9 +90,14 @@ enum WriteFlag {
     \tparam StackAllocator Type of allocator for allocating memory of stack.
     \note implements Handler concept
 */
-template<typename OutputStream, typename SourceEncoding = UTF8<>, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator, unsigned writeFlags = kWriteDefaultFlags>
-class Writer {
-public:
+template <typename OutputStream,
+          typename SourceEncoding = UTF8<>,
+          typename TargetEncoding = UTF8<>,
+          typename StackAllocator = CrtAllocator,
+          unsigned writeFlags     = kWriteDefaultFlags>
+class Writer
+{
+    public:
     typedef typename SourceEncoding::Ch Ch;
 
     static const int kDefaultMaxDecimalPlaces = 324;
@@ -99,17 +107,31 @@ public:
         \param stackAllocator User supplied allocator. If it is null, it will create a private one.
         \param levelDepth Initial capacity of stack.
     */
-    explicit
-    Writer(OutputStream& os, StackAllocator* stackAllocator = 0, size_t levelDepth = kDefaultLevelDepth) : 
-        os_(&os), level_stack_(stackAllocator, levelDepth * sizeof(Level)), maxDecimalPlaces_(kDefaultMaxDecimalPlaces), hasRoot_(false) {}
+    explicit Writer(OutputStream& os,
+                    StackAllocator* stackAllocator = 0,
+                    size_t levelDepth              = kDefaultLevelDepth)
+        : os_(&os),
+          level_stack_(stackAllocator, levelDepth * sizeof(Level)),
+          maxDecimalPlaces_(kDefaultMaxDecimalPlaces),
+          hasRoot_(false)
+    {
+    }
 
-    explicit
-    Writer(StackAllocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) :
-        os_(0), level_stack_(allocator, levelDepth * sizeof(Level)), maxDecimalPlaces_(kDefaultMaxDecimalPlaces), hasRoot_(false) {}
+    explicit Writer(StackAllocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth)
+        : os_(0),
+          level_stack_(allocator, levelDepth * sizeof(Level)),
+          maxDecimalPlaces_(kDefaultMaxDecimalPlaces),
+          hasRoot_(false)
+    {
+    }
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
-    Writer(Writer&& rhs) :
-        os_(rhs.os_), level_stack_(std::move(rhs.level_stack_)), maxDecimalPlaces_(rhs.maxDecimalPlaces_), hasRoot_(rhs.hasRoot_) {
+    Writer(Writer&& rhs)
+        : os_(rhs.os_),
+          level_stack_(std::move(rhs.level_stack_)),
+          maxDecimalPlaces_(rhs.maxDecimalPlaces_),
+          hasRoot_(rhs.hasRoot_)
+    {
         rhs.os_ = 0;
     }
 #endif
@@ -132,8 +154,9 @@ public:
         writer.EndObject();
         \endcode
     */
-    void Reset(OutputStream& os) {
-        os_ = &os;
+    void Reset(OutputStream& os)
+    {
+        os_      = &os;
         hasRoot_ = false;
         level_stack_.Clear();
     }
@@ -142,66 +165,87 @@ public:
     /*!
         A complete JSON has a complete root object or array.
     */
-    bool IsComplete() const {
-        return hasRoot_ && level_stack_.Empty();
-    }
+    bool IsComplete() const { return hasRoot_ && level_stack_.Empty(); }
 
-    int GetMaxDecimalPlaces() const {
-        return maxDecimalPlaces_;
-    }
+    int GetMaxDecimalPlaces() const { return maxDecimalPlaces_; }
 
     //! Sets the maximum number of decimal places for double output.
     /*!
         This setting truncates the output with specified number of decimal places.
 
-        For example, 
+        For example,
 
         \code
         writer.SetMaxDecimalPlaces(3);
         writer.StartArray();
         writer.Double(0.12345);                 // "0.123"
         writer.Double(0.0001);                  // "0.0"
-        writer.Double(1.234567890123456e30);    // "1.234567890123456e30" (do not truncate significand for positive exponent)
-        writer.Double(1.23e-4);                 // "0.0"                  (do truncate significand for negative exponent)
-        writer.EndArray();
-        \endcode
+        writer.Double(1.234567890123456e30);    // "1.234567890123456e30" (do not truncate
+       significand for positive exponent) writer.Double(1.23e-4);                 // "0.0" (do
+       truncate significand for negative exponent) writer.EndArray(); \endcode
 
-        The default setting does not truncate any decimal places. You can restore to this setting by calling
-        \code
-        writer.SetMaxDecimalPlaces(Writer::kDefaultMaxDecimalPlaces);
-        \endcode
+        The default setting does not truncate any decimal places. You can restore to this setting by
+       calling \code writer.SetMaxDecimalPlaces(Writer::kDefaultMaxDecimalPlaces); \endcode
     */
-    void SetMaxDecimalPlaces(int maxDecimalPlaces) {
-        maxDecimalPlaces_ = maxDecimalPlaces;
-    }
+    void SetMaxDecimalPlaces(int maxDecimalPlaces) { maxDecimalPlaces_ = maxDecimalPlaces; }
 
     /*!@name Implementation of Handler
         \see Handler
     */
     //@{
 
-    bool Null()                 { Prefix(kNullType);   return EndValue(WriteNull()); }
-    bool Bool(bool b)           { Prefix(b ? kTrueType : kFalseType); return EndValue(WriteBool(b)); }
-    bool Int(int i)             { Prefix(kNumberType); return EndValue(WriteInt(i)); }
-    bool Uint(unsigned u)       { Prefix(kNumberType); return EndValue(WriteUint(u)); }
-    bool Int64(int64_t i64)     { Prefix(kNumberType); return EndValue(WriteInt64(i64)); }
-    bool Uint64(uint64_t u64)   { Prefix(kNumberType); return EndValue(WriteUint64(u64)); }
+    bool Null()
+    {
+        Prefix(kNullType);
+        return EndValue(WriteNull());
+    }
+    bool Bool(bool b)
+    {
+        Prefix(b ? kTrueType : kFalseType);
+        return EndValue(WriteBool(b));
+    }
+    bool Int(int i)
+    {
+        Prefix(kNumberType);
+        return EndValue(WriteInt(i));
+    }
+    bool Uint(unsigned u)
+    {
+        Prefix(kNumberType);
+        return EndValue(WriteUint(u));
+    }
+    bool Int64(int64_t i64)
+    {
+        Prefix(kNumberType);
+        return EndValue(WriteInt64(i64));
+    }
+    bool Uint64(uint64_t u64)
+    {
+        Prefix(kNumberType);
+        return EndValue(WriteUint64(u64));
+    }
 
     //! Writes the given \c double value to the stream
     /*!
         \param d The value to be written.
         \return Whether it is succeed.
     */
-    bool Double(double d)       { Prefix(kNumberType); return EndValue(WriteDouble(d)); }
+    bool Double(double d)
+    {
+        Prefix(kNumberType);
+        return EndValue(WriteDouble(d));
+    }
 
-    bool RawNumber(const Ch* str, SizeType length, bool copy = false) {
+    bool RawNumber(const Ch* str, SizeType length, bool copy = false)
+    {
         RAPIDJSON_ASSERT(str != 0);
         (void)copy;
         Prefix(kNumberType);
         return EndValue(WriteString(str, length));
     }
 
-    bool String(const Ch* str, SizeType length, bool copy = false) {
+    bool String(const Ch* str, SizeType length, bool copy = false)
+    {
         RAPIDJSON_ASSERT(str != 0);
         (void)copy;
         Prefix(kStringType);
@@ -209,42 +253,49 @@ public:
     }
 
 #if RAPIDJSON_HAS_STDSTRING
-    bool String(const std::basic_string<Ch>& str) {
+    bool String(const std::basic_string<Ch>& str)
+    {
         return String(str.data(), SizeType(str.size()));
     }
 #endif
 
-    bool StartObject() {
+    bool StartObject()
+    {
         Prefix(kObjectType);
-        new (level_stack_.template Push<Level>()) Level(false);
+        new(level_stack_.template Push<Level>()) Level(false);
         return WriteStartObject();
     }
 
-    bool Key(const Ch* str, SizeType length, bool copy = false) { return String(str, length, copy); }
+    bool Key(const Ch* str, SizeType length, bool copy = false)
+    {
+        return String(str, length, copy);
+    }
 
 #if RAPIDJSON_HAS_STDSTRING
-    bool Key(const std::basic_string<Ch>& str)
-    {
-      return Key(str.data(), SizeType(str.size()));
-    }
+    bool Key(const std::basic_string<Ch>& str) { return Key(str.data(), SizeType(str.size())); }
 #endif
 
-    bool EndObject(SizeType memberCount = 0) {
+    bool EndObject(SizeType memberCount = 0)
+    {
         (void)memberCount;
         RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); // not inside an Object
-        RAPIDJSON_ASSERT(!level_stack_.template Top<Level>()->inArray); // currently inside an Array, not Object
-        RAPIDJSON_ASSERT(0 == level_stack_.template Top<Level>()->valueCount % 2); // Object has a Key without a Value
+        RAPIDJSON_ASSERT(
+            !level_stack_.template Top<Level>()->inArray); // currently inside an Array, not Object
+        RAPIDJSON_ASSERT(0 == level_stack_.template Top<Level>()->valueCount %
+                                  2); // Object has a Key without a Value
         level_stack_.template Pop<Level>(1);
         return EndValue(WriteEndObject());
     }
 
-    bool StartArray() {
+    bool StartArray()
+    {
         Prefix(kArrayType);
-        new (level_stack_.template Push<Level>()) Level(true);
+        new(level_stack_.template Push<Level>()) Level(true);
         return WriteStartArray();
     }
 
-    bool EndArray(SizeType elementCount = 0) {
+    bool EndArray(SizeType elementCount = 0)
+    {
         (void)elementCount;
         RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level));
         RAPIDJSON_ASSERT(level_stack_.template Top<Level>()->inArray);
@@ -259,18 +310,18 @@ public:
     //! Simpler but slower overload.
     bool String(const Ch* const& str) { return String(str, internal::StrLen(str)); }
     bool Key(const Ch* const& str) { return Key(str, internal::StrLen(str)); }
-    
+
     //@}
 
     //! Write a raw JSON value.
     /*!
         For user to write a stringified JSON as a value.
 
-        \param json A well-formed JSON value. It should not contain null character within [0, length - 1] range.
-        \param length Length of the json.
-        \param type Type of the root of json.
+        \param json A well-formed JSON value. It should not contain null character within [0, length
+       - 1] range. \param length Length of the json. \param type Type of the root of json.
     */
-    bool RawValue(const Ch* json, size_t length, Type type) {
+    bool RawValue(const Ch* json, size_t length, Type type)
+    {
         RAPIDJSON_ASSERT(json != 0);
         Prefix(type);
         return EndValue(WriteRawValue(json, length));
@@ -280,225 +331,298 @@ public:
     /*!
         Allows the user to flush the output stream immediately.
      */
-    void Flush() {
-        os_->Flush();
-    }
+    void Flush() { os_->Flush(); }
 
     static const size_t kDefaultLevelDepth = 32;
 
-protected:
+    protected:
     //! Information for each nested level
-    struct Level {
+    struct Level
+    {
         Level(bool inArray_) : valueCount(0), inArray(inArray_) {}
-        size_t valueCount;  //!< number of values in this level
-        bool inArray;       //!< true if in array, otherwise in object
+        size_t valueCount; //!< number of values in this level
+        bool inArray;      //!< true if in array, otherwise in object
     };
 
-    bool WriteNull()  {
+    bool WriteNull()
+    {
         PutReserve(*os_, 4);
-        PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 'l'); return true;
+        PutUnsafe(*os_, 'n');
+        PutUnsafe(*os_, 'u');
+        PutUnsafe(*os_, 'l');
+        PutUnsafe(*os_, 'l');
+        return true;
     }
 
-    bool WriteBool(bool b)  {
-        if (b) {
+    bool WriteBool(bool b)
+    {
+        if(b)
+        {
             PutReserve(*os_, 4);
-            PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'r'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'e');
+            PutUnsafe(*os_, 't');
+            PutUnsafe(*os_, 'r');
+            PutUnsafe(*os_, 'u');
+            PutUnsafe(*os_, 'e');
         }
-        else {
+        else
+        {
             PutReserve(*os_, 5);
-            PutUnsafe(*os_, 'f'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 's'); PutUnsafe(*os_, 'e');
+            PutUnsafe(*os_, 'f');
+            PutUnsafe(*os_, 'a');
+            PutUnsafe(*os_, 'l');
+            PutUnsafe(*os_, 's');
+            PutUnsafe(*os_, 'e');
         }
         return true;
     }
 
-    bool WriteInt(int i) {
+    bool WriteInt(int i)
+    {
         char buffer[11];
         const char* end = internal::i32toa(i, buffer);
         PutReserve(*os_, static_cast<size_t>(end - buffer));
-        for (const char* p = buffer; p != end; ++p)
+        for(const char* p = buffer; p != end; ++p)
             PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
         return true;
     }
 
-    bool WriteUint(unsigned u) {
+    bool WriteUint(unsigned u)
+    {
         char buffer[10];
         const char* end = internal::u32toa(u, buffer);
         PutReserve(*os_, static_cast<size_t>(end - buffer));
-        for (const char* p = buffer; p != end; ++p)
+        for(const char* p = buffer; p != end; ++p)
             PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
         return true;
     }
 
-    bool WriteInt64(int64_t i64) {
+    bool WriteInt64(int64_t i64)
+    {
         char buffer[21];
         const char* end = internal::i64toa(i64, buffer);
         PutReserve(*os_, static_cast<size_t>(end - buffer));
-        for (const char* p = buffer; p != end; ++p)
+        for(const char* p = buffer; p != end; ++p)
             PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
         return true;
     }
 
-    bool WriteUint64(uint64_t u64) {
+    bool WriteUint64(uint64_t u64)
+    {
         char buffer[20];
         char* end = internal::u64toa(u64, buffer);
         PutReserve(*os_, static_cast<size_t>(end - buffer));
-        for (char* p = buffer; p != end; ++p)
+        for(char* p = buffer; p != end; ++p)
             PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
         return true;
     }
 
-    bool WriteDouble(double d) {
-        if (internal::Double(d).IsNanOrInf()) {
-            if (!(writeFlags & kWriteNanAndInfFlag) && !(writeFlags & kWriteNanAndInfNullFlag))
+    bool WriteDouble(double d)
+    {
+        if(internal::Double(d).IsNanOrInf())
+        {
+            if(!(writeFlags & kWriteNanAndInfFlag) && !(writeFlags & kWriteNanAndInfNullFlag))
                 return false;
-            if (writeFlags & kWriteNanAndInfNullFlag) {
+            if(writeFlags & kWriteNanAndInfNullFlag)
+            {
                 PutReserve(*os_, 4);
-                PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 'l');
+                PutUnsafe(*os_, 'n');
+                PutUnsafe(*os_, 'u');
+                PutUnsafe(*os_, 'l');
+                PutUnsafe(*os_, 'l');
                 return true;
             }
-            if (internal::Double(d).IsNan()) {
+            if(internal::Double(d).IsNan())
+            {
                 PutReserve(*os_, 3);
-                PutUnsafe(*os_, 'N'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'N');
+                PutUnsafe(*os_, 'N');
+                PutUnsafe(*os_, 'a');
+                PutUnsafe(*os_, 'N');
                 return true;
             }
-            if (internal::Double(d).Sign()) {
+            if(internal::Double(d).Sign())
+            {
                 PutReserve(*os_, 9);
                 PutUnsafe(*os_, '-');
             }
             else
                 PutReserve(*os_, 8);
-            PutUnsafe(*os_, 'I'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'f');
-            PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'y');
+            PutUnsafe(*os_, 'I');
+            PutUnsafe(*os_, 'n');
+            PutUnsafe(*os_, 'f');
+            PutUnsafe(*os_, 'i');
+            PutUnsafe(*os_, 'n');
+            PutUnsafe(*os_, 'i');
+            PutUnsafe(*os_, 't');
+            PutUnsafe(*os_, 'y');
             return true;
         }
 
         char buffer[25];
         char* end = internal::dtoa(d, buffer, maxDecimalPlaces_);
         PutReserve(*os_, static_cast<size_t>(end - buffer));
-        for (char* p = buffer; p != end; ++p)
+        for(char* p = buffer; p != end; ++p)
             PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
         return true;
     }
 
-    bool WriteString(const Ch* str, SizeType length)  {
-        static const typename OutputStream::Ch hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+    bool WriteString(const Ch* str, SizeType length)
+    {
+        static const typename OutputStream::Ch hexDigits[16] = {
+            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
         static const char escape[256] = {
-#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-            //0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
-            'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'b', 't', 'n', 'u', 'f', 'r', 'u', 'u', // 00
-            'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', // 10
-              0,   0, '"',   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, // 20
-            Z16, Z16,                                                                       // 30~4F
-              0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,'\\',   0,   0,   0, // 50
-            Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16                                // 60~FF
+#define Z16 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+            // 0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
+            'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'b', 't', 'n', 'u', 'f',  'r', 'u', 'u', // 00
+            'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u',  'u', 'u', 'u', // 10
+            0,   0,   '"', 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   // 20
+            Z16, Z16,                                                                      // 30~4F
+            0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   '\\', 0,   0,   0, // 50
+            Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16                               // 60~FF
 #undef Z16
         };
 
-        if (TargetEncoding::supportUnicode)
+        if(TargetEncoding::supportUnicode)
             PutReserve(*os_, 2 + length * 6); // "\uxxxx..."
         else
-            PutReserve(*os_, 2 + length * 12);  // "\uxxxx\uyyyy..."
+            PutReserve(*os_, 2 + length * 12); // "\uxxxx\uyyyy..."
 
         PutUnsafe(*os_, '\"');
         GenericStringStream<SourceEncoding> is(str);
-        while (ScanWriteUnescapedString(is, length)) {
+        while(ScanWriteUnescapedString(is, length))
+        {
             const Ch c = is.Peek();
-            if (!TargetEncoding::supportUnicode && static_cast<unsigned>(c) >= 0x80) {
+            if(!TargetEncoding::supportUnicode && static_cast<unsigned>(c) >= 0x80)
+            {
                 // Unicode escaping
                 unsigned codepoint;
-                if (RAPIDJSON_UNLIKELY(!SourceEncoding::Decode(is, &codepoint)))
+                if(RAPIDJSON_UNLIKELY(!SourceEncoding::Decode(is, &codepoint)))
                     return false;
                 PutUnsafe(*os_, '\\');
                 PutUnsafe(*os_, 'u');
-                if (codepoint <= 0xD7FF || (codepoint >= 0xE000 && codepoint <= 0xFFFF)) {
+                if(codepoint <= 0xD7FF || (codepoint >= 0xE000 && codepoint <= 0xFFFF))
+                {
                     PutUnsafe(*os_, hexDigits[(codepoint >> 12) & 15]);
-                    PutUnsafe(*os_, hexDigits[(codepoint >>  8) & 15]);
-                    PutUnsafe(*os_, hexDigits[(codepoint >>  4) & 15]);
-                    PutUnsafe(*os_, hexDigits[(codepoint      ) & 15]);
+                    PutUnsafe(*os_, hexDigits[(codepoint >> 8) & 15]);
+                    PutUnsafe(*os_, hexDigits[(codepoint >> 4) & 15]);
+                    PutUnsafe(*os_, hexDigits[(codepoint) & 15]);
                 }
-                else {
+                else
+                {
                     RAPIDJSON_ASSERT(codepoint >= 0x010000 && codepoint <= 0x10FFFF);
                     // Surrogate pair
-                    unsigned s = codepoint - 0x010000;
-                    unsigned lead = (s >> 10) + 0xD800;
+                    unsigned s     = codepoint - 0x010000;
+                    unsigned lead  = (s >> 10) + 0xD800;
                     unsigned trail = (s & 0x3FF) + 0xDC00;
                     PutUnsafe(*os_, hexDigits[(lead >> 12) & 15]);
-                    PutUnsafe(*os_, hexDigits[(lead >>  8) & 15]);
-                    PutUnsafe(*os_, hexDigits[(lead >>  4) & 15]);
-                    PutUnsafe(*os_, hexDigits[(lead      ) & 15]);
+                    PutUnsafe(*os_, hexDigits[(lead >> 8) & 15]);
+                    PutUnsafe(*os_, hexDigits[(lead >> 4) & 15]);
+                    PutUnsafe(*os_, hexDigits[(lead) & 15]);
                     PutUnsafe(*os_, '\\');
                     PutUnsafe(*os_, 'u');
                     PutUnsafe(*os_, hexDigits[(trail >> 12) & 15]);
-                    PutUnsafe(*os_, hexDigits[(trail >>  8) & 15]);
-                    PutUnsafe(*os_, hexDigits[(trail >>  4) & 15]);
-                    PutUnsafe(*os_, hexDigits[(trail      ) & 15]);                    
+                    PutUnsafe(*os_, hexDigits[(trail >> 8) & 15]);
+                    PutUnsafe(*os_, hexDigits[(trail >> 4) & 15]);
+                    PutUnsafe(*os_, hexDigits[(trail) & 15]);
                 }
             }
-            else if ((sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256) && RAPIDJSON_UNLIKELY(escape[static_cast<unsigned char>(c)]))  {
+            else if((sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256) &&
+                    RAPIDJSON_UNLIKELY(escape[static_cast<unsigned char>(c)]))
+            {
                 is.Take();
                 PutUnsafe(*os_, '\\');
-                PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(escape[static_cast<unsigned char>(c)]));
-                if (escape[static_cast<unsigned char>(c)] == 'u') {
+                PutUnsafe(
+                    *os_,
+                    static_cast<typename OutputStream::Ch>(escape[static_cast<unsigned char>(c)]));
+                if(escape[static_cast<unsigned char>(c)] == 'u')
+                {
                     PutUnsafe(*os_, '0');
                     PutUnsafe(*os_, '0');
                     PutUnsafe(*os_, hexDigits[static_cast<unsigned char>(c) >> 4]);
                     PutUnsafe(*os_, hexDigits[static_cast<unsigned char>(c) & 0xF]);
                 }
             }
-            else if (RAPIDJSON_UNLIKELY(!(writeFlags & kWriteValidateEncodingFlag ? 
-                Transcoder<SourceEncoding, TargetEncoding>::Validate(is, *os_) :
-                Transcoder<SourceEncoding, TargetEncoding>::TranscodeUnsafe(is, *os_))))
+            else if(RAPIDJSON_UNLIKELY(
+                        !(writeFlags & kWriteValidateEncodingFlag
+                              ? Transcoder<SourceEncoding, TargetEncoding>::Validate(is, *os_)
+                              : Transcoder<SourceEncoding, TargetEncoding>::TranscodeUnsafe(is,
+                                                                                            *os_))))
                 return false;
         }
         PutUnsafe(*os_, '\"');
         return true;
     }
 
-    bool ScanWriteUnescapedString(GenericStringStream<SourceEncoding>& is, size_t length) {
+    bool ScanWriteUnescapedString(GenericStringStream<SourceEncoding>& is, size_t length)
+    {
         return RAPIDJSON_LIKELY(is.Tell() < length);
     }
 
-    bool WriteStartObject() { os_->Put('{'); return true; }
-    bool WriteEndObject()   { os_->Put('}'); return true; }
-    bool WriteStartArray()  { os_->Put('['); return true; }
-    bool WriteEndArray()    { os_->Put(']'); return true; }
+    bool WriteStartObject()
+    {
+        os_->Put('{');
+        return true;
+    }
+    bool WriteEndObject()
+    {
+        os_->Put('}');
+        return true;
+    }
+    bool WriteStartArray()
+    {
+        os_->Put('[');
+        return true;
+    }
+    bool WriteEndArray()
+    {
+        os_->Put(']');
+        return true;
+    }
 
-    bool WriteRawValue(const Ch* json, size_t length) {
+    bool WriteRawValue(const Ch* json, size_t length)
+    {
         PutReserve(*os_, length);
         GenericStringStream<SourceEncoding> is(json);
-        while (RAPIDJSON_LIKELY(is.Tell() < length)) {
+        while(RAPIDJSON_LIKELY(is.Tell() < length))
+        {
             RAPIDJSON_ASSERT(is.Peek() != '\0');
-            if (RAPIDJSON_UNLIKELY(!(writeFlags & kWriteValidateEncodingFlag ? 
-                Transcoder<SourceEncoding, TargetEncoding>::Validate(is, *os_) :
-                Transcoder<SourceEncoding, TargetEncoding>::TranscodeUnsafe(is, *os_))))
+            if(RAPIDJSON_UNLIKELY(
+                   !(writeFlags & kWriteValidateEncodingFlag
+                         ? Transcoder<SourceEncoding, TargetEncoding>::Validate(is, *os_)
+                         : Transcoder<SourceEncoding, TargetEncoding>::TranscodeUnsafe(is, *os_))))
                 return false;
         }
         return true;
     }
 
-    void Prefix(Type type) {
+    void Prefix(Type type)
+    {
         (void)type;
-        if (RAPIDJSON_LIKELY(level_stack_.GetSize() != 0)) { // this value is not at root
+        if(RAPIDJSON_LIKELY(level_stack_.GetSize() != 0))
+        { // this value is not at root
             Level* level = level_stack_.template Top<Level>();
-            if (level->valueCount > 0) {
-                if (level->inArray) 
+            if(level->valueCount > 0)
+            {
+                if(level->inArray)
                     os_->Put(','); // add comma if it is not the first element in array
-                else  // in object
+                else               // in object
                     os_->Put((level->valueCount % 2 == 0) ? ',' : ':');
             }
-            if (!level->inArray && level->valueCount % 2 == 0)
-                RAPIDJSON_ASSERT(type == kStringType);  // if it's in object, then even number should be a name
+            if(!level->inArray && level->valueCount % 2 == 0)
+                RAPIDJSON_ASSERT(
+                    type == kStringType); // if it's in object, then even number should be a name
             level->valueCount++;
         }
-        else {
-            RAPIDJSON_ASSERT(!hasRoot_);    // Should only has one and only one root.
+        else
+        {
+            RAPIDJSON_ASSERT(!hasRoot_); // Should only has one and only one root.
             hasRoot_ = true;
         }
     }
 
     // Flush the value if it is the top level one.
-    bool EndValue(bool ret) {
-        if (RAPIDJSON_UNLIKELY(level_stack_.Empty()))   // end of json text
+    bool EndValue(bool ret)
+    {
+        if(RAPIDJSON_UNLIKELY(level_stack_.Empty())) // end of json text
             Flush();
         return ret;
     }
@@ -508,7 +632,7 @@ protected:
     int maxDecimalPlaces_;
     bool hasRoot_;
 
-private:
+    private:
     // Prohibit copy constructor & assignment operator.
     Writer(const Writer&);
     Writer& operator=(const Writer&);
@@ -516,89 +640,114 @@ private:
 
 // Full specialization for StringStream to prevent memory copying
 
-template<>
-inline bool Writer<StringBuffer>::WriteInt(int i) {
-    char *buffer = os_->Push(11);
+template <>
+inline bool Writer<StringBuffer>::WriteInt(int i)
+{
+    char* buffer    = os_->Push(11);
     const char* end = internal::i32toa(i, buffer);
     os_->Pop(static_cast<size_t>(11 - (end - buffer)));
     return true;
 }
 
-template<>
-inline bool Writer<StringBuffer>::WriteUint(unsigned u) {
-    char *buffer = os_->Push(10);
+template <>
+inline bool Writer<StringBuffer>::WriteUint(unsigned u)
+{
+    char* buffer    = os_->Push(10);
     const char* end = internal::u32toa(u, buffer);
     os_->Pop(static_cast<size_t>(10 - (end - buffer)));
     return true;
 }
 
-template<>
-inline bool Writer<StringBuffer>::WriteInt64(int64_t i64) {
-    char *buffer = os_->Push(21);
+template <>
+inline bool Writer<StringBuffer>::WriteInt64(int64_t i64)
+{
+    char* buffer    = os_->Push(21);
     const char* end = internal::i64toa(i64, buffer);
     os_->Pop(static_cast<size_t>(21 - (end - buffer)));
     return true;
 }
 
-template<>
-inline bool Writer<StringBuffer>::WriteUint64(uint64_t u) {
-    char *buffer = os_->Push(20);
+template <>
+inline bool Writer<StringBuffer>::WriteUint64(uint64_t u)
+{
+    char* buffer    = os_->Push(20);
     const char* end = internal::u64toa(u, buffer);
     os_->Pop(static_cast<size_t>(20 - (end - buffer)));
     return true;
 }
 
-template<>
-inline bool Writer<StringBuffer>::WriteDouble(double d) {
-    if (internal::Double(d).IsNanOrInf()) {
-        // Note: This code path can only be reached if (RAPIDJSON_WRITE_DEFAULT_FLAGS & kWriteNanAndInfFlag).
-        if (!(kWriteDefaultFlags & kWriteNanAndInfFlag))
+template <>
+inline bool Writer<StringBuffer>::WriteDouble(double d)
+{
+    if(internal::Double(d).IsNanOrInf())
+    {
+        // Note: This code path can only be reached if (RAPIDJSON_WRITE_DEFAULT_FLAGS &
+        // kWriteNanAndInfFlag).
+        if(!(kWriteDefaultFlags & kWriteNanAndInfFlag))
             return false;
-        if (kWriteDefaultFlags & kWriteNanAndInfNullFlag) {
+        if(kWriteDefaultFlags & kWriteNanAndInfNullFlag)
+        {
             PutReserve(*os_, 4);
-            PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 'l');
+            PutUnsafe(*os_, 'n');
+            PutUnsafe(*os_, 'u');
+            PutUnsafe(*os_, 'l');
+            PutUnsafe(*os_, 'l');
             return true;
         }
-        if (internal::Double(d).IsNan()) {
+        if(internal::Double(d).IsNan())
+        {
             PutReserve(*os_, 3);
-            PutUnsafe(*os_, 'N'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'N');
+            PutUnsafe(*os_, 'N');
+            PutUnsafe(*os_, 'a');
+            PutUnsafe(*os_, 'N');
             return true;
         }
-        if (internal::Double(d).Sign()) {
+        if(internal::Double(d).Sign())
+        {
             PutReserve(*os_, 9);
             PutUnsafe(*os_, '-');
         }
         else
             PutReserve(*os_, 8);
-        PutUnsafe(*os_, 'I'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'f');
-        PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'y');
+        PutUnsafe(*os_, 'I');
+        PutUnsafe(*os_, 'n');
+        PutUnsafe(*os_, 'f');
+        PutUnsafe(*os_, 'i');
+        PutUnsafe(*os_, 'n');
+        PutUnsafe(*os_, 'i');
+        PutUnsafe(*os_, 't');
+        PutUnsafe(*os_, 'y');
         return true;
     }
-    
-    char *buffer = os_->Push(25);
-    char* end = internal::dtoa(d, buffer, maxDecimalPlaces_);
+
+    char* buffer = os_->Push(25);
+    char* end    = internal::dtoa(d, buffer, maxDecimalPlaces_);
     os_->Pop(static_cast<size_t>(25 - (end - buffer)));
     return true;
 }
 
 #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
-template<>
-inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, size_t length) {
-    if (length < 16)
+template <>
+inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, size_t length)
+{
+    if(length < 16)
         return RAPIDJSON_LIKELY(is.Tell() < length);
 
-    if (!RAPIDJSON_LIKELY(is.Tell() < length))
+    if(!RAPIDJSON_LIKELY(is.Tell() < length))
         return false;
 
-    const char* p = is.src_;
-    const char* end = is.head_ + length;
-    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
-    const char* endAligned = reinterpret_cast<const char*>(reinterpret_cast<size_t>(end) & static_cast<size_t>(~15));
-    if (nextAligned > end)
+    const char* p           = is.src_;
+    const char* end         = is.head_ + length;
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) &
+                                                            static_cast<size_t>(~15));
+    const char* endAligned =
+        reinterpret_cast<const char*>(reinterpret_cast<size_t>(end) & static_cast<size_t>(~15));
+    if(nextAligned > end)
         return true;
 
-    while (p != nextAligned)
-        if (*p < 0x20 || *p == '\"' || *p == '\\') {
+    while(p != nextAligned)
+        if(*p < 0x20 || *p == '\"' || *p == '\\')
+        {
             is.src_ = p;
             return RAPIDJSON_LIKELY(is.Tell() < length);
         }
@@ -606,23 +755,71 @@ inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, siz
             os_->PutUnsafe(*p++);
 
     // The rest of string using SIMD
-    static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
-    static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
-    static const char space[16]  = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
-    const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
-    const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
-    const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
+    static const char dquote[16] = {'\"',
+                                    '\"',
+                                    '\"',
+                                    '\"',
+                                    '\"',
+                                    '\"',
+                                    '\"',
+                                    '\"',
+                                    '\"',
+                                    '\"',
+                                    '\"',
+                                    '\"',
+                                    '\"',
+                                    '\"',
+                                    '\"',
+                                    '\"'};
+    static const char bslash[16] = {'\\',
+                                    '\\',
+                                    '\\',
+                                    '\\',
+                                    '\\',
+                                    '\\',
+                                    '\\',
+                                    '\\',
+                                    '\\',
+                                    '\\',
+                                    '\\',
+                                    '\\',
+                                    '\\',
+                                    '\\',
+                                    '\\',
+                                    '\\'};
+    static const char space[16]  = {0x1F,
+                                    0x1F,
+                                    0x1F,
+                                    0x1F,
+                                    0x1F,
+                                    0x1F,
+                                    0x1F,
+                                    0x1F,
+                                    0x1F,
+                                    0x1F,
+                                    0x1F,
+                                    0x1F,
+                                    0x1F,
+                                    0x1F,
+                                    0x1F,
+                                    0x1F};
+    const __m128i dq             = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&dquote[0]));
+    const __m128i bs             = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&bslash[0]));
+    const __m128i sp             = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&space[0]));
 
-    for (; p != endAligned; p += 16) {
-        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+    for(; p != endAligned; p += 16)
+    {
+        const __m128i s  = _mm_load_si128(reinterpret_cast<const __m128i*>(p));
         const __m128i t1 = _mm_cmpeq_epi8(s, dq);
         const __m128i t2 = _mm_cmpeq_epi8(s, bs);
-        const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
-        const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
+        const __m128i t3 =
+            _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
+        const __m128i x  = _mm_or_si128(_mm_or_si128(t1, t2), t3);
         unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
-        if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
+        if(RAPIDJSON_UNLIKELY(r != 0))
+        { // some of characters is escaped
             SizeType len;
-#ifdef _MSC_VER         // Find the index of first escaped
+#ifdef _MSC_VER // Find the index of first escaped
             unsigned long offset;
             _BitScanForward(&offset, r);
             len = offset;
@@ -630,36 +827,40 @@ inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, siz
             len = static_cast<SizeType>(__builtin_ffs(r) - 1);
 #endif
             char* q = reinterpret_cast<char*>(os_->PushUnsafe(len));
-            for (size_t i = 0; i < len; i++)
+            for(size_t i = 0; i < len; i++)
                 q[i] = p[i];
 
             p += len;
             break;
         }
-        _mm_storeu_si128(reinterpret_cast<__m128i *>(os_->PushUnsafe(16)), s);
+        _mm_storeu_si128(reinterpret_cast<__m128i*>(os_->PushUnsafe(16)), s);
     }
 
     is.src_ = p;
     return RAPIDJSON_LIKELY(is.Tell() < length);
 }
 #elif defined(RAPIDJSON_NEON)
-template<>
-inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, size_t length) {
-    if (length < 16)
+template <>
+inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, size_t length)
+{
+    if(length < 16)
         return RAPIDJSON_LIKELY(is.Tell() < length);
 
-    if (!RAPIDJSON_LIKELY(is.Tell() < length))
+    if(!RAPIDJSON_LIKELY(is.Tell() < length))
         return false;
 
-    const char* p = is.src_;
-    const char* end = is.head_ + length;
-    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
-    const char* endAligned = reinterpret_cast<const char*>(reinterpret_cast<size_t>(end) & static_cast<size_t>(~15));
-    if (nextAligned > end)
+    const char* p           = is.src_;
+    const char* end         = is.head_ + length;
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) &
+                                                            static_cast<size_t>(~15));
+    const char* endAligned =
+        reinterpret_cast<const char*>(reinterpret_cast<size_t>(end) & static_cast<size_t>(~15));
+    if(nextAligned > end)
         return true;
 
-    while (p != nextAligned)
-        if (*p < 0x20 || *p == '\"' || *p == '\\') {
+    while(p != nextAligned)
+        if(*p < 0x20 || *p == '\"' || *p == '\\')
+        {
             is.src_ = p;
             return RAPIDJSON_LIKELY(is.Tell() < length);
         }
@@ -672,39 +873,45 @@ inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, siz
     const uint8x16_t s2 = vmovq_n_u8('\b');
     const uint8x16_t s3 = vmovq_n_u8(32);
 
-    for (; p != endAligned; p += 16) {
-        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
-        uint8x16_t x = vceqq_u8(s, s0);
-        x = vorrq_u8(x, vceqq_u8(s, s1));
-        x = vorrq_u8(x, vceqq_u8(s, s2));
-        x = vorrq_u8(x, vcltq_u8(s, s3));
+    for(; p != endAligned; p += 16)
+    {
+        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t*>(p));
+        uint8x16_t x       = vceqq_u8(s, s0);
+        x                  = vorrq_u8(x, vceqq_u8(s, s1));
+        x                  = vorrq_u8(x, vceqq_u8(s, s2));
+        x                  = vorrq_u8(x, vcltq_u8(s, s3));
 
-        x = vrev64q_u8(x);                     // Rev in 64
-        uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
-        uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
+        x             = vrev64q_u8(x);                              // Rev in 64
+        uint64_t low  = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
+        uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
 
         SizeType len = 0;
         bool escaped = false;
-        if (low == 0) {
-            if (high != 0) {
+        if(low == 0)
+        {
+            if(high != 0)
+            {
                 uint32_t lz = internal::clzll(high);
-                len = 8 + (lz >> 3);
-                escaped = true;
+                len         = 8 + (lz >> 3);
+                escaped     = true;
             }
-        } else {
-            uint32_t lz = internal::clzll(low);
-            len = lz >> 3;
-            escaped = true;
         }
-        if (RAPIDJSON_UNLIKELY(escaped)) {   // some of characters is escaped
+        else
+        {
+            uint32_t lz = internal::clzll(low);
+            len         = lz >> 3;
+            escaped     = true;
+        }
+        if(RAPIDJSON_UNLIKELY(escaped))
+        { // some of characters is escaped
             char* q = reinterpret_cast<char*>(os_->PushUnsafe(len));
-            for (size_t i = 0; i < len; i++)
+            for(size_t i = 0; i < len; i++)
                 q[i] = p[i];
 
             p += len;
             break;
         }
-        vst1q_u8(reinterpret_cast<uint8_t *>(os_->PushUnsafe(16)), s);
+        vst1q_u8(reinterpret_cast<uint8_t*>(os_->PushUnsafe(16)), s);
     }
 
     is.src_ = p;
diff --git a/python/ck4inductor/__init__.py b/python/ck4inductor/__init__.py
index ac44aeb777..f4f71c2d60 100644
--- a/python/ck4inductor/__init__.py
+++ b/python/ck4inductor/__init__.py
@@ -8,12 +8,12 @@ def __version__():
         hash = subprocess.check_output("git rev-parse HEAD", shell=True, text=True)[
             :hash_width
         ]
-    except:
+    except Exception:
         hash = "0" * hash_width
     try:
         change_count = subprocess.check_output(
             f"git rev-list rocm-{rocm_version}..HEAD --count", shell=True, text=True
         ).strip()
-    except:
+    except Exception:
         change_count = "0"
     return f"{rocm_version}.dev{change_count}+g{hash}"
diff --git a/script/dependency-parser/main.py b/script/dependency-parser/main.py
index 5c956bca00..623ae05afd 100644
--- a/script/dependency-parser/main.py
+++ b/script/dependency-parser/main.py
@@ -14,43 +14,69 @@ Features:
 
 import argparse
 import sys
-import os
+
 
 def run_dependency_parser(args):
     from src.enhanced_ninja_parser import main as ninja_main
+
     sys.argv = ["enhanced_ninja_parser.py"] + args
     ninja_main()
 
+
 def run_selective_test_filter(args):
     from src.selective_test_filter import main as filter_main
+
     sys.argv = ["selective_test_filter.py"] + args
     filter_main()
 
+
 def main():
-    parser = argparse.ArgumentParser(description="Unified Ninja Dependency & Selective Testing Tool")
+    parser = argparse.ArgumentParser(
+        description="Unified Ninja Dependency & Selective Testing Tool"
+    )
     subparsers = parser.add_subparsers(dest="command", required=True)
 
     # Dependency parsing
-    parser_parse = subparsers.add_parser("parse", help="Parse build.ninja and generate dependency mapping")
+    parser_parse = subparsers.add_parser(
+        "parse", help="Parse build.ninja and generate dependency mapping"
+    )
     parser_parse.add_argument("build_ninja", help="Path to build.ninja")
-    parser_parse.add_argument("--ninja", help="Path to ninja executable", default="ninja")
-    parser_parse.add_argument("--workspace-root", help="Path to workspace root", default=None)
+    parser_parse.add_argument(
+        "--ninja", help="Path to ninja executable", default="ninja"
+    )
+    parser_parse.add_argument(
+        "--workspace-root", help="Path to workspace root", default=None
+    )
 
     # Selective testing
-    parser_test = subparsers.add_parser("select", help="Selective test filtering between git refs")
+    parser_test = subparsers.add_parser(
+        "select", help="Selective test filtering between git refs"
+    )
     parser_test.add_argument("depmap_json", help="Path to dependency mapping JSON")
     parser_test.add_argument("ref1", help="Source git ref")
     parser_test.add_argument("ref2", help="Target git ref")
-    parser_test.add_argument("--all", action="store_true", help="Include all executables")
-    parser_test.add_argument("--test-prefix", action="store_true", help="Only include executables starting with 'test_'")
-    parser_test.add_argument("--output", help="Output JSON file", default="tests_to_run.json")
+    parser_test.add_argument(
+        "--all", action="store_true", help="Include all executables"
+    )
+    parser_test.add_argument(
+        "--test-prefix",
+        action="store_true",
+        help="Only include executables starting with 'test_'",
+    )
+    parser_test.add_argument(
+        "--output", help="Output JSON file", default="tests_to_run.json"
+    )
 
     # Code auditing
-    parser_audit = subparsers.add_parser("audit", help="List all files and their dependent executables")
+    parser_audit = subparsers.add_parser(
+        "audit", help="List all files and their dependent executables"
+    )
     parser_audit.add_argument("depmap_json", help="Path to dependency mapping JSON")
 
     # Build optimization
-    parser_opt = subparsers.add_parser("optimize", help="List affected executables for changed files")
+    parser_opt = subparsers.add_parser(
+        "optimize", help="List affected executables for changed files"
+    )
     parser_opt.add_argument("depmap_json", help="Path to dependency mapping JSON")
     parser_opt.add_argument("changed_files", nargs="+", help="List of changed files")
 
@@ -73,9 +99,12 @@ def main():
     elif args.command == "audit":
         run_selective_test_filter([args.depmap_json, "--audit"])
     elif args.command == "optimize":
-        run_selective_test_filter([args.depmap_json, "--optimize-build"] + args.changed_files)
+        run_selective_test_filter(
+            [args.depmap_json, "--optimize-build"] + args.changed_files
+        )
     else:
         parser.print_help()
 
+
 if __name__ == "__main__":
     main()
diff --git a/script/dependency-parser/src/enhanced_ninja_parser.py b/script/dependency-parser/src/enhanced_ninja_parser.py
index 725768a61f..ff6344a4c1 100644
--- a/script/dependency-parser/src/enhanced_ninja_parser.py
+++ b/script/dependency-parser/src/enhanced_ninja_parser.py
@@ -14,96 +14,100 @@ import re
 import os
 import sys
 import subprocess
-from pathlib import Path
 from collections import defaultdict
 import json
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import threading
 
+
 class EnhancedNinjaDependencyParser:
     def __init__(self, build_file_path, ninja_executable="ninja"):
         self.build_file_path = build_file_path
         self.build_dir = os.path.dirname(build_file_path)
         self.ninja_executable = ninja_executable
-        
+
         # Core data structures
         self.executable_to_objects = {}  # exe -> [object_files]
-        self.object_to_source = {}       # object -> primary_source
-        self.object_to_all_deps = {}     # object -> [all_dependencies]
+        self.object_to_source = {}  # object -> primary_source
+        self.object_to_all_deps = {}  # object -> [all_dependencies]
         self.file_to_executables = defaultdict(set)  # file -> {executables}
-        
+
         # Thread safety
         self.lock = threading.Lock()
-        
+
     def parse_dependencies(self):
         """Main method to parse all dependencies."""
         print(f"Parsing ninja dependencies from: {self.build_file_path}")
-        
+
         # Step 1: Parse build file for executable -> object mappings
         self._parse_build_file()
-        
+
         # Step 2: Get all object files and their dependencies
         print(f"Found {len(self.object_to_source)} object files")
         print("Extracting detailed dependencies for all object files...")
         self._extract_object_dependencies()
-        
+
         # Step 3: Build the final file -> executables mapping
         self._build_file_to_executable_mapping()
-        
+
     def _parse_build_file(self):
         """Parse the ninja build file to extract executable -> object mappings."""
         print("Parsing ninja build file...")
-        
-        with open(self.build_file_path, 'r') as f:
+
+        with open(self.build_file_path, "r") as f:
             content = f.read()
-          # Parse executable build rules
-        exe_pattern = r'^build (bin/[^:]+):\s+\S+\s+([^|]+)'
-        obj_pattern = r'^build ([^:]+\.(?:cpp|cu|hip)\.o):\s+\S+\s+([^\s|]+)'
-        
-        lines = content.split('\n')
-        
+        # Parse executable build rules
+        exe_pattern = r"^build (bin/[^:]+):\s+\S+\s+([^|]+)"
+        obj_pattern = r"^build ([^:]+\.(?:cpp|cu|hip)\.o):\s+\S+\s+([^\s|]+)"
+
+        lines = content.split("\n")
+
         for line in lines:
             # Match executable rules
             exe_match = re.match(exe_pattern, line)
-            if exe_match and ('EXECUTABLE' in line or 'test_' in exe_match.group(1) or 'example_' in exe_match.group(1)):
+            if exe_match and (
+                "EXECUTABLE" in line
+                or "test_" in exe_match.group(1)
+                or "example_" in exe_match.group(1)
+            ):
                 exe = exe_match.group(1)
                 deps_part = exe_match.group(2).strip()
-                
+
                 object_files = []
                 for dep in deps_part.split():
-                    if dep.endswith('.o') and not dep.startswith('/'):
+                    if dep.endswith(".o") and not dep.startswith("/"):
                         object_files.append(dep)
-                
+
                 self.executable_to_objects[exe] = object_files
                 continue
-            
+
             # Match object compilation rules
             obj_match = re.match(obj_pattern, line)
             if obj_match:
                 object_file = obj_match.group(1)
                 source_file = obj_match.group(2)
                 self.object_to_source[object_file] = source_file
-                
+
         print(f"Found {len(self.executable_to_objects)} executables")
         print(f"Found {len(self.object_to_source)} object-to-source mappings")
-        
+
     def _extract_object_dependencies(self):
         """Extract detailed dependencies for all object files using ninja -t deps."""
         object_files = list(self.object_to_source.keys())
-          # Process object files in parallel for better performance
+        # Process object files in parallel for better performance
         if not object_files:
             print("No object files found - skipping dependency extraction")
             return
-            
+
         max_workers = min(16, len(object_files))  # Limit concurrent processes
-        
+
         with ThreadPoolExecutor(max_workers=max_workers) as executor:
             # Submit all object files for processing
             future_to_obj = {
-                executor.submit(self._get_object_dependencies, obj): obj 
+                executor.submit(self._get_object_dependencies, obj): obj
                 for obj in object_files
             }
-              # Process completed futures
+            # Process completed futures
             completed = 0
             for future in as_completed(future_to_obj):
                 obj_file = future_to_obj[future]
@@ -113,52 +117,52 @@ class EnhancedNinjaDependencyParser:
                         self.object_to_all_deps[obj_file] = dependencies
                         completed += 1
                         if completed % 100 == 0:
-                            print(f"Processed {completed}/{len(object_files)} object files...")
+                            print(
+                                f"Processed {completed}/{len(object_files)} object files..."
+                            )
                 except Exception as e:
                     print(f"Error processing {obj_file}: {e}")
-                    
-        print(f"Completed dependency extraction for {len(self.object_to_all_deps)} object files")
-        
+
+        print(
+            f"Completed dependency extraction for {len(self.object_to_all_deps)} object files"
+        )
+
     def _get_object_dependencies(self, object_file):
         """Get all dependencies for a single object file using ninja -t deps."""
         try:
             # Run ninja -t deps for this object file
             cmd = [self.ninja_executable, "-t", "deps", object_file]
             result = subprocess.run(
-                cmd, 
-                cwd=self.build_dir,
-                capture_output=True, 
-                text=True, 
-                timeout=30
+                cmd, cwd=self.build_dir, capture_output=True, text=True, timeout=30
             )
-            
+
             if result.returncode != 0:
                 return []
-                
+
             dependencies = []
-            lines = result.stdout.strip().split('\n')
-            
+            lines = result.stdout.strip().split("\n")
+
             for line in lines[1:]:  # Skip first line with metadata
                 line = line.strip()
-                if line and not line.startswith('#'):
+                if line and not line.startswith("#"):
                     # Convert absolute paths to relative paths from workspace root
                     dep_file = line
                     ws_root = getattr(self, "workspace_root", "..")
                     ws_prefix = ws_root.rstrip("/") + "/"
                     if dep_file.startswith(ws_prefix):
-                        dep_file = dep_file[len(ws_prefix):]
+                        dep_file = dep_file[len(ws_prefix) :]
                     dependencies.append(dep_file)
-                    
+
             return dependencies
-            
+
         except Exception as e:
             print(f"Error getting dependencies for {object_file}: {e}")
             return []
-    
+
     def _build_file_to_executable_mapping(self):
         """Build the final mapping from files to executables."""
         print("Building file-to-executable mapping...")
-        
+
         for exe, object_files in self.executable_to_objects.items():
             for obj_file in object_files:
                 # Add all dependencies of this object file
@@ -167,106 +171,135 @@ class EnhancedNinjaDependencyParser:
                         # Filter out system files and focus on project files
                         if self._is_project_file(dep_file):
                             self.file_to_executables[dep_file].add(exe)
-                            
+
         print(f"Built mapping for {len(self.file_to_executables)} files")
-        
+
         # Show statistics
-        multi_exe_files = {f: exes for f, exes in self.file_to_executables.items() if len(exes) > 1}
+        multi_exe_files = {
+            f: exes for f, exes in self.file_to_executables.items() if len(exes) > 1
+        }
         print(f"Files used by multiple executables: {len(multi_exe_files)}")
-        
+
         if multi_exe_files:
             print("Sample files with multiple dependencies:")
             for f, exes in sorted(multi_exe_files.items())[:5]:
                 print(f"  {f}: {len(exes)} executables")
-                
+
     def _is_project_file(self, file_path):
         """Determine if a file is part of the project (not system files)."""
         # Include files that are clearly part of the project
-        if any(file_path.startswith(prefix) for prefix in [
-            'include/', 'library/', 'test/', 'example/', 'src/', 'profiler/',
-            'build/include/', 'build/_deps/gtest', 'client_example', 'codegen', 'tile_engine'
-        ]):
+        if any(
+            file_path.startswith(prefix)
+            for prefix in [
+                "include/",
+                "library/",
+                "test/",
+                "example/",
+                "src/",
+                "profiler/",
+                "build/include/",
+                "build/_deps/gtest",
+                "client_example",
+                "codegen",
+                "tile_engine",
+            ]
+        ):
             return True
-            
+
         # Exclude system files
-        if any(file_path.startswith(prefix) for prefix in [
-            '/usr/', '/opt/rocm', '/lib/', '/system/', '/local/'
-        ]):
+        if any(
+            file_path.startswith(prefix)
+            for prefix in ["/usr/", "/opt/rocm", "/lib/", "/system/", "/local/"]
+        ):
             return False
-            
+
         # Include files with common source/header extensions
-        if file_path.endswith(('.cpp', '.hpp', '.h', '.c', '.cc', '.cxx', '.cu', '.hip', '.inc')):
+        if file_path.endswith(
+            (".cpp", ".hpp", ".h", ".c", ".cc", ".cxx", ".cu", ".hip", ".inc")
+        ):
             return True
-            
+
         return False
-          
+
     def export_to_csv(self, output_file):
         """Export the file-to-executable mapping to CSV with proper comma separation."""
         print(f"Exporting mapping to {output_file}")
-        
-        with open(output_file, 'w') as f:
+
+        with open(output_file, "w") as f:
             f.write("source_file,executables\n")
             for file_path in sorted(self.file_to_executables.keys()):
                 executables = sorted(self.file_to_executables[file_path])
                 # Use semicolon to separate multiple executables within the field
-                exe_list = ';'.join(executables)
+                exe_list = ";".join(executables)
                 f.write(f'"{file_path}","{exe_list}"\n')
-                
+
     def export_to_json(self, output_file):
         """Export the complete mapping to JSON."""
         print(f"Exporting complete mapping to {output_file}")
-        
+
         # Build reverse mapping (executable -> files)
         exe_to_files = defaultdict(set)
         for file_path, exes in self.file_to_executables.items():
             for exe in exes:
                 exe_to_files[exe].add(file_path)
-        
+
         mapping_data = {
-            'file_to_executables': {
-                file_path: list(exes) for file_path, exes in self.file_to_executables.items()
+            "file_to_executables": {
+                file_path: list(exes)
+                for file_path, exes in self.file_to_executables.items()
             },
-            'executable_to_files': {
+            "executable_to_files": {
                 exe: sorted(files) for exe, files in exe_to_files.items()
             },
-            'statistics': {
-                'total_files': len(self.file_to_executables),
-                'total_executables': len(self.executable_to_objects),
-                'total_object_files': len(self.object_to_source),
-                'files_with_multiple_executables': len([f for f, exes in self.file_to_executables.items() if len(exes) > 1])
-            }
+            "statistics": {
+                "total_files": len(self.file_to_executables),
+                "total_executables": len(self.executable_to_objects),
+                "total_object_files": len(self.object_to_source),
+                "files_with_multiple_executables": len(
+                    [f for f, exes in self.file_to_executables.items() if len(exes) > 1]
+                ),
+            },
         }
-        
-        with open(output_file, 'w') as f:
+
+        with open(output_file, "w") as f:
             json.dump(mapping_data, f, indent=2)
-            
+
     def print_summary(self):
-        """Print a summary of the parsed dependencies."""        
+        """Print a summary of the parsed dependencies."""
         print("\n=== Enhanced Dependency Mapping Summary ===")
         print(f"Total executables: {len(self.executable_to_objects)}")
         print(f"Total files mapped: {len(self.file_to_executables)}")
         print(f"Total object files processed: {len(self.object_to_all_deps)}")
-        
+
         # Files by type
-        cpp_files = sum(1 for f in self.file_to_executables.keys() if f.endswith('.cpp'))
-        hpp_files = sum(1 for f in self.file_to_executables.keys() if f.endswith('.hpp'))
-        h_files = sum(1 for f in self.file_to_executables.keys() if f.endswith('.h'))
-        
-        print(f"\nFile types:")
+        cpp_files = sum(
+            1 for f in self.file_to_executables.keys() if f.endswith(".cpp")
+        )
+        hpp_files = sum(
+            1 for f in self.file_to_executables.keys() if f.endswith(".hpp")
+        )
+        h_files = sum(1 for f in self.file_to_executables.keys() if f.endswith(".h"))
+
+        print("\nFile types:")
         print(f"  .cpp files: {cpp_files}")
         print(f"  .hpp files: {hpp_files}")
         print(f"  .h files: {h_files}")
-        
+
         # Multi-executable files
-        multi_exe_files = {f: exes for f, exes in self.file_to_executables.items() if len(exes) > 1}
+        multi_exe_files = {
+            f: exes for f, exes in self.file_to_executables.items() if len(exes) > 1
+        }
         print(f"\nFiles used by multiple executables: {len(multi_exe_files)}")
-        
+
         if multi_exe_files:
             print("\nTop files with most dependencies:")
-            sorted_multi = sorted(multi_exe_files.items(), key=lambda x: len(x[1]), reverse=True)
+            sorted_multi = sorted(
+                multi_exe_files.items(), key=lambda x: len(x[1]), reverse=True
+            )
             for file_path, exes in sorted_multi[:10]:
                 print(f"  {file_path}: {len(exes)} executables")
 
+
 def main():
     # Accept: build_file, ninja_path, workspace_root
     default_workspace_root = ".."
@@ -304,15 +337,16 @@ def main():
 
     # Export results
     output_dir = os.path.dirname(build_file)
-    csv_file = os.path.join(output_dir, 'enhanced_file_executable_mapping.csv')
-    json_file = os.path.join(output_dir, 'enhanced_dependency_mapping.json')
+    csv_file = os.path.join(output_dir, "enhanced_file_executable_mapping.csv")
+    json_file = os.path.join(output_dir, "enhanced_dependency_mapping.json")
 
     parser.export_to_csv(csv_file)
     parser.export_to_json(json_file)
 
-    print(f"\nResults exported to:")
+    print("\nResults exported to:")
     print(f"  CSV: {csv_file}")
     print(f"  JSON: {json_file}")
 
+
 if __name__ == "__main__":
     main()
diff --git a/script/dependency-parser/src/selective_test_filter.py b/script/dependency-parser/src/selective_test_filter.py
index e8698d115d..d3228ef624 100644
--- a/script/dependency-parser/src/selective_test_filter.py
+++ b/script/dependency-parser/src/selective_test_filter.py
@@ -30,12 +30,15 @@ import subprocess
 import json
 import os
 
+
 def get_changed_files(ref1, ref2):
     """Return a set of files changed between two git refs."""
     try:
         result = subprocess.run(
             ["git", "diff", "--name-only", ref1, ref2],
-            capture_output=True, text=True, check=True
+            capture_output=True,
+            text=True,
+            check=True,
         )
         files = set(line.strip() for line in result.stdout.splitlines() if line.strip())
         return files
@@ -43,6 +46,7 @@ def get_changed_files(ref1, ref2):
         print(f"Error running git diff: {e}")
         sys.exit(1)
 
+
 def load_depmap(depmap_json):
     """Load the dependency mapping JSON."""
     with open(depmap_json, "r") as f:
@@ -52,6 +56,7 @@ def load_depmap(depmap_json):
         return data["file_to_executables"]
     return data
 
+
 def select_tests(file_to_executables, changed_files, filter_mode):
     """Return a set of test executables affected by changed files."""
     affected = set()
@@ -64,6 +69,7 @@ def select_tests(file_to_executables, changed_files, filter_mode):
                     affected.add(exe)
     return sorted(affected)
 
+
 def main():
     if "--audit" in sys.argv:
         if len(sys.argv) < 2:
@@ -81,7 +87,9 @@ def main():
 
     if "--optimize-build" in sys.argv:
         if len(sys.argv) < 3:
-            print("Usage: python selective_test_filter.py <depmap_json> --optimize-build <changed_file1> [<changed_file2> ...]")
+            print(
+                "Usage: python selective_test_filter.py <depmap_json> --optimize-build <changed_file1> [<changed_file2> ...]"
+            )
             sys.exit(1)
         depmap_json = sys.argv[1]
         changed_files = set(sys.argv[sys.argv.index("--optimize-build") + 1 :])
@@ -100,7 +108,9 @@ def main():
         sys.exit(0)
 
     if len(sys.argv) < 4:
-        print("Usage: python selective_test_filter.py <depmap_json> <ref1> <ref2> [--all | --test-prefix] [--output <output_json>]")
+        print(
+            "Usage: python selective_test_filter.py <depmap_json> <ref1> <ref2> [--all | --test-prefix] [--output <output_json>]"
+        )
         sys.exit(1)
 
     depmap_json = sys.argv[1]
@@ -131,9 +141,12 @@ def main():
         tests = select_tests(file_to_executables, changed_files, filter_mode)
 
     with open(output_json, "w") as f:
-        json.dump({"tests_to_run": tests, "changed_files": sorted(changed_files)}, f, indent=2)
+        json.dump(
+            {"tests_to_run": tests, "changed_files": sorted(changed_files)}, f, indent=2
+        )
 
     print(f"Exported {len(tests)} tests to run to {output_json}")
 
+
 if __name__ == "__main__":
     main()
diff --git a/script/ninja_json_converter.py b/script/ninja_json_converter.py
index 7bfb2f867b..e68f7ccfa3 100644
--- a/script/ninja_json_converter.py
+++ b/script/ninja_json_converter.py
@@ -12,38 +12,38 @@ import os
 import re
 import sys
 from pathlib import Path
-from typing import Dict, List, Optional, Tuple, Iterator
+from typing import Dict, List, Optional, Iterator
 
 
 class BuildTarget:
     """Represents a single build target with timing information."""
-    
+
     def __init__(self, start_time: int, end_time: int, output_name: str, cmd_hash: str):
         self.start_time = int(start_time)
         self.end_time = int(end_time)
         self.cmd_hash = cmd_hash
         self.duration = self.end_time - self.start_time
         self.targets = [output_name]  # List of target names for this command hash
-        
+
     @property
     def category(self) -> str:
         """Categorize the build target based on file extension."""
         # Use the first target for categorization
         primary_target = self.targets[0] if self.targets else ""
         ext = Path(primary_target).suffix.lower()
-        if ext in ['.o', '.obj']:
-            return 'compile'
-        elif ext in ['.a', '.lib']:
-            return 'archive'
-        elif ext in ['.so', '.dll', '.dylib']:
-            return 'link_shared'
-        elif ext in ['.exe', '.out']:
-            return 'link_executable'
-        elif 'test' in primary_target.lower():
-            return 'test'
+        if ext in [".o", ".obj"]:
+            return "compile"
+        elif ext in [".a", ".lib"]:
+            return "archive"
+        elif ext in [".so", ".dll", ".dylib"]:
+            return "link_shared"
+        elif ext in [".exe", ".out"]:
+            return "link_executable"
+        elif "test" in primary_target.lower():
+            return "test"
         else:
-            return 'other'
-    
+            return "other"
+
     @property
     def output_name(self) -> str:
         """Get the primary output name (for backward compatibility)."""
@@ -52,11 +52,11 @@ class BuildTarget:
 
 class ThreadScheduler:
     """Simulates thread allocation for parallelism analysis."""
-    
+
     def __init__(self, legacy_mode: bool = False):
         self.workers: List[int] = []
         self.legacy_mode = legacy_mode
-        
+
     def allocate_thread(self, target: BuildTarget) -> int:
         """Allocate a thread for the given target."""
         if self.legacy_mode:
@@ -73,7 +73,7 @@ class ThreadScheduler:
                 if worker_end_time <= target.start_time:
                     self.workers[i] = target.end_time
                     return i
-            
+
             # No available worker, create a new one
             self.workers.append(target.end_time)
             return len(self.workers) - 1
@@ -81,62 +81,67 @@ class ThreadScheduler:
 
 class NinjaLogParser:
     """Parser for ninja build log files."""
-    
+
     def __init__(self, show_all_builds: bool = False):
         self.show_all_builds = show_all_builds
-        
+
     def parse_log_file(self, log_path: str) -> List[BuildTarget]:
         """Parse the ninja log file and return build targets."""
         if not os.path.exists(log_path):
             raise FileNotFoundError(f"Ninja log file not found: {log_path}")
-            
-        with open(log_path, 'r', encoding='utf-8') as file:
+
+        with open(log_path, "r", encoding="utf-8") as file:
             lines = file.readlines()
-            
+
         if not lines:
             raise ValueError("Empty ninja log file")
-            
+
         # Parse and validate header
         header = lines[0].strip()
-        version_match = re.match(r'^# ninja log v(\d+)$', header)
+        version_match = re.match(r"^# ninja log v(\d+)$", header)
         if not version_match:
             raise ValueError(f"Invalid ninja log header: {header}")
-            
+
         version = int(version_match.group(1))
         if version < 5:
             raise ValueError(f"Unsupported ninja log version: {version}")
-            
+
         # Skip additional header line for version 6
         start_line = 2 if version > 5 else 1
-        
+
         targets: Dict[str, BuildTarget] = {}
         last_end_time = 0
-        
+
         for line_num, line in enumerate(lines[start_line:], start=start_line + 1):
             line = line.strip()
-            
+
             # Skip empty lines and comments
-            if not line or line.startswith('#'):
+            if not line or line.startswith("#"):
                 continue
-                
-            parts = line.split('\t')
+
+            parts = line.split("\t")
             if len(parts) < 5:
-                print(f"Warning: Skipping malformed line {line_num}: {line}", file=sys.stderr)
+                print(
+                    f"Warning: Skipping malformed line {line_num}: {line}",
+                    file=sys.stderr,
+                )
                 continue
-                
+
             try:
                 start_time, end_time, _, output_name, cmd_hash = parts[:5]
                 start_time, end_time = int(start_time), int(end_time)
-                
+
                 # Handle incremental builds
                 if not self.show_all_builds and end_time < last_end_time:
                     targets.clear()
-                    
+
                 last_end_time = end_time
-                
+
                 # Group targets by command hash
                 if cmd_hash not in targets:
-                    targets[cmd_hash] = BuildTarget(start_time, end_time, output_name, cmd_hash)
+                    targets[cmd_hash] = BuildTarget(
+                        start_time, end_time, output_name, cmd_hash
+                    )
                 else:
                     # Update with the latest timing and add output
                     existing = targets[cmd_hash]
@@ -144,223 +149,260 @@ class NinjaLogParser:
                     existing.end_time = max(existing.end_time, end_time)
                     existing.duration = existing.end_time - existing.start_time
                     existing.targets.append(output_name)
-                    
+
             except (ValueError, IndexError) as e:
                 print(f"Warning: Error parsing line {line_num}: {e}", file=sys.stderr)
                 continue
-                
+
         return sorted(targets.values(), key=lambda t: t.end_time, reverse=True)
 
 
 class FTimeTraceReader:
     """Reads and processes Clang -ftime-trace JSON files."""
-    
+
     def __init__(self, granularity_us: int = 50000):
         self.granularity_us = granularity_us
-        
+
     def read_trace_file(self, trace_path: str) -> Optional[Dict]:
         """Read and parse a Clang time trace file."""
         try:
-            with open(trace_path, 'r', encoding='utf-8') as f:
+            with open(trace_path, "r", encoding="utf-8") as f:
                 return json.load(f)
         except (FileNotFoundError, json.JSONDecodeError, IOError):
             return None
-            
+
     def filter_events(self, trace_data: Dict) -> List[Dict]:
         """Filter trace events based on criteria."""
-        if 'traceEvents' not in trace_data:
+        if "traceEvents" not in trace_data:
             return []
-            
+
         filtered_events = []
-        for event in trace_data['traceEvents']:
+        for event in trace_data["traceEvents"]:
             # Only include complete events (ph=X) that meet duration threshold
-            if (event.get('ph') == 'X' and 
-                event.get('dur', 0) >= self.granularity_us and
-                not event.get('name', '').startswith('Total')):
+            if (
+                event.get("ph") == "X"
+                and event.get("dur", 0) >= self.granularity_us
+                and not event.get("name", "").startswith("Total")
+            ):
                 filtered_events.append(event)
-                
+
         return filtered_events
-        
-    def adjust_event_timing(self, event: Dict, target: BuildTarget, pid: int, tid: int) -> Dict:
+
+    def adjust_event_timing(
+        self, event: Dict, target: BuildTarget, pid: int, tid: int
+    ) -> Dict:
         """Adjust event timing to align with ninja build timing."""
         ninja_duration_us = target.duration * 1000
-        
+
         # Validate event duration against ninja timing
-        if event.get('dur', 0) > ninja_duration_us:
-            print(f"Warning: Clang trace event duration ({event['dur']}μs) exceeds "
-                  f"ninja duration ({ninja_duration_us}μs) for {target.output_name}", 
-                  file=sys.stderr)
+        if event.get("dur", 0) > ninja_duration_us:
+            print(
+                f"Warning: Clang trace event duration ({event['dur']}μs) exceeds "
+                f"ninja duration ({ninja_duration_us}μs) for {target.output_name}",
+                file=sys.stderr,
+            )
             return None
-            
+
         # Adjust event timing
         adjusted_event = event.copy()
-        adjusted_event['pid'] = pid
-        adjusted_event['tid'] = tid
-        adjusted_event['ts'] += target.start_time * 1000  # Offset by ninja start time
-        
+        adjusted_event["pid"] = pid
+        adjusted_event["tid"] = tid
+        adjusted_event["ts"] += target.start_time * 1000  # Offset by ninja start time
+
         return adjusted_event
 
+
 class ChromeTraceGenerator:
     """Generates Chrome tracing format from build targets."""
-    
-    def __init__(self, process_id: int = 1, embed_ftime_traces: bool = False, 
-                 granularity_us: int = 50000, ninja_log_dir: Optional[str] = None,
-                 legacy_format: bool = False):
+
+    def __init__(
+        self,
+        process_id: int = 1,
+        embed_ftime_traces: bool = False,
+        granularity_us: int = 50000,
+        ninja_log_dir: Optional[str] = None,
+        legacy_format: bool = False,
+    ):
         self.process_id = process_id
         self.scheduler = ThreadScheduler(legacy_mode=legacy_format)
         self.embed_ftime_traces = embed_ftime_traces
         self.ninja_log_dir = ninja_log_dir
-        self.ftime_reader = FTimeTraceReader(granularity_us) if embed_ftime_traces else None
+        self.ftime_reader = (
+            FTimeTraceReader(granularity_us) if embed_ftime_traces else None
+        )
         self.legacy_format = legacy_format
-        
+
     def find_ftime_trace_files(self, target: BuildTarget) -> List[str]:
         """Find Clang -ftime-trace files for a build target."""
         if not self.ninja_log_dir:
             return []
-            
+
         trace_files = []
-        
+
         # Look for .json files adjacent to object files
         obj_path = Path(self.ninja_log_dir) / target.output_name
-        json_path = obj_path.with_suffix('.json')
-        
+        json_path = obj_path.with_suffix(".json")
+
         if json_path.exists():
             trace_files.append(str(json_path))
-            
+
         return trace_files
-        
+
     def generate_ftime_events(self, target: BuildTarget, tid: int) -> Iterator[Dict]:
         """Generate Clang -ftime-trace events for a target."""
         if not self.embed_ftime_traces or not self.ftime_reader:
             return
-            
+
         trace_files = self.find_ftime_trace_files(target)
-        
+
         for trace_file in trace_files:
             trace_data = self.ftime_reader.read_trace_file(trace_file)
             if not trace_data:
                 continue
-                
+
             filtered_events = self.ftime_reader.filter_events(trace_data)
-            
+
             for event in filtered_events:
                 adjusted_event = self.ftime_reader.adjust_event_timing(
                     event, target, self.process_id, tid
                 )
                 if adjusted_event:
                     yield adjusted_event
-        
+
     def generate_trace_events(self, targets: List[BuildTarget]) -> List[Dict]:
         """Generate Chrome trace events from build targets."""
         events = []
-        
+
         for target in targets:
             thread_id = self.scheduler.allocate_thread(target)
-            
+
             # Add main ninja build event
             if self.legacy_format:
                 # Legacy format: join multiple targets with commas, use "targets" category, empty args
-                target_name = ', '.join(target.targets) if len(target.targets) > 1 else target.output_name
+                target_name = (
+                    ", ".join(target.targets)
+                    if len(target.targets) > 1
+                    else target.output_name
+                )
                 ninja_event = {
-                    'name': target_name,
-                    'cat': 'targets',
-                    'ph': 'X',  # Complete event
-                    'ts': target.start_time * 1000,  # Convert to microseconds
-                    'dur': target.duration * 1000,   # Convert to microseconds
-                    'pid': self.process_id,
-                    'tid': thread_id,
-                    'args': {}
+                    "name": target_name,
+                    "cat": "targets",
+                    "ph": "X",  # Complete event
+                    "ts": target.start_time * 1000,  # Convert to microseconds
+                    "dur": target.duration * 1000,  # Convert to microseconds
+                    "pid": self.process_id,
+                    "tid": thread_id,
+                    "args": {},
                 }
             else:
                 # New format: smart categorization, detailed args
                 ninja_event = {
-                    'name': target.output_name,
-                    'cat': target.category,
-                    'ph': 'X',  # Complete event
-                    'ts': target.start_time * 1000,  # Convert to microseconds
-                    'dur': target.duration * 1000,   # Convert to microseconds
-                    'pid': self.process_id,
-                    'tid': thread_id,
-                    'args': {
-                        'output': target.output_name,
-                        'duration_ms': target.duration,
-                        'cmd_hash': target.cmd_hash
-                    }
+                    "name": target.output_name,
+                    "cat": target.category,
+                    "ph": "X",  # Complete event
+                    "ts": target.start_time * 1000,  # Convert to microseconds
+                    "dur": target.duration * 1000,  # Convert to microseconds
+                    "pid": self.process_id,
+                    "tid": thread_id,
+                    "args": {
+                        "output": target.output_name,
+                        "duration_ms": target.duration,
+                        "cmd_hash": target.cmd_hash,
+                    },
                 }
             events.append(ninja_event)
-            
+
             # Add embedded Clang -ftime-trace events
             if self.embed_ftime_traces:
                 ftime_events = list(self.generate_ftime_events(target, thread_id))
                 events.extend(ftime_events)
-                
+
                 if ftime_events:
-                    print(f"Embedded {len(ftime_events)} -ftime-trace events for {target.output_name}", 
-                          file=sys.stderr)
-            
+                    print(
+                        f"Embedded {len(ftime_events)} -ftime-trace events for {target.output_name}",
+                        file=sys.stderr,
+                    )
+
         return events
 
 
 class BuildAnalyzer:
     """Analyzes build performance and provides statistics."""
-    
+
     def __init__(self, targets: List[BuildTarget]):
         self.targets = targets
-        
+
     def get_build_summary(self) -> Dict:
         """Generate build performance summary."""
         if not self.targets:
             return {}
-            
+
         total_duration = sum(t.duration for t in self.targets)
         total_targets = len(self.targets)
-        
+
         # Category statistics
         category_stats = {}
         for target in self.targets:
             cat = target.category
             if cat not in category_stats:
-                category_stats[cat] = {'count': 0, 'total_time': 0}
-            category_stats[cat]['count'] += 1
-            category_stats[cat]['total_time'] += target.duration
-            
+                category_stats[cat] = {"count": 0, "total_time": 0}
+            category_stats[cat]["count"] += 1
+            category_stats[cat]["total_time"] += target.duration
+
         # Top slowest targets
-        slowest_targets = sorted(self.targets, key=lambda t: t.duration, reverse=True)[:10]
-        
+        slowest_targets = sorted(self.targets, key=lambda t: t.duration, reverse=True)[
+            :10
+        ]
+
         return {
-            'total_targets': total_targets,
-            'total_duration_ms': total_duration,
-            'total_duration_sec': total_duration / 1000,
-            'average_duration_ms': total_duration / total_targets if total_targets > 0 else 0,
-            'category_stats': category_stats,
-            'slowest_targets': [
-                {'name': t.output_name, 'duration_ms': t.duration, 'category': t.category}
+            "total_targets": total_targets,
+            "total_duration_ms": total_duration,
+            "total_duration_sec": total_duration / 1000,
+            "average_duration_ms": total_duration / total_targets
+            if total_targets > 0
+            else 0,
+            "category_stats": category_stats,
+            "slowest_targets": [
+                {
+                    "name": t.output_name,
+                    "duration_ms": t.duration,
+                    "category": t.category,
+                }
                 for t in slowest_targets
-            ]
+            ],
         }
-        
+
     def print_summary(self):
         """Print build summary to stderr."""
         summary = self.get_build_summary()
         if not summary:
             print("No build data available", file=sys.stderr)
             return
-            
-        print(f"\n=== Build Summary ===", file=sys.stderr)
+
+        print("\n=== Build Summary ===", file=sys.stderr)
         print(f"Total targets: {summary['total_targets']}", file=sys.stderr)
         print(f"Total time: {summary['total_duration_sec']:.2f}s", file=sys.stderr)
-        print(f"Average time per target: {summary['average_duration_ms']:.2f}ms", file=sys.stderr)
-        
-        print(f"\nBy category:", file=sys.stderr)
-        for category, stats in summary['category_stats'].items():
-            avg_time = stats['total_time'] / stats['count'] if stats['count'] > 0 else 0
-            print(f"  {category:15} {stats['count']:6} targets "
-                  f"{stats['total_time']/1000:8.2f}s "
-                  f"(avg: {avg_time/1000:.3f}s)", file=sys.stderr)
-                  
-        print(f"\nSlowest targets:", file=sys.stderr)
-        for i, target in enumerate(summary['slowest_targets'][:5], 1):
-            print(f"  {i:2}. {target['name']} ({target['duration_ms']}ms, {target['category']})", file=sys.stderr)
+        print(
+            f"Average time per target: {summary['average_duration_ms']:.2f}ms",
+            file=sys.stderr,
+        )
+
+        print("\nBy category:", file=sys.stderr)
+        for category, stats in summary["category_stats"].items():
+            avg_time = stats["total_time"] / stats["count"] if stats["count"] > 0 else 0
+            print(
+                f"  {category:15} {stats['count']:6} targets "
+                f"{stats['total_time'] / 1000:8.2f}s "
+                f"(avg: {avg_time / 1000:.3f}s)",
+                file=sys.stderr,
+            )
+
+        print("\nSlowest targets:", file=sys.stderr)
+        for i, target in enumerate(summary["slowest_targets"][:5], 1):
+            print(
+                f"  {i:2}. {target['name']} ({target['duration_ms']}ms, {target['category']})",
+                file=sys.stderr,
+            )
 
 
 def create_argument_parser() -> argparse.ArgumentParser:
@@ -376,57 +418,48 @@ Examples:
   %(prog)s build/.ninja_log --show-all      # Include all builds
   %(prog)s build/.ninja_log --embed-ftime-trace  # Include Clang timing data
   %(prog)s build/.ninja_log --granularity 10000  # Custom granularity threshold
-        """
+        """,
     )
-    
+
     parser.add_argument(
-        'ninja_logs',
-        nargs='+',  # Accept one or more ninja log files
-        help='Path(s) to the .ninja_log file(s)'
+        "ninja_logs",
+        nargs="+",  # Accept one or more ninja log files
+        help="Path(s) to the .ninja_log file(s)",
     )
-    
+
+    parser.add_argument("-o", "--output", help="Output file (default: stdout)")
+
     parser.add_argument(
-        '-o', '--output',
-        help='Output file (default: stdout)'
+        "--show-all", action="store_true", help="Show all builds, not just the last one"
     )
-    
+
     parser.add_argument(
-        '--show-all',
-        action='store_true',
-        help='Show all builds, not just the last one'
+        "--summary", action="store_true", help="Print build summary to stderr"
     )
-    
+
     parser.add_argument(
-        '--summary',
-        action='store_true',
-        help='Print build summary to stderr'
+        "--pretty", action="store_true", help="Pretty-print JSON output"
     )
-    
+
     parser.add_argument(
-        '--pretty',
-        action='store_true',
-        help='Pretty-print JSON output'
+        "--embed-ftime-trace",
+        action="store_true",
+        help="Embed Clang -ftime-trace JSON files found adjacent to targets",
     )
-    
+
     parser.add_argument(
-        '--embed-ftime-trace',
-        action='store_true',
-        help='Embed Clang -ftime-trace JSON files found adjacent to targets'
-    )
-    
-    parser.add_argument(
-        '--granularity',
+        "--granularity",
         type=int,
         default=50000,
-        help='Minimum duration for -ftime-trace events in microseconds (default: 50000)'
+        help="Minimum duration for -ftime-trace events in microseconds (default: 50000)",
     )
-    
+
     parser.add_argument(
-        '--legacy-format',
-        action='store_true',
-        help='Output in legacy format compatible with old ninjatracer (simple JSON array, all categories as "targets", empty args)'
+        "--legacy-format",
+        action="store_true",
+        help='Output in legacy format compatible with old ninjatracer (simple JSON array, all categories as "targets", empty args)',
     )
-    
+
     return parser
 
 
@@ -434,75 +467,79 @@ def main():
     """Main entry point."""
     parser = create_argument_parser()
     args = parser.parse_args()
-    
+
     try:
         # Process multiple ninja log files
         all_events = []
-        
+
         for pid, ninja_log_path in enumerate(args.ninja_logs):
             # Parse ninja log
             log_parser = NinjaLogParser(show_all_builds=args.show_all)
             targets = log_parser.parse_log_file(ninja_log_path)
-            
+
             if not targets:
-                print(f"No build targets found in ninja log: {ninja_log_path}", file=sys.stderr)
+                print(
+                    f"No build targets found in ninja log: {ninja_log_path}",
+                    file=sys.stderr,
+                )
                 continue
-                
+
             # Determine ninja log directory for -ftime-trace files
-            ninja_log_dir = os.path.dirname(os.path.abspath(ninja_log_path)) if args.embed_ftime_trace else None
-            
+            ninja_log_dir = (
+                os.path.dirname(os.path.abspath(ninja_log_path))
+                if args.embed_ftime_trace
+                else None
+            )
+
             # Generate trace events for this log file
             trace_generator = ChromeTraceGenerator(
                 process_id=pid,  # Use different PID for each log file
                 embed_ftime_traces=args.embed_ftime_trace,
                 granularity_us=args.granularity,
                 ninja_log_dir=ninja_log_dir,
-                legacy_format=args.legacy_format
+                legacy_format=args.legacy_format,
             )
             events = trace_generator.generate_trace_events(targets)
             all_events.extend(events)
-            
+
             # Print summary if requested (for each log file)
             if args.summary:
                 print(f"\n=== Summary for {ninja_log_path} ===", file=sys.stderr)
                 analyzer = BuildAnalyzer(targets)
                 analyzer.print_summary()
-        
+
         if not all_events:
             print("No build targets found in any ninja log files", file=sys.stderr)
             return 1
-        
+
         # Output format logic
         if args.legacy_format:
             # Legacy format: always output simple JSON array
-            json_kwargs = {'indent': 2} if args.pretty else {}
+            json_kwargs = {"indent": 2} if args.pretty else {}
             json_output = json.dumps(all_events, **json_kwargs)
         elif args.output or args.pretty:
             # Enhanced format with metadata (when saving to file or pretty printing)
             trace_data = {
-                'traceEvents': all_events,
-                'displayTimeUnit': 'ms',
-                'systemTraceEvents': 'SystemTraceData',
-                'otherData': {
-                    'version': '1.0',
-                    'generator': 'ninja_json_converter.py'
-                }
+                "traceEvents": all_events,
+                "displayTimeUnit": "ms",
+                "systemTraceEvents": "SystemTraceData",
+                "otherData": {"version": "1.0", "generator": "ninja_json_converter.py"},
             }
-            json_kwargs = {'indent': 2} if args.pretty else {}
+            json_kwargs = {"indent": 2} if args.pretty else {}
             json_output = json.dumps(trace_data, **json_kwargs)
         else:
             # Original format (simple JSON array to stdout)
             json_output = json.dumps(all_events)
-        
+
         if args.output:
-            with open(args.output, 'w') as f:
+            with open(args.output, "w") as f:
                 f.write(json_output)
             print(f"Trace written to {args.output}", file=sys.stderr)
         else:
             print(json_output)
-            
+
         return 0
-        
+
     except Exception as e:
         print(f"Error: {e}", file=sys.stderr)
         return 1
diff --git a/script/process_perf_data.py b/script/process_perf_data.py
index 2dd54fa62d..b35ba64041 100644
--- a/script/process_perf_data.py
+++ b/script/process_perf_data.py
@@ -1,13 +1,16 @@
 #!/usr/bin/env python3
-import os, io, argparse, datetime
-#import numpy as np
+import os
+import io
+import argparse
+import datetime
+
+# import numpy as np
 import sqlalchemy
-from sqlalchemy.types import NVARCHAR, Float, Integer
 from sqlalchemy import text
-import pymysql
 import pandas as pd
 from sshtunnel import SSHTunnelForwarder
 
+
 def print_to_string(*args, **kwargs):
     output = io.StringIO()
     print(*args, file=output, **kwargs)
@@ -15,15 +18,18 @@ def print_to_string(*args, **kwargs):
     output.close()
     return contents
 
+
 def parse_args():
-    parser = argparse.ArgumentParser(description='Parse results from tf benchmark runs')
-    parser.add_argument('filename', type=str, help='Log file to prase or directory containing log files')
+    parser = argparse.ArgumentParser(description="Parse results from tf benchmark runs")
+    parser.add_argument(
+        "filename", type=str, help="Log file to prase or directory containing log files"
+    )
     args = parser.parse_args()
     files = []
     if os.path.isdir(args.filename):
         all_files = os.listdir(args.filename)
         for name in all_files:
-            if not 'log' in name:
+            if "log" not in name:
                 continue
             files.append(os.path.join(args.filename, name))
     else:
@@ -31,62 +37,76 @@ def parse_args():
     args.files = files
     return args
 
+
 def get_log_params(logfile):
-    print("logfile=",logfile)
-    branch_name=' '
-    node_id=' '
-    gpu_arch=' '
-    hip_vers=' '
-    compute_units=0
-    environment=' '
-    rocm_vers=' '
+    print("logfile=", logfile)
+    branch_name = " "
+    node_id = " "
+    gpu_arch = " "
+    hip_vers = " "
+    compute_units = 0
+    environment = " "
+    rocm_vers = " "
     for line in open(logfile):
-        if 'Branch name' in line:
-            lst=line.split()
-            branch_name=lst[2]
-        if 'On branch' in line:
-            lst=line.split()
-            branch_name=lst[2]
-        if 'Node name' in line:
-            lst=line.split()
-            node_id=lst[2]
-        if 'GPU_arch' in line:
-            lst=line.split()
-            gpu_arch=lst[2]
-        if 'HIP version' in line:
-            lst=line.split()
-            hip_vers=lst[2]
-        if 'Compute Unit' in line:
-            lst=line.split()
-            compute_units=lst[2]
-        if 'Environment type' in line:
-            lst=line.split()
-            environment=lst[2]
-        if 'InstalledDir' in line:
-            lst=line.split()
-            rocm_vers=lst[1][lst[1].find('/opt/rocm-')+len('/opt/rocm-'):lst[1].rfind('/llvm/bin')]
-    return branch_name, node_id, gpu_arch, compute_units, rocm_vers, hip_vers, environment
+        if "Branch name" in line:
+            lst = line.split()
+            branch_name = lst[2]
+        if "On branch" in line:
+            lst = line.split()
+            branch_name = lst[2]
+        if "Node name" in line:
+            lst = line.split()
+            node_id = lst[2]
+        if "GPU_arch" in line:
+            lst = line.split()
+            gpu_arch = lst[2]
+        if "HIP version" in line:
+            lst = line.split()
+            hip_vers = lst[2]
+        if "Compute Unit" in line:
+            lst = line.split()
+            compute_units = lst[2]
+        if "Environment type" in line:
+            lst = line.split()
+            environment = lst[2]
+        if "InstalledDir" in line:
+            lst = line.split()
+            rocm_vers = lst[1][
+                lst[1].find("/opt/rocm-") + len("/opt/rocm-") : lst[1].rfind(
+                    "/llvm/bin"
+                )
+            ]
+    return (
+        branch_name,
+        node_id,
+        gpu_arch,
+        compute_units,
+        rocm_vers,
+        hip_vers,
+        environment,
+    )
+
 
 def parse_logfile(logfile):
-    glue=''
-    res=[]
-    tests=[]
-    kernels=[]
-    tflops=[]
-    dtype=[]
-    alayout=[]
-    blayout=[]
-    M=[]
-    N=[]
-    K=[]
-    StrideA=[]
-    StrideB=[]
-    StrideC=[]
-    if 'perf_gemm' in logfile and 'gemm_bilinear' not in logfile:
+    glue = ""
+    res = []
+    tests = []
+    kernels = []
+    tflops = []
+    dtype = []
+    alayout = []
+    blayout = []
+    M = []
+    N = []
+    K = []
+    StrideA = []
+    StrideB = []
+    StrideC = []
+    if "perf_gemm" in logfile and "gemm_bilinear" not in logfile:
         for line in open(logfile):
-            if 'Best Perf' in line:
-                lst=line.split()
-                if len(lst)>=37: #the line is complete
+            if "Best Perf" in line:
+                lst = line.split()
+                if len(lst) >= 37:  # the line is complete
                     tests.append(glue.join(lst[5:30]))
                     kernels.append(glue.join(lst[37:]))
                     tflops.append(lst[33])
@@ -99,7 +119,7 @@ def parse_logfile(logfile):
                     StrideA.append(lst[23])
                     StrideB.append(lst[26])
                     StrideC.append(lst[29])
-                elif len(lst)<37 and len(lst)>=33: #the tflops are available
+                elif len(lst) < 37 and len(lst) >= 33:  # the tflops are available
                     tests.append(glue.join(lst[5:30]))
                     kernels.append("N/A")
                     tflops.append(lst[33])
@@ -112,87 +132,141 @@ def parse_logfile(logfile):
                     StrideA.append(lst[23])
                     StrideB.append(lst[26])
                     StrideC.append(lst[29])
-                    print("warning: incomplete line:",lst)
-                elif len(lst)<33: #even the tflops are not available
+                    print("warning: incomplete line:", lst)
+                elif len(lst) < 33:  # even the tflops are not available
                     print("Error in ckProfiler output!")
-                    print("warning: incomplete line=",lst)
-        #sort results
-        #sorted_tests = sorted(tests)
-        res = [x for _,x in sorted(zip(tests,tflops))]
-        #sorted_kernels = [x for _,x in sorted(zip(tests,kernels))]
-        test_list=list(range(1,len(tests)+1))
-    #parse conv_fwd and conv_bwd performance tests:
-    elif 'conv_fwd' in logfile or 'conv_bwd' in logfile:
+                    print("warning: incomplete line=", lst)
+        # sort results
+        # sorted_tests = sorted(tests)
+        res = [x for _, x in sorted(zip(tests, tflops))]
+        # sorted_kernels = [x for _,x in sorted(zip(tests,kernels))]
+        # test_list = list(range(1, len(tests) + 1))
+    # parse conv_fwd and conv_bwd performance tests:
+    elif "conv_fwd" in logfile or "conv_bwd" in logfile:
         for line in open(logfile):
-            if 'tflops:' in line:
-                lst=line.split()
+            if "tflops:" in line:
+                lst = line.split()
                 res.append(lst[1])
-    #parse all other performance tests:
-    elif 'resnet50' in logfile or 'batched_gemm' in logfile or 'grouped_gemm' in logfile  or 'gemm_bilinear' in logfile or 'reduction' in logfile:
+    # parse all other performance tests:
+    elif (
+        "resnet50" in logfile
+        or "batched_gemm" in logfile
+        or "grouped_gemm" in logfile
+        or "gemm_bilinear" in logfile
+        or "reduction" in logfile
+    ):
         for line in open(logfile):
-            if 'Best Perf' in line:
-                lst=line.split()
+            if "Best Perf" in line:
+                lst = line.split()
                 res.append(lst[4])
-    elif 'onnx_gemm' in logfile:
+    elif "onnx_gemm" in logfile:
         for line in open(logfile):
-            if 'Best Perf' in line:
-                lst=line.split()
+            if "Best Perf" in line:
+                lst = line.split()
                 res.append(lst[33])
-    elif 'splitK_gemm' in logfile or 'mixed_gemm' in logfile:
+    elif "splitK_gemm" in logfile or "mixed_gemm" in logfile:
         for line in open(logfile):
-            if 'Best Perf' in line:
-                lst=line.split()
+            if "Best Perf" in line:
+                lst = line.split()
                 res.append(lst[36])
-    elif 'perf_fmha' in logfile:
+    elif "perf_fmha" in logfile:
         for line in open(logfile):
-            if 'TFlops' in line:
-                lst=line.split()
-                line_dict=dict(zip(lst[1:],lst))
-                res.append(line_dict['TFlops,'])
-    elif 'perf_tile_gemm_basic' in logfile or 'perf_tile_gemm_mem_pipeline' in logfile:
+            if "TFlops" in line:
+                lst = line.split()
+                line_dict = dict(zip(lst[1:], lst))
+                res.append(line_dict["TFlops,"])
+    elif "perf_tile_gemm_basic" in logfile or "perf_tile_gemm_mem_pipeline" in logfile:
         for line in open(logfile):
-            if 'TFlops' in line:
-                lst=line.split()
-                line_dict=dict(zip(lst[1:],lst))
-                res.append(line_dict['TFlops,'])
+            if "TFlops" in line:
+                lst = line.split()
+                line_dict = dict(zip(lst[1:], lst))
+                res.append(line_dict["TFlops,"])
     return res
 
 
 def get_baseline(table, connection):
-    query = text('''SELECT * from '''+table+''' WHERE Datetime = (SELECT MAX(Datetime) FROM '''+table+''' where Branch_ID='develop' );''')
+    query = text(
+        """SELECT * from """
+        + table
+        + """ WHERE Datetime = (SELECT MAX(Datetime) FROM """
+        + table
+        + """ where Branch_ID='develop' );"""
+    )
     return pd.read_sql(query, connection)
 
-def store_new_test_result(table_name, test_results, testlist, branch_name, node_id, gpu_arch, compute_units, rocm_vers, hip_vers, environment, connection):
-    params=[str(branch_name),str(node_id),str(gpu_arch),compute_units,str(rocm_vers),str(hip_vers),str(environment),str(datetime.datetime.now())]
-    df=pd.DataFrame(data=[params],columns=['Branch_ID','Node_ID','GPU_arch','Compute Units','ROCM_version','HIP_version','Environment','Datetime'])
-    df_add=pd.DataFrame(data=[test_results],columns=testlist)
-    df=pd.concat([df,df_add],axis=1)
-    #print("new test results dataframe:",df)
-    df.to_sql(table_name,connection,if_exists='append',index=False)
+
+def store_new_test_result(
+    table_name,
+    test_results,
+    testlist,
+    branch_name,
+    node_id,
+    gpu_arch,
+    compute_units,
+    rocm_vers,
+    hip_vers,
+    environment,
+    connection,
+):
+    params = [
+        str(branch_name),
+        str(node_id),
+        str(gpu_arch),
+        compute_units,
+        str(rocm_vers),
+        str(hip_vers),
+        str(environment),
+        str(datetime.datetime.now()),
+    ]
+    df = pd.DataFrame(
+        data=[params],
+        columns=[
+            "Branch_ID",
+            "Node_ID",
+            "GPU_arch",
+            "Compute Units",
+            "ROCM_version",
+            "HIP_version",
+            "Environment",
+            "Datetime",
+        ],
+    )
+    df_add = pd.DataFrame(data=[test_results], columns=testlist)
+    df = pd.concat([df, df_add], axis=1)
+    # print("new test results dataframe:",df)
+    df.to_sql(table_name, connection, if_exists="append", index=False)
     return 0
 
-def compare_test_to_baseline(baseline,test,testlist):
-    regression=0
+
+def compare_test_to_baseline(baseline, test, testlist):
+    regression = 0
     if not baseline.empty:
-        base=baseline[testlist].to_numpy(dtype='float')
-        base_list=base[0]
-        ave_perf=0
+        base = baseline[testlist].to_numpy(dtype="float")
+        base_list = base[0]
+        ave_perf = 0
         for i in range(len(base_list)):
             # success criterion:
-            if base_list[i]>1.01*float(test[i]):
-                print("test # ",i,"shows regression by {:.3f}%".format(
-                    (float(test[i])-base_list[i])/base_list[i]*100))
-                regression=1
-            if base_list[i]>0: ave_perf=ave_perf+float(test[i])/base_list[i]
-        if regression==0:
+            if base_list[i] > 1.01 * float(test[i]):
+                print(
+                    "test # ",
+                    i,
+                    "shows regression by {:.3f}%".format(
+                        (float(test[i]) - base_list[i]) / base_list[i] * 100
+                    ),
+                )
+                regression = 1
+            if base_list[i] > 0:
+                ave_perf = ave_perf + float(test[i]) / base_list[i]
+        if regression == 0:
             print("no regressions found")
-        ave_perf=ave_perf/len(base_list)
-        print("average performance relative to baseline:",ave_perf)
+        ave_perf = ave_perf / len(base_list)
+        print("average performance relative to baseline:", ave_perf)
     else:
         print("could not find a baseline")
     return regression
 
-'''
+
+"""
 def post_test_params(tlist,connection):
     sorted_dtypes = [x for _,x in sorted(zip(tests,dtype))]
     sorted_alayout = [x for _,x in sorted(zip(tests,alayout))]
@@ -223,29 +297,38 @@ def post_test_params(tlist,connection):
         'StrideC': Integer()
         }
     df.to_sql("ck_gemm_test_params",connection,if_exists='replace',index=False, dtype=dtypes)
-'''
+"""
+
 
 def main():
     args = parse_args()
-    results=[]
-    tflops_base=[]
-    testlist=[]
-    #parse the test parameters from the logfile
+    results = []
+    tflops_base = []
+    testlist = []
+    # parse the test parameters from the logfile
     for filename in args.files:
-        branch_name, node_id, gpu_arch, compute_units, rocm_vers, hip_vers, environment = get_log_params(filename)
+        (
+            branch_name,
+            node_id,
+            gpu_arch,
+            compute_units,
+            rocm_vers,
+            hip_vers,
+            environment,
+        ) = get_log_params(filename)
 
-    print("Branch name:",branch_name)
-    print("Node name:",node_id)
-    print("GPU_arch:",gpu_arch)
-    print("Compute units:",compute_units)
-    print("ROCM_version:",rocm_vers)
-    print("HIP_version:",hip_vers)
-    print("Environment:",environment)
-    #parse results, get the Tflops value for "Best Perf" kernels
-    results=parse_logfile(filename)
+    print("Branch name:", branch_name)
+    print("Node name:", node_id)
+    print("GPU_arch:", gpu_arch)
+    print("Compute units:", compute_units)
+    print("ROCM_version:", rocm_vers)
+    print("HIP_version:", hip_vers)
+    print("Environment:", environment)
+    # parse results, get the Tflops value for "Best Perf" kernels
+    results = parse_logfile(filename)
 
-    print("Number of tests:",len(results))
-    sql_hostname = '127.0.0.1'
+    print("Number of tests:", len(results))
+    sql_hostname = "127.0.0.1"
     sql_username = os.environ["dbuser"]
     sql_password = os.environ["dbpassword"]
     sql_main_database = os.environ["ck_perf_db"]
@@ -256,127 +339,147 @@ def main():
     ssh_pass = os.environ["dbsshpassword"]
 
     with SSHTunnelForwarder(
-            (ssh_host, ssh_port),
-            ssh_username=ssh_user,
-            ssh_password=ssh_pass,
-            remote_bind_address=(sql_hostname, sql_port)) as tunnel:
-
-        sqlEngine = sqlalchemy.create_engine('mysql+pymysql://{0}:{1}@{2}:{3}/{4}'.
-            format(sql_username, sql_password, sql_hostname, tunnel.local_bind_port, sql_main_database))
+        (ssh_host, ssh_port),
+        ssh_username=ssh_user,
+        ssh_password=ssh_pass,
+        remote_bind_address=(sql_hostname, sql_port),
+    ) as tunnel:
+        sqlEngine = sqlalchemy.create_engine(
+            "mysql+pymysql://{0}:{1}@{2}:{3}/{4}".format(
+                sql_username,
+                sql_password,
+                sql_hostname,
+                tunnel.local_bind_port,
+                sql_main_database,
+            )
+        )
         conn = sqlEngine.connect()
 
-        #save gemm performance tests:
-        if 'perf_gemm' in filename and 'gemm_bilinear' not in filename:
-            #write the ck_gemm_test_params table only needed once the test set changes
-            #post_test_params(test_list,conn)
-            for i in range(1,len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_gemm_tflops"
-        if 'batched_gemm' in filename:
-            for i in range(1,len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_batched_gemm_tflops"
-        if 'grouped_gemm' in filename:
-            for i in range(1,len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_grouped_gemm_tflops"
-        if 'perf_conv_fwd' in filename:
-            for i in range(1,len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_conv_fwd_tflops"
-        if 'perf_conv_bwd_data' in filename:
-            for i in range(1,len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_conv_bwd_data_tflops"
-        if 'grouped_conv_fwd' in filename:
-            for i in range(1,len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_grouped_conv_fwd_tflops"
-        if 'grouped_conv_bwd_data' in filename:
-            for i in range(1,len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_grouped_conv_bwd_data_tflops"
-        if 'grouped_conv_bwd_weight' in filename:
-            for i in range(1,len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_grouped_conv_bwd_weight_tflops"
-        if 'gemm_bilinear' in filename:
-            for i in range(1,len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_gemm_bilinear_tflops"
-        if 'reduction' in filename:
-            for i in range(1,len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_reduction_GBps"
-        if 'resnet50_N4' in filename:
-            for i in range(1,50):
-                testlist.append("Layer%i"%i)
-            table_name="ck_resnet50_N4_tflops"
-        if 'resnet50_N256' in filename:
-            for i in range(1,50):
-                testlist.append("Layer%i"%i)
-            table_name="ck_resnet50_N256_tflops"
-        if 'onnx_gemm' in filename:
-            for i in range(1,len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_onnx_gemm_tflops"
-        if 'splitK_gemm' in filename:
-            for i in range(1,len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_splitK_gemm_tflops"
-        if 'mixed_gemm' in filename:
-            for i in range(1,len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_mixed_gemm_tflops"
-        if 'fmha_fwd' in filename:
-            for i in range(1,len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_fmha_fwd_tflops"
-        if 'fmha_bwd' in filename:
-            for i in range(1,len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_fmha_bwd_tflops"
-        if 'gemm_basic_fp16' in filename:
-            for i in range(1, len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_tile_gemm_basic_fp16_tflops"
-        if 'gemm_mem_pipeline_fp16' in filename:
-            for i in range(1, len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_tile_gemm_mem_pipeline_fp16_tflops"
-        if 'gemm_basic_bf16' in filename:
-            for i in range(1, len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_tile_gemm_basic_bf16_tflops"
-        if 'gemm_mem_pipeline_bf16' in filename:
-            for i in range(1, len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_tile_gemm_mem_pipeline_bf16_tflops"
-        if 'gemm_basic_fp8' in filename:
-            for i in range(1, len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_tile_gemm_basic_fp8_tflops"
-        if 'gemm_mem_pipeline_fp8' in filename:
-            for i in range(1, len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_tile_gemm_mem_pipeline_fp8_tflops"
-        if 'gemm_basic_bf8' in filename:
-            for i in range(1, len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_tile_gemm_basic_bf8_tflops"
-        if 'gemm_mem_pipeline_bf8' in filename:
-            for i in range(1, len(results)+1):
-                testlist.append("Test%i"%i)
-            table_name="ck_tile_gemm_mem_pipeline_bf8_tflops"
+        # save gemm performance tests:
+        if "perf_gemm" in filename and "gemm_bilinear" not in filename:
+            # write the ck_gemm_test_params table only needed once the test set changes
+            # post_test_params(test_list,conn)
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_gemm_tflops"
+        if "batched_gemm" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_batched_gemm_tflops"
+        if "grouped_gemm" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_grouped_gemm_tflops"
+        if "perf_conv_fwd" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_conv_fwd_tflops"
+        if "perf_conv_bwd_data" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_conv_bwd_data_tflops"
+        if "grouped_conv_fwd" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_grouped_conv_fwd_tflops"
+        if "grouped_conv_bwd_data" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_grouped_conv_bwd_data_tflops"
+        if "grouped_conv_bwd_weight" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_grouped_conv_bwd_weight_tflops"
+        if "gemm_bilinear" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_gemm_bilinear_tflops"
+        if "reduction" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_reduction_GBps"
+        if "resnet50_N4" in filename:
+            for i in range(1, 50):
+                testlist.append("Layer%i" % i)
+            table_name = "ck_resnet50_N4_tflops"
+        if "resnet50_N256" in filename:
+            for i in range(1, 50):
+                testlist.append("Layer%i" % i)
+            table_name = "ck_resnet50_N256_tflops"
+        if "onnx_gemm" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_onnx_gemm_tflops"
+        if "splitK_gemm" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_splitK_gemm_tflops"
+        if "mixed_gemm" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_mixed_gemm_tflops"
+        if "fmha_fwd" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_fmha_fwd_tflops"
+        if "fmha_bwd" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_fmha_bwd_tflops"
+        if "gemm_basic_fp16" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_tile_gemm_basic_fp16_tflops"
+        if "gemm_mem_pipeline_fp16" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_tile_gemm_mem_pipeline_fp16_tflops"
+        if "gemm_basic_bf16" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_tile_gemm_basic_bf16_tflops"
+        if "gemm_mem_pipeline_bf16" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_tile_gemm_mem_pipeline_bf16_tflops"
+        if "gemm_basic_fp8" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_tile_gemm_basic_fp8_tflops"
+        if "gemm_mem_pipeline_fp8" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_tile_gemm_mem_pipeline_fp8_tflops"
+        if "gemm_basic_bf8" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_tile_gemm_basic_bf8_tflops"
+        if "gemm_mem_pipeline_bf8" in filename:
+            for i in range(1, len(results) + 1):
+                testlist.append("Test%i" % i)
+            table_name = "ck_tile_gemm_mem_pipeline_bf8_tflops"
 
-        tflops_base = get_baseline(table_name,conn)
-        store_new_test_result(table_name, results, testlist, branch_name, node_id, gpu_arch, compute_units, rocm_vers, hip_vers, environment, sqlEngine)
+        tflops_base = get_baseline(table_name, conn)
+        store_new_test_result(
+            table_name,
+            results,
+            testlist,
+            branch_name,
+            node_id,
+            gpu_arch,
+            compute_units,
+            rocm_vers,
+            hip_vers,
+            environment,
+            sqlEngine,
+        )
         conn.close()
 
-    #compare the results to the baseline if baseline exists
-    regression=0
-    regression=compare_test_to_baseline(tflops_base,results,testlist)
+    # compare the results to the baseline if baseline exists
+    regression = 0
+    regression = compare_test_to_baseline(tflops_base, results, testlist)
     return regression
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     main()
diff --git a/script/remod_for_ck_tile.sh b/script/remod_for_ck_tile.sh
index b017d2e1d6..7b99ec60bd 100755
--- a/script/remod_for_ck_tile.sh
+++ b/script/remod_for_ck_tile.sh
@@ -2,18 +2,6 @@
 # Copyright © Advanced Micro Devices, Inc., or its affiliates.
 # SPDX-License-Identifier: MIT
 
-# Get list of staged files
-STAGED_FILES=$(git diff --cached --name-only)
-
-# Check if any staged file is under include/ck_tile/ or example/ck_tile/
-if echo "$STAGED_FILES" | grep -qE '^(include/ck_tile/|example/ck_tile/)'; then
-    echo "Detected changes in ck_tile-related files. Running remod.py..."
-
-    # Run remod.py in both required locations
-    (cd include/ck_tile/ && python3 remod.py)
-    (cd example/ck_tile/ && python3 remod.py)
-
-    echo "remod.py completed."
-else
-    echo "No changes in ck_tile-related files. Skipping remod.py."
-fi
+# Run remod.py in both required locations
+(cd include/ck_tile/ && python3 remod.py)
+(cd example/ck_tile/ && python3 remod.py)
diff --git a/script/run_ck_profiler_gemm_with_csv_shapes.py b/script/run_ck_profiler_gemm_with_csv_shapes.py
index 553d46558e..eb0eb9c920 100644
--- a/script/run_ck_profiler_gemm_with_csv_shapes.py
+++ b/script/run_ck_profiler_gemm_with_csv_shapes.py
@@ -71,7 +71,7 @@ def tuples(filename):
             try:
                 m, n, k = map(int, line)
                 lines.append((m, n, k))
-            except:
+            except Exception:
                 pass
     return lines
 
@@ -163,19 +163,19 @@ def run_shape(shape, profiler_bin, op_name, dtype, layout):
     m, n, k = shape
     try:
         op = OPs[op_name]
-    except:
+    except KeyError:
         raise AssertionError(f"Invalid operator {op_name}")
     name_arg = op.name
     op_wrapper = op.value()
 
     try:
         dtype_arg = str(op_wrapper.dtype[dtype].value)
-    except:
+    except KeyError:
         raise AssertionError(f"Invalid dtype for {op_name}: {dtype}")
 
     try:
         layout_wrapper = op_wrapper.layout[layout]
-    except:
+    except KeyError:
         raise AssertionError(f"Invalid layout for {op_name}: {layout}")
     layout_arg = str(layout_wrapper.value)
     # verification: no, initialization: decimal, print tensor: no, time kernel: yes
@@ -286,7 +286,9 @@ def main():
     try:
         from tqdm import tqdm as iterate
     except ImportError:
-        iterate = lambda x: x
+
+        def iterate(x):
+            return x
 
     for s in iterate(shapes):
         run_shape_stdout_lines = run_shape(
diff --git a/test/ck_tile/layernorm2d/generate.py b/test/ck_tile/layernorm2d/generate.py
index f7446c0148..f387f7ce49 100644
--- a/test/ck_tile/layernorm2d/generate.py
+++ b/test/ck_tile/layernorm2d/generate.py
@@ -6,47 +6,50 @@ import argparse
 from enum import IntEnum
 from pathlib import Path
 import sys
-from typing import List, Optional, Any
+from typing import List, Any
 import functools
 import itertools
 import copy
 from dataclasses import dataclass
 
-def get_if_str(idx, total, lase_else = True):
+
+def get_if_str(idx, total, lase_else=True):
     if idx == 0:
-        return 'if'
+        return "if"
     elif idx < total - 1:
-        return 'else if'
+        return "else if"
     else:
         if lase_else:
-            return 'else'
+            return "else"
         else:
-            return 'else if'
+            return "else if"
 
-XBIAS_ENUM_STR_MAP = [
-    'no',
-    'xbias']      # pre-norm add bias
+
+XBIAS_ENUM_STR_MAP = ["no", "xbias"]  # pre-norm add bias
 
 FUSED_ADD_ENUM_STR_MAP = [
-    'no',
-    'pras',      # pre-norm
-    'pra' ]      # post-norm
+    "no",
+    "pras",  # pre-norm
+    "pra",
+]  # post-norm
 
-FUSED_FUSED_SWEEP_STR_MAP = [
-    'no',
-    'dquant' ]
+FUSED_FUSED_SWEEP_STR_MAP = ["no", "dquant"]
+
+DATA_TYPE_MAP = {
+    "fp32": "float",
+    "fp16": "ck_tile::fp16_t",
+    "bf16": "ck_tile::bf16_t",
+    "int8": "ck_tile::int8_t",
+    "fp8": "ck_tile::fp8_t",
+}
 
-DATA_TYPE_MAP = {'fp32' : 'float',
-                 'fp16' : 'ck_tile::fp16_t',
-                 'bf16' : 'ck_tile::bf16_t',
-                 'int8' : 'ck_tile::int8_t',
-                 'fp8'  : 'ck_tile::fp8_t'}
 
 def BOOL_MAP(b_) -> str:
     if b_:
-        return 'true'
+        return "true"
     else:
-        return 'false'
+        return "false"
+
 
 class layernorm_fwd_codegen:
     API_TRAITS_DEFINE = """
@@ -235,15 +238,15 @@ float layernorm2d_fwd(layernorm2d_fwd_traits t,
 
 """
 
-    API_PER_DTYPE="""    {F_if}(t.prec_i == \"{F_i_type}\" && t.prec_o == \"{F_o_type}\"){{
+    API_PER_DTYPE = """    {F_if}(t.prec_i == \"{F_i_type}\" && t.prec_o == \"{F_o_type}\"){{
 {F_per_n_case}
     }}
 """
-    API_PER_N_CASE="""        {F_if} {F_N_COND} {{
+    API_PER_N_CASE = """        {F_if} {F_N_COND} {{
 {F_inner_dispatch}
         }}
 """
-    API_INNER_CASE="""            {F_if} {F_VEC_COND}
+    API_INNER_CASE = """            {F_if} {F_VEC_COND}
                 r={F_instance_func}(s, a);
 """
 
@@ -280,138 +283,141 @@ float layernorm2d_fwd(layernorm2d_fwd_traits t,
 
     @dataclass
     class k_traits:
-        F_kPadN : bool
-        F_kSaveMeanInvStd : bool
-        F_kTwoPass : bool
-        F_kXbias : Any #: layernorm_fwd_codegen.k_bias_enum
-        F_kFusedAdd : Any #: layernorm_fwd_codegen.k_fuesd_add_enum
-        F_kFusedQuant : Any  #: layernorm_fwd_codegen.k_fused_sweep_enum
+        F_kPadN: bool
+        F_kSaveMeanInvStd: bool
+        F_kTwoPass: bool
+        F_kXbias: Any  #: layernorm_fwd_codegen.k_bias_enum
+        F_kFusedAdd: Any  #: layernorm_fwd_codegen.k_fuesd_add_enum
+        F_kFusedQuant: Any  #: layernorm_fwd_codegen.k_fused_sweep_enum
 
     @dataclass
     class k_shape:
-        F_BlockTile    : List[int]
-        F_WarpPerBlock : List[int]
-        F_WarpTile     : List[int]
-        F_Vector_      : List[int]
+        F_BlockTile: List[int]
+        F_WarpPerBlock: List[int]
+        F_WarpTile: List[int]
+        F_Vector_: List[int]
+
         @property
         def F_BlockSize(self) -> int:
-            return functools.reduce(lambda a, b: a*b, self.F_WarpTile)
+            return functools.reduce(lambda a, b: a * b, self.F_WarpTile)
 
     @dataclass
     class k_problem:
-        F_XDataType       : str
-        F_XBiasDataType   : str
-        F_GammaDataType   : str
-        F_BetaDataType    : str
-        F_ComputeDataType : str
-        F_YDataType       : str
-        F_MeanDataType    : str
-        F_InvStdDataType  : str
-        F_BlockShape      : str
-        F_Traits          : Any #k_traits
+        F_XDataType: str
+        F_XBiasDataType: str
+        F_GammaDataType: str
+        F_BetaDataType: str
+        F_ComputeDataType: str
+        F_YDataType: str
+        F_MeanDataType: str
+        F_InvStdDataType: str
+        F_BlockShape: str
+        F_Traits: Any  # k_traits
 
     @dataclass
     class k_pipeline_one_pass:
-        F_Problem         : Any #k_problem
-    
+        F_Problem: Any  # k_problem
+
     @dataclass
     class k_pipeline_two_pass:
-        F_Problem         : Any #k_problem
+        F_Problem: Any  # k_problem
 
     @dataclass
     class default_2d_epilogue_problem:
-        F_AccDataType : str
-        F_ODataType : str
-        F_kPadM : bool
-        F_kPadN : bool
+        F_AccDataType: str
+        F_ODataType: str
+        F_kPadM: bool
+        F_kPadN: bool
 
     @dataclass
     class default_2d_epilogue:
-        F_problem : Any
+        F_problem: Any
 
     @dataclass
     class k_kernel:
-        F_pipeline : Any
-        F_epilogue : Any
+        F_pipeline: Any
+        F_epilogue: Any
 
     @dataclass
     class h_traits:
-        F_XDataType : str
-        F_YDataType : str
-        F_SmoothScaleDataType : str
-        F_YScaleDataType : str
-        F_Repeat_M : int
-        F_Repeat_N : int
-        F_ThreadPerBlock_M : int
-        F_ThreadPerBlock_N : int
-        F_Vector_N : int
-        F_kPadN : bool
-        F_kSaveMeanInvStd_ : bool
-        F_kFastFDiv_ : bool
-        F_kWelford_ : bool
-        F_kTwoPass_ : bool
-        F_kXbias_ : int
-        F_kFusedAdd : int
-        F_kFusedQuant : int
+        F_XDataType: str
+        F_YDataType: str
+        F_SmoothScaleDataType: str
+        F_YScaleDataType: str
+        F_Repeat_M: int
+        F_Repeat_N: int
+        F_ThreadPerBlock_M: int
+        F_ThreadPerBlock_N: int
+        F_Vector_N: int
+        F_kPadN: bool
+        F_kSaveMeanInvStd_: bool
+        F_kFastFDiv_: bool
+        F_kWelford_: bool
+        F_kTwoPass_: bool
+        F_kXbias_: int
+        F_kFusedAdd: int
+        F_kFusedQuant: int
 
         @property
-        def trait_name(self) ->str:
-            t_ = f'{DATA_TYPE_MAP[self.F_XDataType]}, {DATA_TYPE_MAP[self.F_YDataType]}, {DATA_TYPE_MAP[self.F_SmoothScaleDataType]}, {DATA_TYPE_MAP[self.F_YScaleDataType]}, {self.F_Repeat_M:2}, {self.F_Repeat_N:2}, {self.F_ThreadPerBlock_M:2}, {self.F_ThreadPerBlock_N:4}'
-            t_ += f', {self.F_Vector_N:2}, {BOOL_MAP(self.F_kPadN):5}, {BOOL_MAP(self.F_kSaveMeanInvStd_):5}, {BOOL_MAP(self.F_kFastFDiv_):5}, {BOOL_MAP(self.F_kWelford_):5}'
-            t_ += f', {BOOL_MAP(self.F_kTwoPass_):5}, {self.F_kXbias:4}, {self.F_kFusedAdd:4}, {self.F_kFusedQuant:4}'
+        def trait_name(self) -> str:
+            t_ = f"{DATA_TYPE_MAP[self.F_XDataType]}, {DATA_TYPE_MAP[self.F_YDataType]}, {DATA_TYPE_MAP[self.F_SmoothScaleDataType]}, {DATA_TYPE_MAP[self.F_YScaleDataType]}, {self.F_Repeat_M:2}, {self.F_Repeat_N:2}, {self.F_ThreadPerBlock_M:2}, {self.F_ThreadPerBlock_N:4}"
+            t_ += f", {self.F_Vector_N:2}, {BOOL_MAP(self.F_kPadN):5}, {BOOL_MAP(self.F_kSaveMeanInvStd_):5}, {BOOL_MAP(self.F_kFastFDiv_):5}, {BOOL_MAP(self.F_kWelford_):5}"
+            t_ += f", {BOOL_MAP(self.F_kTwoPass_):5}, {self.F_kXbias:4}, {self.F_kFusedAdd:4}, {self.F_kFusedQuant:4}"
             return t_
 
         # string when calling this kernel
         @property
         def call_name(self) -> str:
-            return f'layernorm2d_fwd_<traits_<{self.trait_name}>>'
+            return f"layernorm2d_fwd_<traits_<{self.trait_name}>>"
 
         # string when define this kernel
         @property
         def def_name(self) -> str:
-            return f'template float layernorm2d_fwd_<traits_<{self.trait_name}>>(const S&, A);'
+            return f"template float layernorm2d_fwd_<traits_<{self.trait_name}>>(const S&, A);"
 
     # this class hold kernel under same source file
     @dataclass
     class h_instance:
-        F_DataTypePair : str
-        F_N : str
-        F_xbias : int
-        F_add : int
-        F_sweep : int
-        instance_list : List[Any] # List[h_traits]
+        F_DataTypePair: str
+        F_N: str
+        F_xbias: int
+        F_add: int
+        F_sweep: int
+        instance_list: List[Any]  # List[h_traits]
 
         @property
         def name(self) -> str:
-            prec_i, prec_o = self.F_DataTypePair.split(',')
-            dtype_str = f'{prec_i}' if prec_i == prec_o else f'{prec_i}_{prec_o}'
-            nnn = f'layernorm2d_fwd_{dtype_str}_n{self.F_N}'
+            prec_i, prec_o = self.F_DataTypePair.split(",")
+            dtype_str = f"{prec_i}" if prec_i == prec_o else f"{prec_i}_{prec_o}"
+            nnn = f"layernorm2d_fwd_{dtype_str}_n{self.F_N}"
             if self.F_xbias != 0:
-                nnn = nnn + '_' + XBIAS_ENUM_STR_MAP[self.F_xbias] 
+                nnn = nnn + "_" + XBIAS_ENUM_STR_MAP[self.F_xbias]
             if self.F_add != 0:
-                nnn = nnn + '_' + FUSED_ADD_ENUM_STR_MAP[self.F_add]
+                nnn = nnn + "_" + FUSED_ADD_ENUM_STR_MAP[self.F_add]
             if self.F_sweep != 0:
-                nnn = nnn + '_' + FUSED_FUSED_SWEEP_STR_MAP[self.F_sweep]
+                nnn = nnn + "_" + FUSED_FUSED_SWEEP_STR_MAP[self.F_sweep]
             return nnn
 
         @property
-        def instance_name(self) ->str:
+        def instance_name(self) -> str:
             return self.name
 
         @property
-        def content(self) ->str:
-            instance_defs = ''
+        def content(self) -> str:
+            instance_defs = ""
             for ins in self.instance_list:
-                instance_defs += ins.def_name + '\n'
-            return layernorm_fwd_codegen.INSTANCE_BASE.format(F_instance_def=instance_defs)
+                instance_defs += ins.def_name + "\n"
+            return layernorm_fwd_codegen.INSTANCE_BASE.format(
+                F_instance_def=instance_defs
+            )
 
     @property
     def name_api(self) -> str:
-        return 'layernorm2d_fwd_api'
+        return "layernorm2d_fwd_api"
 
     @property
     def name_common_header(self) -> str:
-        return 'layernorm2d_fwd_api_common'
+        return "layernorm2d_fwd_api_common"
 
     def content_api(self, args) -> str:
         # 1 sort based on dtype
@@ -424,40 +430,64 @@ float layernorm2d_fwd(layernorm2d_fwd_traits t,
                 t_dtype_dict[blob.F_DataTypePair][blob.F_N] = []
             t_dtype_dict[blob.F_DataTypePair][blob.F_N].append(blob)
 
-        d_str = ''
+        d_str = ""
         for i_d, dtype_ in enumerate(t_dtype_dict):
             blob_per_t = t_dtype_dict[dtype_]
-            n_str = ''
+            n_str = ""
             for i_n, n_ in enumerate(blob_per_t):
                 blob_per_n = blob_per_t[n_]
                 inner_str = ""
                 for i_b, b_ in enumerate(blob_per_n):
                     # generate single kernel instance file
-                    #vec_str = ""
+                    # vec_str = ""
                     for i_ins, ins in enumerate(b_.instance_list):
                         idx_in_n = i_b * len(b_.instance_list) + i_ins
                         len_in_n = len(blob_per_n) * len(b_.instance_list)
                         # _if = 'if' if i_ins == 0 else 'else if'
                         if ins.F_kFusedQuant == 0:
-                            _sweep_cond = 't.fused_quant == {f_fused_sweep}'.format(f_fused_sweep = ins.F_kFusedQuant)
+                            _sweep_cond = "t.fused_quant == {f_fused_sweep}".format(
+                                f_fused_sweep=ins.F_kFusedQuant
+                            )
                         elif ins.F_kFusedQuant == 1:
-                            _sweep_cond = 't.fused_quant == {f_fused_sweep} && (t.prec_sm == \"{f_sx_type}\" && t.prec_sy == \"{f_sy_type}\")'.format(
-                                f_fused_sweep = ins.F_kFusedQuant, f_sx_type=ins.F_SmoothScaleDataType, f_sy_type=ins.F_YScaleDataType)
+                            _sweep_cond = 't.fused_quant == {f_fused_sweep} && (t.prec_sm == "{f_sx_type}" && t.prec_sy == "{f_sy_type}")'.format(
+                                f_fused_sweep=ins.F_kFusedQuant,
+                                f_sx_type=ins.F_SmoothScaleDataType,
+                                f_sy_type=ins.F_YScaleDataType,
+                            )
                         elif ins.F_kFusedQuant == 2:
-                            _sweep_cond = 't.fused_quant == {f_fused_sweep} && (t.prec_sy == \"{f_sy_type}\")'.format(
-                                f_fused_sweep = ins.F_kFusedQuant, f_sy_type=ins.F_YScaleDataType)
-                        _cond = '((a.n % {f_vec_n} == 0) && (t.xbias == {f_xbias}) && (t.fused_add == {f_fused_add}) && ({f_sweep_cond}))'.format(
-                                        f_vec_n = ins.F_Vector_N, f_xbias = ins.F_kXbias, f_fused_add = ins.F_kFusedAdd,
-                                        f_sweep_cond = _sweep_cond)
-                        inner_str += self.API_INNER_CASE.format(F_if = get_if_str(idx_in_n, len_in_n, False),
-                                            F_VEC_COND = _cond, F_instance_func=ins.call_name)
-                    #inner_str = inner_str + vec_str
-                n_cnd = f'(a.n <= {n_})' if isinstance(n_, int) else ''
-                n_str += self.API_PER_N_CASE.format(F_if = get_if_str(i_n, len(blob_per_t), not isinstance(n_, int)), F_N_COND=n_cnd, F_inner_dispatch=inner_str)
-            prec_i, prec_o = dtype_.split(',')
-            d_str += self.API_PER_DTYPE.format(F_if = get_if_str(i_d, len(t_dtype_dict), False), F_i_type=prec_i, F_o_type=prec_o, F_per_n_case=n_str)
+                            _sweep_cond = 't.fused_quant == {f_fused_sweep} && (t.prec_sy == "{f_sy_type}")'.format(
+                                f_fused_sweep=ins.F_kFusedQuant,
+                                f_sy_type=ins.F_YScaleDataType,
+                            )
+                        _cond = "((a.n % {f_vec_n} == 0) && (t.xbias == {f_xbias}) && (t.fused_add == {f_fused_add}) && ({f_sweep_cond}))".format(
+                            f_vec_n=ins.F_Vector_N,
+                            f_xbias=ins.F_kXbias,
+                            f_fused_add=ins.F_kFusedAdd,
+                            f_sweep_cond=_sweep_cond,
+                        )
+                        inner_str += self.API_INNER_CASE.format(
+                            F_if=get_if_str(idx_in_n, len_in_n, False),
+                            F_VEC_COND=_cond,
+                            F_instance_func=ins.call_name,
+                        )
+                    # inner_str = inner_str + vec_str
+                n_cnd = f"(a.n <= {n_})" if isinstance(n_, int) else ""
+                n_str += self.API_PER_N_CASE.format(
+                    F_if=get_if_str(i_n, len(blob_per_t), not isinstance(n_, int)),
+                    F_N_COND=n_cnd,
+                    F_inner_dispatch=inner_str,
+                )
+            prec_i, prec_o = dtype_.split(",")
+            d_str += self.API_PER_DTYPE.format(
+                F_if=get_if_str(i_d, len(t_dtype_dict), False),
+                F_i_type=prec_i,
+                F_o_type=prec_o,
+                F_per_n_case=n_str,
+            )
 
-        api_base = self.API_BASE.format(F_traits_define=self.API_TRAITS_DEFINE, F_dispatch=d_str)
+        api_base = self.API_BASE.format(
+            F_traits_define=self.API_TRAITS_DEFINE, F_dispatch=d_str
+        )
         return api_base
 
     @property
@@ -468,83 +498,982 @@ float layernorm2d_fwd(layernorm2d_fwd_traits t,
         h_traits = layernorm_fwd_codegen.h_traits
         h_instance = layernorm_fwd_codegen.h_instance
 
-        dynamic_quant_out_dtype = ['int8', 'fp8']
+        dynamic_quant_out_dtype = ["int8", "fp8"]
         # some predefined support range
         # (prec_i,prec_o) for simplicity this string will be used as key for dict
-        scale_list = [('fp32,fp32')]
-        dtype_list = [('fp16,fp16'), ('bf16,bf16'),
-                        ('fp16,int8'), ('bf16,int8'),
-                        ('fp16,fp8'), ('bf16,fp8')] # NOTE: only fused-dynamic-quant use int8 or fp8 out
-        types_8bit = ('int8', 'fp8')
-        types_16bit = ('int16', 'fp16', 'bf16')
-        #fused_add_list = [0, 1, 2]
-        #fused_sweep_list = [0, 1, 2] # NOTE: only single pass can use fused dynamic quant
+        scale_list = [("fp32,fp32")]
+        dtype_list = [
+            ("fp16,fp16"),
+            ("bf16,bf16"),
+            ("fp16,int8"),
+            ("bf16,int8"),
+            ("fp16,fp8"),
+            ("bf16,fp8"),
+        ]  # NOTE: only fused-dynamic-quant use int8 or fp8 out
+        types_8bit = ("int8", "fp8")
+        types_16bit = ("int16", "fp16", "bf16")
+        # fused_add_list = [0, 1, 2]
+        # fused_sweep_list = [0, 1, 2] # NOTE: only single pass can use fused dynamic quant
         xbias_list = [0, 1]
         fused_add_list = [0, 1]
-        fused_sweep_list = [0, 1] # NOTE: only single pass can use fused dynamic quant
+        fused_sweep_list = [0, 1]  # NOTE: only single pass can use fused dynamic quant
         #                                                       rm  rn  tm   tn  vn  pd     mv     fdiv  welford   2p     xbias    add   sweep
-        h_trait_dict = {'64'  : [ h_traits('x', 'y', 'xs', 'ys', 1,  1,  8,  8,  8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  1,  4,  16, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  1,  4,  64, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '128' : [ h_traits('x', 'y', 'xs', 'ys', 1,  1,  4,  16, 8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  1,  4,  64, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  2,  4,  64, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '256' : [ h_traits('x', 'y', 'xs', 'ys', 1,  1,  4,  64, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  2,  4,  64, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  4,  64, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '512' : [ h_traits('x', 'y', 'xs', 'ys', 1,  1,  4,  64, 8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  2,  4,  64, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  4,  64, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  8,  4,  64, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '768' : [ h_traits('x', 'y', 'xs', 'ys', 1,  3,  4,  64, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  6,  4,  64, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1, 12,  4,  64, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '1024' :[ h_traits('x', 'y', 'xs', 'ys', 1,  1,  2, 128, 8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  2,  2, 128, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  2, 128, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  1, 256, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '1536' :[ h_traits('x', 'y', 'xs', 'ys', 1,  3,  4,  64, 8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  3,  2, 128, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  3,  1, 256, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  6,  1, 256, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '2048' :[ h_traits('x', 'y', 'xs', 'ys', 1,  1,  1, 256, 8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  2,  1, 256, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  1, 256, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  8,  1, 256, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '3072' :[ h_traits('x', 'y', 'xs', 'ys', 1,  3,  1, 128, 8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  3,  1, 256, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  6,  1, 256, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  3,  1,1024, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '4096' :[ h_traits('x', 'y', 'xs', 'ys', 1,  2,  1, 256, 8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  1, 256, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  2,  1,1024, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  1,1024, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '6144' :[ h_traits('x', 'y', 'xs', 'ys', 1,  3,  1, 256, 8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  3,  1, 512, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  3,  1,1024, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  6,  1,1024, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        '8192' :[ h_traits('x', 'y', 'xs', 'ys', 1,  4,  1, 256, 8,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  1, 512, 4,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  1,1024, 2,  True,  False, True, True,   False,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  8,  1,1024, 1,  True,  False, True, True,   False,   0,    0,    0)],
-                        'big'  :[ h_traits('x', 'y', 'xs', 'ys', 1,  1,  1,1024, 8,  True,  False, True, True,    True,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  1, 256, 4,  True,  False, True, True,    True,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1, 12,  1, 256, 2,  True,  False, True, True,    True,   0,    0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 1,  4,  1,1024, 1,  True,  False, True, True,    True,   0,    0,    0)]}
+        h_trait_dict = {
+            "64": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    8,
+                    8,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    4,
+                    16,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    4,
+                    64,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "128": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    4,
+                    16,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    4,
+                    64,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    2,
+                    4,
+                    64,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "256": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    4,
+                    64,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    2,
+                    4,
+                    64,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    4,
+                    64,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "512": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    4,
+                    64,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    2,
+                    4,
+                    64,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    4,
+                    64,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    8,
+                    4,
+                    64,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "768": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    4,
+                    64,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    6,
+                    4,
+                    64,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    12,
+                    4,
+                    64,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "1024": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    2,
+                    128,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    2,
+                    2,
+                    128,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    2,
+                    128,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    1,
+                    256,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "1536": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    4,
+                    64,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    2,
+                    128,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    1,
+                    256,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    6,
+                    1,
+                    256,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "2048": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    1,
+                    256,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    2,
+                    1,
+                    256,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    1,
+                    256,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    8,
+                    1,
+                    256,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "3072": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    1,
+                    128,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    1,
+                    256,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    6,
+                    1,
+                    256,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    1,
+                    1024,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "4096": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    2,
+                    1,
+                    256,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    1,
+                    256,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    2,
+                    1,
+                    1024,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    1,
+                    1024,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "6144": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    1,
+                    256,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    1,
+                    512,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    3,
+                    1,
+                    1024,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    6,
+                    1,
+                    1024,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "8192": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    1,
+                    256,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    1,
+                    512,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    1,
+                    1024,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    8,
+                    1,
+                    1024,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    False,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+            "big": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    1,
+                    1,
+                    1024,
+                    8,
+                    True,
+                    False,
+                    True,
+                    True,
+                    True,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    1,
+                    256,
+                    4,
+                    True,
+                    False,
+                    True,
+                    True,
+                    True,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    12,
+                    1,
+                    256,
+                    2,
+                    True,
+                    False,
+                    True,
+                    True,
+                    True,
+                    0,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    1,
+                    4,
+                    1,
+                    1024,
+                    1,
+                    True,
+                    False,
+                    True,
+                    True,
+                    True,
+                    0,
+                    0,
+                    0,
+                ),
+            ],
+        }
         total_blob = list()
         for hs_key in h_trait_dict:
             hs = h_trait_dict[hs_key]
             current_n = hs[0].F_Repeat_N * hs[0].F_ThreadPerBlock_N * hs[0].F_Vector_N
-            for dtype, scale_type, xbias, fused_add, fused_quant in itertools.product(dtype_list, scale_list, xbias_list, fused_add_list, fused_sweep_list):
-                prec_i, prec_o = dtype.split(',')
-                scale_sm, scale_y = scale_type.split(',')
+            for dtype, scale_type, xbias, fused_add, fused_quant in itertools.product(
+                dtype_list, scale_list, xbias_list, fused_add_list, fused_sweep_list
+            ):
+                prec_i, prec_o = dtype.split(",")
+                scale_sm, scale_y = scale_type.split(",")
                 if prec_o in dynamic_quant_out_dtype and fused_quant != 1:
-                    continue # skip non dynamic quant case
-                if fused_quant == 1 and hs_key == 'big':
+                    continue  # skip non dynamic quant case
+                if fused_quant == 1 and hs_key == "big":
                     continue
                 current_hs = list()
                 for chs_ in hs:
-                    h_ = copy.copy(chs_) # copy the base instance out
+                    h_ = copy.copy(chs_)  # copy the base instance out
                     h_.F_XDataType = prec_i
                     h_.F_YDataType = prec_o
                     h_.F_SmoothScaleDataType = scale_sm
@@ -554,29 +1483,33 @@ float layernorm2d_fwd(layernorm2d_fwd_traits t,
                     h_.F_kFusedQuant = fused_quant
                     # disable welford update for 8bit and 16 bit smallN
                     if not h_.F_kTwoPass_:
-                        #disable 16 bit when set args disable_16b_welford
+                        # disable 16 bit when set args disable_16b_welford
                         if args.disable_16b_welford and prec_i in types_16bit:
                             h_.F_kWelford_ = False
-                        #disable 8bit by default
+                        # disable 8bit by default
                         elif prec_i in types_8bit or prec_o in types_8bit:
                             h_.F_kWelford_ = False
-                        #disable 16bit small N
-                        elif prec_i in types_16bit and hs_key == '64':
+                        # disable 16bit small N
+                        elif prec_i in types_16bit and hs_key == "64":
                             h_.F_kWelford_ = False
-                    current_hs.append(h_) # + "\n"
-                #f.write(str(f.parent / GEN_DIR / (blobs.api_common_header_
-                current_n_str = 'big' if hs_key == 'big' else current_n
-                total_blob.append(h_instance(dtype, current_n_str, xbias, fused_add, fused_quant, current_hs))
+                    current_hs.append(h_)  # + "\n"
+                # f.write(str(f.parent / GEN_DIR / (blobs.api_common_header_
+                current_n_str = "big" if hs_key == "big" else current_n
+                total_blob.append(
+                    h_instance(
+                        dtype, current_n_str, xbias, fused_add, fused_quant, current_hs
+                    )
+                )
         return total_blob
 
     def list_blobs(self, args) -> None:
         w_p = Path(self.working_path)
-        list_p = w_p / 'layernorm2d_fwd_blobs.txt'
+        list_p = w_p / "layernorm2d_fwd_blobs.txt"
         blobs = self.get_blobs(args)
-        with list_p.open('w') as list_f:
+        with list_p.open("w") as list_f:
             # api related file
-            list_f.write(str(w_p / (self.name_api + ".cpp"))  + "\n")
-            list_f.write(str(w_p / (self.name_common_header + ".hpp"))  + "\n")
+            list_f.write(str(w_p / (self.name_api + ".cpp")) + "\n")
+            list_f.write(str(w_p / (self.name_common_header + ".hpp")) + "\n")
             # kernel instance file
             for b in blobs:
                 list_f.write(str(w_p / (b.name + ".cpp")) + "\n")
@@ -585,24 +1518,28 @@ float layernorm2d_fwd(layernorm2d_fwd_traits t,
         w_p = Path(self.working_path)
         w_str = self.content_api(args)
         (w_p / (self.name_api + ".cpp")).write_text(w_str)
-        (w_p / (self.name_common_header + ".hpp")).write_text(self.content_common_header)
+        (w_p / (self.name_common_header + ".hpp")).write_text(
+            self.content_common_header
+        )
         blobs = self.get_blobs(args)
         for b in blobs:
             (w_p / (b.name + ".cpp")).write_text(b.content)
 
+
 def list_blobs(args):
-    api_list = args.api.split(',')
+    api_list = args.api.split(",")
     for api in api_list:
-        if api == 'fwd':
+        if api == "fwd":
             layernorm_fwd_codegen(args.working_path, args.filter).list_blobs(args)
 
 
 def gen_blobs(args):
-    api_list = args.api.split(',')
+    api_list = args.api.split(",")
     for api in api_list:
-        if api == 'fwd':
+        if api == "fwd":
             layernorm_fwd_codegen(args.working_path, args.filter).gen_blobs(args)
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         prog="generate",
@@ -611,9 +1548,9 @@ if __name__ == "__main__":
     parser.add_argument(
         "-a",
         "--api",
-        default='fwd[all]',
+        default="fwd[all]",
         required=False,
-        help="supply API(s) to generate (default: fwd). separated by comma."
+        help="supply API(s) to generate (default: fwd). separated by comma.",
     )
 
     # the directory for list_blobs/gen_blobs to write files into
@@ -622,7 +1559,7 @@ if __name__ == "__main__":
         "--working_path",
         default="./",
         required=False,
-        help="the path where all the blobs are going to be generated"
+        help="the path where all the blobs are going to be generated",
     )
 
     # this script have 2 modes
@@ -634,15 +1571,15 @@ if __name__ == "__main__":
     parser.add_argument(
         "-l",
         "--list_blobs",
-        action='store_true',
-        help="list all the kernels to a file, "
+        action="store_true",
+        help="list all the kernels to a file, ",
     )
 
     parser.add_argument(
         "-g",
         "--gen_blobs",
-        action='store_true',
-        help="generate all kernels into different tile"
+        action="store_true",
+        help="generate all kernels into different tile",
     )
 
     # TODO: if using filter, must apply same value to output_dir and list_blobs
@@ -650,7 +1587,7 @@ if __name__ == "__main__":
         "-f",
         "--filter",
         required=False,
-        help="filter out kernels that need to generate, using fnmatch module"
+        help="filter out kernels that need to generate, using fnmatch module",
     )
 
     parser.add_argument(
@@ -658,29 +1595,27 @@ if __name__ == "__main__":
         "--traits",
         default="all",
         required=False,
-        help="enable/disable some feature. default generate all"
+        help="enable/disable some feature. default generate all",
     )
 
     parser.add_argument(
-        "-r",
-        "--receipt",
-        default=0,
-        required=False,
-        help="codegen receipt."
+        "-r", "--receipt", default=0, required=False, help="codegen receipt."
     )
 
     parser.add_argument(
         "--disable_16b_welford",
         default=False,
         required=False,
-        help="enable/disable welford for 16bit datatype n > 64"
+        help="enable/disable welford for 16bit datatype n > 64",
     )
 
     args = parser.parse_args()
 
     # print(f'{args.list_blobs}-{args.gen_blobs}')
-    if (args.gen_blobs and args.list_blobs) or ((not args.gen_blobs) and (not args.list_blobs)):
-        print('gen_blobs/list_blobs must specify only one option')
+    if (args.gen_blobs and args.list_blobs) or (
+        (not args.gen_blobs) and (not args.list_blobs)
+    ):
+        print("gen_blobs/list_blobs must specify only one option")
         sys.exit()
 
     p = Path(args.working_path)
diff --git a/test/ck_tile/pooling/test_pooling.cpp b/test/ck_tile/pooling/test_pooling.cpp
index 3cec19d2d6..fa98687bda 100644
--- a/test/ck_tile/pooling/test_pooling.cpp
+++ b/test/ck_tile/pooling/test_pooling.cpp
@@ -9,7 +9,7 @@
 
 #include "ck_tile/core.hpp"
 #include "ck_tile/host.hpp"
-#include "ck_tile/ops/pool.hpp"
+#include "ck_tile/ops/pooling.hpp"
 #include "ck_tile/host/reference/reference_pool.hpp"
 #include "ck_tile/host/kernel_launch.hpp"
 
diff --git a/test/ck_tile/rmsnorm2d/generate.py b/test/ck_tile/rmsnorm2d/generate.py
index 3bcc427e83..728e532c81 100644
--- a/test/ck_tile/rmsnorm2d/generate.py
+++ b/test/ck_tile/rmsnorm2d/generate.py
@@ -6,45 +6,51 @@ import argparse
 from enum import IntEnum
 from pathlib import Path
 import sys
-from typing import List, Optional, Any
+from typing import List, Any
 import functools
 import itertools
 import copy
 from dataclasses import dataclass
 
 
-def get_if_str(idx, total, lase_else = True):
+def get_if_str(idx, total, lase_else=True):
     if idx == 0:
-        return 'if'
+        return "if"
     elif idx < total - 1:
-        return 'else if'
+        return "else if"
     else:
         if lase_else:
-            return 'else'
+            return "else"
         else:
-            return 'else if'
+            return "else if"
+
 
 FUSED_ADD_ENUM_STR_MAP = [
-    'no',
-    'pras',      # pre-norm
-    'pra' ]      # post-norm
+    "no",
+    "pras",  # pre-norm
+    "pra",
+]  # post-norm
 
 FUSED_FUSED_SWEEP_STR_MAP = [
-    'no',
-    'sdquant',  # smooth dynamic quant
-    'dquant' ]  # dynamic quant (without sm_scale)
+    "no",
+    "sdquant",  # smooth dynamic quant
+    "dquant",
+]  # dynamic quant (without sm_scale)
+
+DATA_TYPE_MAP = {
+    "fp32": "float",
+    "fp16": "ck_tile::fp16_t",
+    "bf16": "ck_tile::bf16_t",
+    "int8": "ck_tile::int8_t",
+    "fp8": "ck_tile::fp8_t",
+}
 
-DATA_TYPE_MAP = {'fp32' : 'float',
-                 'fp16' : 'ck_tile::fp16_t',
-                 'bf16' : 'ck_tile::bf16_t',
-                 'int8' : 'ck_tile::int8_t',
-                 'fp8'  : 'ck_tile::fp8_t'}
 
 def BOOL_MAP(b_) -> str:
     if b_:
-        return 'true'
+        return "true"
     else:
-        return 'false'
+        return "false"
 
 
 class rmsnorm_fwd_codegen:
@@ -282,133 +288,136 @@ float rmsnorm2d_fwd(rmsnorm2d_fwd_traits t,
 
     @dataclass
     class k_traits:
-        F_kPadN : bool
-        F_kSaveMeanInvStd : bool
-        F_kTwoPass : bool
-        F_kFusedAdd : Any
-        F_kFusedQuant : Any
+        F_kPadN: bool
+        F_kSaveMeanInvStd: bool
+        F_kTwoPass: bool
+        F_kFusedAdd: Any
+        F_kFusedQuant: Any
 
     @dataclass
     class k_shape:
-        F_BlockTile    : List[int]
-        F_WarpPerBlock : List[int]
-        F_WarpTile     : List[int]
-        F_Vector_      : List[int]
+        F_BlockTile: List[int]
+        F_WarpPerBlock: List[int]
+        F_WarpTile: List[int]
+        F_Vector_: List[int]
+
         @property
         def F_BlockSize(self) -> int:
-            return functools.reduce(lambda a, b: a*b, self.F_WarpTile)
+            return functools.reduce(lambda a, b: a * b, self.F_WarpTile)
 
     @dataclass
     class k_problem:
-        F_XDataType       : str
-        F_GammaDataType   : str
-        F_ComputeDataType : str
-        F_YDataType       : str
-        F_InvRmsDataType  : str
-        F_BlockShape      : str
-        F_Traits          : Any #k_traits
+        F_XDataType: str
+        F_GammaDataType: str
+        F_ComputeDataType: str
+        F_YDataType: str
+        F_InvRmsDataType: str
+        F_BlockShape: str
+        F_Traits: Any  # k_traits
 
     @dataclass
     class k_pipeline_one_pass:
-        F_Problem         : Any #k_problem
+        F_Problem: Any  # k_problem
 
     @dataclass
     class k_pipeline_two_pass:
-        F_Problem         : Any #k_problem
+        F_Problem: Any  # k_problem
 
     @dataclass
     class default_2d_epilogue_problem:
-        F_AccDataType : str
-        F_ODataType : str
-        F_kPadM : bool
-        F_kPadN : bool
+        F_AccDataType: str
+        F_ODataType: str
+        F_kPadM: bool
+        F_kPadN: bool
 
     @dataclass
     class default_2d_epilogue:
-        F_problem : Any
+        F_problem: Any
 
     @dataclass
     class k_kernel:
-        F_pipeline : Any
-        F_epilogue : Any
+        F_pipeline: Any
+        F_epilogue: Any
 
     @dataclass
     class h_traits:
-        F_XDataType : str
-        F_YDataType : str
-        F_SmoothScaleDataType : str
-        F_YScaleDataType : str
-        F_UnquantYDataType : str
-        F_Repeat_M : int
-        F_Repeat_N : int
-        F_ThreadPerBlock_M : int
-        F_ThreadPerBlock_N : int
-        F_Vector_N : int
-        F_kPadN : bool
-        F_kSaveInvRms : bool
+        F_XDataType: str
+        F_YDataType: str
+        F_SmoothScaleDataType: str
+        F_YScaleDataType: str
+        F_UnquantYDataType: str
+        F_Repeat_M: int
+        F_Repeat_N: int
+        F_ThreadPerBlock_M: int
+        F_ThreadPerBlock_N: int
+        F_Vector_N: int
+        F_kPadN: bool
+        F_kSaveInvRms: bool
         F_kSaveUnquant: bool
-        F_kTwoPass : bool
-        F_kFusedAdd : int
-        F_kFusedQuant : int
+        F_kTwoPass: bool
+        F_kFusedAdd: int
+        F_kFusedQuant: int
 
         @property
-        def trait_name(self) ->str:
-            t_ = f'{DATA_TYPE_MAP[self.F_XDataType]}, {DATA_TYPE_MAP[self.F_YDataType]}, {DATA_TYPE_MAP[self.F_SmoothScaleDataType]}, {DATA_TYPE_MAP[self.F_YScaleDataType]}, {DATA_TYPE_MAP[self.F_UnquantYDataType]}, {self.F_Repeat_M:2}, {self.F_Repeat_N:2}, {self.F_ThreadPerBlock_M:2}, {self.F_ThreadPerBlock_N:4}'
-            t_ += f', {self.F_Vector_N:2}, {BOOL_MAP(self.F_kPadN):5}, {BOOL_MAP(self.F_kSaveInvRms):5}, {BOOL_MAP(self.F_kSaveUnquant):5}'
-            t_ += f', {BOOL_MAP(self.F_kTwoPass):5}, {self.F_kFusedAdd:4}, {self.F_kFusedQuant:4}'
+        def trait_name(self) -> str:
+            t_ = f"{DATA_TYPE_MAP[self.F_XDataType]}, {DATA_TYPE_MAP[self.F_YDataType]}, {DATA_TYPE_MAP[self.F_SmoothScaleDataType]}, {DATA_TYPE_MAP[self.F_YScaleDataType]}, {DATA_TYPE_MAP[self.F_UnquantYDataType]}, {self.F_Repeat_M:2}, {self.F_Repeat_N:2}, {self.F_ThreadPerBlock_M:2}, {self.F_ThreadPerBlock_N:4}"
+            t_ += f", {self.F_Vector_N:2}, {BOOL_MAP(self.F_kPadN):5}, {BOOL_MAP(self.F_kSaveInvRms):5}, {BOOL_MAP(self.F_kSaveUnquant):5}"
+            t_ += f", {BOOL_MAP(self.F_kTwoPass):5}, {self.F_kFusedAdd:4}, {self.F_kFusedQuant:4}"
             return t_
 
         # string when calling this kernel
         @property
         def call_name(self) -> str:
-            return f'rmsnorm2d_fwd_<traits_<{self.trait_name}>>'
+            return f"rmsnorm2d_fwd_<traits_<{self.trait_name}>>"
 
         # string when define this kernel
         @property
         def def_name(self) -> str:
-            return f'template float rmsnorm2d_fwd_<traits_<{self.trait_name}>>(const S&, A);'
+            return f"template float rmsnorm2d_fwd_<traits_<{self.trait_name}>>(const S&, A);"
 
     # this class hold kernel under same source file
     @dataclass
     class h_instance:
-        F_DataTypePair : str
-        F_N : str
-        F_add : int
-        F_sweep : int
-        F_saveunquant : bool
-        instance_list : List[Any] # List[h_traits]
+        F_DataTypePair: str
+        F_N: str
+        F_add: int
+        F_sweep: int
+        F_saveunquant: bool
+        instance_list: List[Any]  # List[h_traits]
 
         @property
         def name(self) -> str:
-            prec_i, prec_o = self.F_DataTypePair.split(',')
-            dtype_str = f'{prec_i}' if prec_i == prec_o else f'{prec_i}_{prec_o}'
-            nnn = f'rmsnorm2d_fwd_{dtype_str}_n{self.F_N}'
+            prec_i, prec_o = self.F_DataTypePair.split(",")
+            dtype_str = f"{prec_i}" if prec_i == prec_o else f"{prec_i}_{prec_o}"
+            nnn = f"rmsnorm2d_fwd_{dtype_str}_n{self.F_N}"
             if self.F_add != 0:
-                nnn = nnn + '_' + FUSED_ADD_ENUM_STR_MAP[self.F_add]
+                nnn = nnn + "_" + FUSED_ADD_ENUM_STR_MAP[self.F_add]
             if self.F_sweep != 0:
-                nnn = nnn + '_' + FUSED_FUSED_SWEEP_STR_MAP[self.F_sweep]
+                nnn = nnn + "_" + FUSED_FUSED_SWEEP_STR_MAP[self.F_sweep]
             if self.F_saveunquant:
-                nnn = nnn + '_saveunquant'
+                nnn = nnn + "_saveunquant"
             return nnn
 
         @property
-        def instance_name(self) ->str:
+        def instance_name(self) -> str:
             return self.name
 
         @property
-        def content(self) ->str:
-            instance_defs = ''
+        def content(self) -> str:
+            instance_defs = ""
             for ins in self.instance_list:
-                instance_defs += ins.def_name + '\n'
-            return rmsnorm_fwd_codegen.INSTANCE_BASE.format(F_instance_def=instance_defs)
+                instance_defs += ins.def_name + "\n"
+            return rmsnorm_fwd_codegen.INSTANCE_BASE.format(
+                F_instance_def=instance_defs
+            )
 
     @property
     def name_api(self) -> str:
-        return 'rmsnorm2d_fwd_api'
+        return "rmsnorm2d_fwd_api"
 
     @property
     def name_common_header(self) -> str:
-        return 'rmsnorm2d_fwd_api_common'
+        return "rmsnorm2d_fwd_api_common"
 
     @property
     def content_api(self) -> str:
@@ -422,40 +431,65 @@ float rmsnorm2d_fwd(rmsnorm2d_fwd_traits t,
                 t_dtype_dict[blob.F_DataTypePair][blob.F_N] = []
             t_dtype_dict[blob.F_DataTypePair][blob.F_N].append(blob)
 
-        d_str = ''
+        d_str = ""
         for i_d, dtype_ in enumerate(t_dtype_dict):
             blob_per_t = t_dtype_dict[dtype_]
-            n_str = ''
+            n_str = ""
             for i_n, n_ in enumerate(blob_per_t):
                 blob_per_n = blob_per_t[n_]
                 inner_str = ""
                 for i_b, b_ in enumerate(blob_per_n):
                     # generate single kernel instance file
-                    #vec_str = ""
+                    # vec_str = ""
                     for i_ins, ins in enumerate(b_.instance_list):
                         idx_in_n = i_b * len(b_.instance_list) + i_ins
                         len_in_n = len(blob_per_n) * len(b_.instance_list)
                         # _if = 'if' if i_ins == 0 else 'else if'
                         if ins.F_kFusedQuant == 0:
-                            _sweep_cond = 't.fused_quant == {f_fused_sweep}'.format(f_fused_sweep = ins.F_kFusedQuant)
+                            _sweep_cond = "t.fused_quant == {f_fused_sweep}".format(
+                                f_fused_sweep=ins.F_kFusedQuant
+                            )
                         elif ins.F_kFusedQuant == 1:
-                            _sweep_cond = 't.fused_quant == {f_fused_sweep} && (t.prec_sm == \"{f_sx_type}\" && t.prec_sy == \"{f_sy_type}\" && t.save_unquant == {f_suq})'.format(
-                                f_fused_sweep = ins.F_kFusedQuant, f_sx_type=ins.F_SmoothScaleDataType, f_sy_type=ins.F_YScaleDataType, f_suq=BOOL_MAP(ins.F_kSaveUnquant))
+                            _sweep_cond = 't.fused_quant == {f_fused_sweep} && (t.prec_sm == "{f_sx_type}" && t.prec_sy == "{f_sy_type}" && t.save_unquant == {f_suq})'.format(
+                                f_fused_sweep=ins.F_kFusedQuant,
+                                f_sx_type=ins.F_SmoothScaleDataType,
+                                f_sy_type=ins.F_YScaleDataType,
+                                f_suq=BOOL_MAP(ins.F_kSaveUnquant),
+                            )
                         elif ins.F_kFusedQuant == 2:
-                            _sweep_cond = 't.fused_quant == {f_fused_sweep} && (t.prec_sy == \"{f_sy_type}\" && t.save_unquant == {f_suq})'.format(
-                                f_fused_sweep = ins.F_kFusedQuant, f_sy_type=ins.F_YScaleDataType, f_suq=BOOL_MAP(ins.F_kSaveUnquant))
-                        _cond = '((a.n % {f_vec_n} == 0) && (t.fused_add == {f_fused_add}) && ({f_sweep_cond}))'.format(
-                                        f_vec_n = ins.F_Vector_N, f_fused_add = ins.F_kFusedAdd,
-                                        f_sweep_cond = _sweep_cond)
-                        inner_str += self.API_INNER_CASE.format(F_if = get_if_str(idx_in_n, len_in_n, False),
-                                            F_VEC_COND = _cond, F_instance_func=ins.call_name)
-                    #inner_str = inner_str + vec_str
-                n_cnd = f'(a.n <= {n_})' if (i_n < len(blob_per_t) - 1) else ''
-                n_str += self.API_PER_N_CASE.format(F_if = get_if_str(i_n, len(blob_per_t)), F_N_COND=n_cnd, F_inner_dispatch=inner_str)
-            prec_i, prec_o = dtype_.split(',')
-            d_str += self.API_PER_DTYPE.format(F_if = get_if_str(i_d, len(t_dtype_dict), False), F_i_type=prec_i, F_o_type=prec_o, F_per_n_case=n_str)
+                            _sweep_cond = 't.fused_quant == {f_fused_sweep} && (t.prec_sy == "{f_sy_type}" && t.save_unquant == {f_suq})'.format(
+                                f_fused_sweep=ins.F_kFusedQuant,
+                                f_sy_type=ins.F_YScaleDataType,
+                                f_suq=BOOL_MAP(ins.F_kSaveUnquant),
+                            )
+                        _cond = "((a.n % {f_vec_n} == 0) && (t.fused_add == {f_fused_add}) && ({f_sweep_cond}))".format(
+                            f_vec_n=ins.F_Vector_N,
+                            f_fused_add=ins.F_kFusedAdd,
+                            f_sweep_cond=_sweep_cond,
+                        )
+                        inner_str += self.API_INNER_CASE.format(
+                            F_if=get_if_str(idx_in_n, len_in_n, False),
+                            F_VEC_COND=_cond,
+                            F_instance_func=ins.call_name,
+                        )
+                    # inner_str = inner_str + vec_str
+                n_cnd = f"(a.n <= {n_})" if (i_n < len(blob_per_t) - 1) else ""
+                n_str += self.API_PER_N_CASE.format(
+                    F_if=get_if_str(i_n, len(blob_per_t)),
+                    F_N_COND=n_cnd,
+                    F_inner_dispatch=inner_str,
+                )
+            prec_i, prec_o = dtype_.split(",")
+            d_str += self.API_PER_DTYPE.format(
+                F_if=get_if_str(i_d, len(t_dtype_dict), False),
+                F_i_type=prec_i,
+                F_o_type=prec_o,
+                F_per_n_case=n_str,
+            )
 
-        api_base = self.API_BASE.format(F_traits_define=self.API_TRAITS_DEFINE, F_dispatch=d_str)
+        api_base = self.API_BASE.format(
+            F_traits_define=self.API_TRAITS_DEFINE, F_dispatch=d_str
+        )
         return api_base
 
     @property
@@ -466,86 +500,987 @@ float rmsnorm2d_fwd(rmsnorm2d_fwd_traits t,
         h_traits = rmsnorm_fwd_codegen.h_traits
         h_instance = rmsnorm_fwd_codegen.h_instance
 
-        dynamic_quant_out_dtype = ['int8', 'fp8']
+        dynamic_quant_out_dtype = ["int8", "fp8"]
         # some predefined support range
         # (prec_i,prec_o) for simplicity this string will be used as key for dict
-        scale_list = [('fp32,fp32')]
-        dtype_list = [('fp16,fp16'), ('bf16,bf16'),
-                        ('fp16,int8'), ('bf16,int8'),
-                        ('fp16,fp8'), ('bf16,fp8')] # NOTE: only fused-dynamic-quant use int8 out
-        #fused_add_list = [0, 1, 2]
-        #fused_sweep_list = [0, 1, 2] # NOTE: only single pass can use fused (smooth) dynamic quant
+        scale_list = [("fp32,fp32")]
+        dtype_list = [
+            ("fp16,fp16"),
+            ("bf16,bf16"),
+            ("fp16,int8"),
+            ("bf16,int8"),
+            ("fp16,fp8"),
+            ("bf16,fp8"),
+        ]  # NOTE: only fused-dynamic-quant use int8 out
+        # fused_add_list = [0, 1, 2]
+        # fused_sweep_list = [0, 1, 2] # NOTE: only single pass can use fused (smooth) dynamic quant
         fused_add_list = [0, 1]
-        fused_sweep_list = [0, 1, 2] # NOTE: only single pass can use fused (smooth) dynamic quant
+        fused_sweep_list = [
+            0,
+            1,
+            2,
+        ]  # NOTE: only single pass can use fused (smooth) dynamic quant
         bool_list = [False, True]
 
         #                                                              rm  rn  tm   tn  vn  pd     mv     unquant  2p     add    sweep
-        h_trait_dict = {'64'  : [ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  8,  8,  8,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  4,  16, 4,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  4,  64, 1,  True,  False, False,   False,   0,    0)],
-                        '128' : [ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  4,  16, 8,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  4,  64, 2,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  4,  64, 1,  True,  False, False,   False,   0,    0)],
-                        '256' : [ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  4,  64, 4,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  4,  64, 2,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  4,  64, 1,  True,  False, False,   False,   0,    0)],
-                        '512' : [ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  4,  64, 8,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  4,  64, 4,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  4,  64, 2,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  8,  4,  64, 1,  True,  False, False,   False,   0,    0)],
-                        '640' : [ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  5,  4,  64, 2,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  5,  4, 128, 1,  True,  False, False,   False,   0,    0)],
-                        '768' : [ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  4,  64, 4,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  6,  4,  64, 2,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1, 12,  4,  64, 1,  True,  False, False,   False,   0,    0)],
-                        '1024' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  2,  64, 8,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  2,  64, 4,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  8,  2,  64, 2,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1, 256, 1,  True,  False, False,   False,   0,    0)],
-                        '1536' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  4,  64, 8,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  2, 128, 4,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1, 256, 2,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  6,  1, 256, 1,  True,  False, False,   False,   0,    0)],
-                        '2048' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  1, 256, 8,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  1, 256, 4,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1, 256, 2,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  8,  1, 256, 1,  True,  False, False,   False,   0,    0)],
-                        '3072' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1, 128, 8,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1, 256, 4,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  6,  1, 256, 2,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1,1024, 1,  True,  False, False,   False,   0,    0)],
-                        '4096' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  1, 256, 8,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1, 256, 4,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  2,  1,1024, 2,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1,1024, 1,  True,  False, False,   False,   0,    0)],
-                        '6144' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1, 256, 8,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1, 512, 4,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  3,  1,1024, 2,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  6,  1,1024, 1,  True,  False, False,   False,   0,    0)],
-                        '8192' :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1, 256, 8,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1, 512, 4,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1,1024, 2,  True,  False, False,   False,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  8,  1,1024, 1,  True,  False, False,   False,   0,    0)],
-                        'big'  :[ h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  1,  1,1024, 8,  True,  False, False,    True,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1, 256, 4,  True,  False, False,    True,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1, 12,  1, 256, 2,  True,  False, False,    True,   0,    0),
-                                  h_traits('x', 'y', 'xs', 'ys', 'uqy', 1,  4,  1,1024, 1,  True,  False, False,    True,   0,    0)]}
+        h_trait_dict = {
+            "64": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    1,
+                    8,
+                    8,
+                    8,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    1,
+                    4,
+                    16,
+                    4,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    1,
+                    4,
+                    64,
+                    1,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+            ],
+            "128": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    1,
+                    4,
+                    16,
+                    8,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    1,
+                    4,
+                    64,
+                    2,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    2,
+                    4,
+                    64,
+                    1,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+            ],
+            "256": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    1,
+                    4,
+                    64,
+                    4,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    2,
+                    4,
+                    64,
+                    2,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    4,
+                    4,
+                    64,
+                    1,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+            ],
+            "512": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    1,
+                    4,
+                    64,
+                    8,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    2,
+                    4,
+                    64,
+                    4,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    4,
+                    4,
+                    64,
+                    2,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    8,
+                    4,
+                    64,
+                    1,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+            ],
+            "640": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    5,
+                    4,
+                    64,
+                    2,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    5,
+                    4,
+                    128,
+                    1,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+            ],
+            "768": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    3,
+                    4,
+                    64,
+                    4,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    6,
+                    4,
+                    64,
+                    2,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    12,
+                    4,
+                    64,
+                    1,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+            ],
+            "1024": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    2,
+                    2,
+                    64,
+                    8,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    4,
+                    2,
+                    64,
+                    4,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    8,
+                    2,
+                    64,
+                    2,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    4,
+                    1,
+                    256,
+                    1,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+            ],
+            "1536": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    3,
+                    4,
+                    64,
+                    8,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    3,
+                    2,
+                    128,
+                    4,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    3,
+                    1,
+                    256,
+                    2,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    6,
+                    1,
+                    256,
+                    1,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+            ],
+            "2048": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    1,
+                    1,
+                    256,
+                    8,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    2,
+                    1,
+                    256,
+                    4,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    4,
+                    1,
+                    256,
+                    2,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    8,
+                    1,
+                    256,
+                    1,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+            ],
+            "3072": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    3,
+                    1,
+                    128,
+                    8,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    3,
+                    1,
+                    256,
+                    4,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    6,
+                    1,
+                    256,
+                    2,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    3,
+                    1,
+                    1024,
+                    1,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+            ],
+            "4096": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    2,
+                    1,
+                    256,
+                    8,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    4,
+                    1,
+                    256,
+                    4,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    2,
+                    1,
+                    1024,
+                    2,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    4,
+                    1,
+                    1024,
+                    1,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+            ],
+            "6144": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    3,
+                    1,
+                    256,
+                    8,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    3,
+                    1,
+                    512,
+                    4,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    3,
+                    1,
+                    1024,
+                    2,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    6,
+                    1,
+                    1024,
+                    1,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+            ],
+            "8192": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    4,
+                    1,
+                    256,
+                    8,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    4,
+                    1,
+                    512,
+                    4,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    4,
+                    1,
+                    1024,
+                    2,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    8,
+                    1,
+                    1024,
+                    1,
+                    True,
+                    False,
+                    False,
+                    False,
+                    0,
+                    0,
+                ),
+            ],
+            "big": [
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    1,
+                    1,
+                    1024,
+                    8,
+                    True,
+                    False,
+                    False,
+                    True,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    4,
+                    1,
+                    256,
+                    4,
+                    True,
+                    False,
+                    False,
+                    True,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    12,
+                    1,
+                    256,
+                    2,
+                    True,
+                    False,
+                    False,
+                    True,
+                    0,
+                    0,
+                ),
+                h_traits(
+                    "x",
+                    "y",
+                    "xs",
+                    "ys",
+                    "uqy",
+                    1,
+                    4,
+                    1,
+                    1024,
+                    1,
+                    True,
+                    False,
+                    False,
+                    True,
+                    0,
+                    0,
+                ),
+            ],
+        }
         total_blob = list()
         for hs_key in h_trait_dict:
             hs = h_trait_dict[hs_key]
             current_n = hs[0].F_Repeat_N * hs[0].F_ThreadPerBlock_N * hs[0].F_Vector_N
-            for dtype, scale_type, fused_add, fused_quant, save_unquant in itertools.product(dtype_list, scale_list, fused_add_list, fused_sweep_list, bool_list):
-                prec_i, prec_o = dtype.split(',')
-                scale_sm, scale_y = scale_type.split(',')
-                if prec_o in dynamic_quant_out_dtype and fused_quant != 1 and fused_quant != 2:
-                    continue # skip non dynamic quant case
-                if (fused_quant == 1 or fused_quant == 2) and hs_key == 'big':
+            for (
+                dtype,
+                scale_type,
+                fused_add,
+                fused_quant,
+                save_unquant,
+            ) in itertools.product(
+                dtype_list, scale_list, fused_add_list, fused_sweep_list, bool_list
+            ):
+                prec_i, prec_o = dtype.split(",")
+                scale_sm, scale_y = scale_type.split(",")
+                if (
+                    prec_o in dynamic_quant_out_dtype
+                    and fused_quant != 1
+                    and fused_quant != 2
+                ):
+                    continue  # skip non dynamic quant case
+                if (fused_quant == 1 or fused_quant == 2) and hs_key == "big":
                     continue
-                if (fused_quant == 0 and save_unquant == True):
-                    continue # save_unquant should always be false when there is no quant enabled
+                if fused_quant == 0 and save_unquant:
+                    continue  # save_unquant should always be false when there is no quant enabled
                 current_hs = list()
                 for chs_ in hs:
-                    h_ = copy.copy(chs_) # copy the base instance out
+                    h_ = copy.copy(chs_)  # copy the base instance out
                     h_.F_XDataType = prec_i
                     h_.F_YDataType = prec_o
                     h_.F_SmoothScaleDataType = scale_sm
@@ -554,20 +1489,29 @@ float rmsnorm2d_fwd(rmsnorm2d_fwd_traits t,
                     h_.F_kFusedAdd = fused_add
                     h_.F_kFusedQuant = fused_quant
                     h_.F_kSaveUnquant = save_unquant
-                    current_hs.append(h_) # + "\n"
-                #f.write(str(f.parent / GEN_DIR / (blobs.api_common_header_
-                current_n_str = 'big' if hs_key == 'big' else current_n
-                total_blob.append(h_instance(dtype, current_n_str, fused_add, fused_quant, save_unquant, current_hs))
+                    current_hs.append(h_)  # + "\n"
+                # f.write(str(f.parent / GEN_DIR / (blobs.api_common_header_
+                current_n_str = "big" if hs_key == "big" else current_n
+                total_blob.append(
+                    h_instance(
+                        dtype,
+                        current_n_str,
+                        fused_add,
+                        fused_quant,
+                        save_unquant,
+                        current_hs,
+                    )
+                )
         return total_blob
 
     def list_blobs(self) -> None:
         w_p = Path(self.working_path)
-        list_p = w_p / 'rmsnorm2d_fwd_blobs.txt'
+        list_p = w_p / "rmsnorm2d_fwd_blobs.txt"
         blobs = self.get_blobs()
-        with list_p.open('w') as list_f:
+        with list_p.open("w") as list_f:
             # api related file
-            list_f.write(str(w_p / (self.name_api + ".cpp"))  + "\n")
-            list_f.write(str(w_p / (self.name_common_header + ".hpp"))  + "\n")
+            list_f.write(str(w_p / (self.name_api + ".cpp")) + "\n")
+            list_f.write(str(w_p / (self.name_common_header + ".hpp")) + "\n")
             # kernel instance file
             for b in blobs:
                 list_f.write(str(w_p / (b.name + ".cpp")) + "\n")
@@ -575,23 +1519,25 @@ float rmsnorm2d_fwd(rmsnorm2d_fwd_traits t,
     def gen_blobs(self) -> None:
         w_p = Path(self.working_path)
         (w_p / (self.name_api + ".cpp")).write_text(self.content_api)
-        (w_p / (self.name_common_header + ".hpp")).write_text(self.content_common_header)
+        (w_p / (self.name_common_header + ".hpp")).write_text(
+            self.content_common_header
+        )
         blobs = self.get_blobs()
         for b in blobs:
             (w_p / (b.name + ".cpp")).write_text(b.content)
 
 
 def list_blobs(args):
-    api_list = args.api.split(',')
+    api_list = args.api.split(",")
     for api in api_list:
-        if api == 'fwd':
+        if api == "fwd":
             rmsnorm_fwd_codegen(args.working_path, args.filter).list_blobs()
 
 
 def gen_blobs(args):
-    api_list = args.api.split(',')
+    api_list = args.api.split(",")
     for api in api_list:
-        if api == 'fwd':
+        if api == "fwd":
             rmsnorm_fwd_codegen(args.working_path, args.filter).gen_blobs()
 
 
@@ -603,9 +1549,9 @@ if __name__ == "__main__":
     parser.add_argument(
         "-a",
         "--api",
-        default='fwd[all]',
+        default="fwd[all]",
         required=False,
-        help="supply API(s) to generate (default: fwd). separated by comma."
+        help="supply API(s) to generate (default: fwd). separated by comma.",
     )
 
     # the directory for list_blobs/gen_blobs to write files into
@@ -614,7 +1560,7 @@ if __name__ == "__main__":
         "--working_path",
         default="./",
         required=False,
-        help="the path where all the blobs are going to be generated"
+        help="the path where all the blobs are going to be generated",
     )
 
     # this script have 2 modes
@@ -626,15 +1572,15 @@ if __name__ == "__main__":
     parser.add_argument(
         "-l",
         "--list_blobs",
-        action='store_true',
-        help="list all the kernels to a file, "
+        action="store_true",
+        help="list all the kernels to a file, ",
     )
 
     parser.add_argument(
         "-g",
         "--gen_blobs",
-        action='store_true',
-        help="generate all kernels into different tile"
+        action="store_true",
+        help="generate all kernels into different tile",
     )
 
     # TODO: if using filter, must apply same value to output_dir and list_blobs
@@ -642,7 +1588,7 @@ if __name__ == "__main__":
         "-f",
         "--filter",
         required=False,
-        help="filter out kernels that need to generate, using fnmatch module"
+        help="filter out kernels that need to generate, using fnmatch module",
     )
 
     parser.add_argument(
@@ -650,22 +1596,20 @@ if __name__ == "__main__":
         "--traits",
         default="all",
         required=False,
-        help="enable/disable some feature. default generate all"
+        help="enable/disable some feature. default generate all",
     )
 
     parser.add_argument(
-        "-r",
-        "--receipt",
-        default=0,
-        required=False,
-        help="codegen receipt."
+        "-r", "--receipt", default=0, required=False, help="codegen receipt."
     )
 
     args = parser.parse_args()
 
     # print(f'{args.list_blobs}-{args.gen_blobs}')
-    if (args.gen_blobs and args.list_blobs) or ((not args.gen_blobs) and (not args.list_blobs)):
-        print('gen_blobs/list_blobs must specify only one option')
+    if (args.gen_blobs and args.list_blobs) or (
+        (not args.gen_blobs) and (not args.list_blobs)
+    ):
+        print("gen_blobs/list_blobs must specify only one option")
         sys.exit()
 
     p = Path(args.working_path)
diff --git a/test_data/generate_model_configs.py b/test_data/generate_model_configs.py
index f852d781d6..567870fd73 100644
--- a/test_data/generate_model_configs.py
+++ b/test_data/generate_model_configs.py
@@ -10,28 +10,37 @@ and saves them as CSV files that can be read by the shell script.
 """
 
 import csv
-import itertools
 import argparse
 
-def generate_2d_configs(mode='full'):
+
+def generate_2d_configs(mode="full"):
     """Generate all 2D model configuration combinations
-    
+
     Args:
         mode: 'small' for minimal set (~50 configs), 'half' for reduced set (~250 configs), 'full' for comprehensive set (~500 configs)
     """
-    
+
     # Define parameter ranges
     models_2d = [
-        'resnet18', 'resnet34', 'resnet50', 
-        'mobilenet_v2', 'mobilenet_v3_large', 'mobilenet_v3_small',
-        'vgg11', 'vgg16', 'vgg19',
-        'alexnet', 'googlenet',
-        'densenet121', 'densenet161',
-        'squeezenet1_0', 'squeezenet1_1',
-        'shufflenet_v2_x1_0'
+        "resnet18",
+        "resnet34",
+        "resnet50",
+        "mobilenet_v2",
+        "mobilenet_v3_large",
+        "mobilenet_v3_small",
+        "vgg11",
+        "vgg16",
+        "vgg19",
+        "alexnet",
+        "googlenet",
+        "densenet121",
+        "densenet161",
+        "squeezenet1_0",
+        "squeezenet1_1",
+        "shufflenet_v2_x1_0",
     ]
-    
-    if mode == 'small':
+
+    if mode == "small":
         # Minimal set for quick testing
         batch_sizes = [1, 8]  # Just two batch sizes
         # Very limited input dimensions - only 2 key sizes
@@ -41,12 +50,12 @@ def generate_2d_configs(mode='full'):
         ]
         # Use only first 3 models for minimal testing
         models_2d = models_2d[:3]  # Only resnet18, resnet34, resnet50
-    elif mode == 'half':
+    elif mode == "half":
         # Reduced set for faster testing
         batch_sizes = [1, 8, 32]  # Small, medium, large
         # Reduced input dimensions - 5 key sizes
         input_dims = [
-            (64, 64),    # Small
+            (64, 64),  # Small
             (224, 224),  # Standard (most common)
             (512, 512),  # Large
             (224, 320),  # Rectangular
@@ -57,18 +66,23 @@ def generate_2d_configs(mode='full'):
         batch_sizes = [1, 4, 8, 16, 32]
         # More dimensions but skip some redundant ones
         input_dims = [
-            (64, 64), (128, 128), (224, 224), (256, 256), (512, 512),  # Square
-            (224, 320), (320, 224),  # Rectangular (reduced from 4)
+            (64, 64),
+            (128, 128),
+            (224, 224),
+            (256, 256),
+            (512, 512),  # Square
+            (224, 320),
+            (320, 224),  # Rectangular (reduced from 4)
             (227, 227),  # AlexNet preferred
-            (299, 299)   # Inception preferred
+            (299, 299),  # Inception preferred
         ]
-    
-    precisions = ['fp32'] #, 'fp16', 'bf16']
+
+    precisions = ["fp32"]  # , 'fp16', 'bf16']
     channels = [3]  # Most models expect RGB
-    
+
     configs = []
     config_id = 1
-    
+
     # Generate all combinations (but limit to reasonable subset)
     for model in models_2d:
         for batch_size in batch_sizes:
@@ -77,36 +91,37 @@ def generate_2d_configs(mode='full'):
                     # Skip some combinations to keep dataset manageable
                     if batch_size > 16 and height > 256:
                         continue  # Skip large batch + large image combinations
-                    if precision != 'fp32' and batch_size < 8:
+                    if precision != "fp32" and batch_size < 8:
                         continue  # Skip mixed precision with tiny batches
-                    
+
                     config_name = f"{model}_b{batch_size}_{height}x{width}_{precision}"
-                    
+
                     config = {
-                        'config_name': config_name,
-                        'model': model,
-                        'batch_size': batch_size,
-                        'channels': channels[0],
-                        'height': height,
-                        'width': width,
-                        'precision': precision
+                        "config_name": config_name,
+                        "model": model,
+                        "batch_size": batch_size,
+                        "channels": channels[0],
+                        "height": height,
+                        "width": width,
+                        "precision": precision,
                     }
-                    
+
                     configs.append(config)
                     config_id += 1
-    
+
     return configs
 
-def generate_3d_configs(mode='full'):
+
+def generate_3d_configs(mode="full"):
     """Generate all 3D model configuration combinations
-    
+
     Args:
         mode: 'small' for minimal set (~10 configs), 'half' for reduced set (~50 configs), 'full' for comprehensive set (~100 configs)
     """
-    
-    models_3d = ['r3d_18', 'mc3_18', 'r2plus1d_18']
-    
-    if mode == 'small':
+
+    models_3d = ["r3d_18", "mc3_18", "r2plus1d_18"]
+
+    if mode == "small":
         # Minimal set for quick testing
         batch_sizes = [1, 4]  # Just two batch sizes
         temporal_sizes = [8]  # Only smallest temporal size
@@ -116,7 +131,7 @@ def generate_3d_configs(mode='full'):
         ]
         # Use only first model for minimal testing
         models_3d = models_3d[:1]  # Only r3d_18
-    elif mode == 'half':
+    elif mode == "half":
         # Reduced set for faster testing
         batch_sizes = [1, 4, 8]  # Skip batch_size=2
         temporal_sizes = [8, 16]  # Skip 32 (most expensive)
@@ -124,7 +139,7 @@ def generate_3d_configs(mode='full'):
         input_dims = [
             (112, 112),  # Small (common for video)
             (224, 224),  # Standard
-            (224, 320)   # Rectangular
+            (224, 320),  # Rectangular
         ]
     else:  # full mode
         # More comprehensive but still reasonable
@@ -132,15 +147,18 @@ def generate_3d_configs(mode='full'):
         temporal_sizes = [8, 16, 32]
         # More dimensions
         input_dims = [
-            (112, 112), (224, 224), (256, 256),  # Standard sizes
-            (224, 320), (320, 224)               # Rectangular
+            (112, 112),
+            (224, 224),
+            (256, 256),  # Standard sizes
+            (224, 320),
+            (320, 224),  # Rectangular
         ]
-    
-    precisions = ['fp32'] #, 'fp16']  # Skip bf16 for 3D to reduce combinations
+
+    precisions = ["fp32"]  # , 'fp16']  # Skip bf16 for 3D to reduce combinations
     channels = [3]
-    
+
     configs = []
-    
+
     for model in models_3d:
         for batch_size in batch_sizes:
             for temporal_size in temporal_sizes:
@@ -151,75 +169,97 @@ def generate_3d_configs(mode='full'):
                             continue
                         if batch_size > 2 and height > 224:
                             continue
-                            
+
                         config_name = f"{model}_b{batch_size}_t{temporal_size}_{height}x{width}_{precision}"
-                        
+
                         config = {
-                            'config_name': config_name,
-                            'model': model,
-                            'batch_size': batch_size,
-                            'channels': channels[0],
-                            'temporal_size': temporal_size,
-                            'height': height,
-                            'width': width,
-                                'precision': precision
-                            }
-                        
+                            "config_name": config_name,
+                            "model": model,
+                            "batch_size": batch_size,
+                            "channels": channels[0],
+                            "temporal_size": temporal_size,
+                            "height": height,
+                            "width": width,
+                            "precision": precision,
+                        }
+
                         configs.append(config)
-    
+
     return configs
 
+
 def save_configs_to_csv(configs, filename, config_type):
     """Save configurations to CSV file"""
-    
+
     if not configs:
         print(f"No {config_type} configurations generated")
         return
-    
+
     fieldnames = list(configs[0].keys())
-    
-    with open(filename, 'w', newline='\n', encoding='utf-8') as csvfile:
+
+    with open(filename, "w", newline="\n", encoding="utf-8") as csvfile:
         csvfile.write(f"# {config_type} Model Configurations\n")
         csvfile.write(f"# Generated {len(configs)} configurations\n")
-        
-        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, lineterminator='\n')
+
+        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, lineterminator="\n")
         writer.writeheader()
-        
+
         for config in configs:
             writer.writerow(config)
-    
+
     print(f"Generated {len(configs)} {config_type} configurations → {filename}")
 
+
 def main():
-    parser = argparse.ArgumentParser(description='Generate model configuration combinations')
-    parser.add_argument('--output-2d', type=str, default='model_configs_2d.csv',
-                       help='Output file for 2D configurations')
-    parser.add_argument('--output-3d', type=str, default='model_configs_3d.csv', 
-                       help='Output file for 3D configurations')
-    parser.add_argument('--mode', choices=['small', 'half', 'full'], default='full',
-                       help='Configuration mode: small (~60 total), half (~300 total) or full (~600 total) (default: half)')
-    parser.add_argument('--limit', type=int, 
-                       help='Limit number of configurations per type (for testing)')
-    
+    parser = argparse.ArgumentParser(
+        description="Generate model configuration combinations"
+    )
+    parser.add_argument(
+        "--output-2d",
+        type=str,
+        default="model_configs_2d.csv",
+        help="Output file for 2D configurations",
+    )
+    parser.add_argument(
+        "--output-3d",
+        type=str,
+        default="model_configs_3d.csv",
+        help="Output file for 3D configurations",
+    )
+    parser.add_argument(
+        "--mode",
+        choices=["small", "half", "full"],
+        default="full",
+        help="Configuration mode: small (~60 total), half (~300 total) or full (~600 total) (default: half)",
+    )
+    parser.add_argument(
+        "--limit",
+        type=int,
+        help="Limit number of configurations per type (for testing)",
+    )
+
     args = parser.parse_args()
-    
+
     print(f"Generating {args.mode} model configurations...")
-    
+
     print("Generating 2D model configurations...")
     configs_2d = generate_2d_configs(mode=args.mode)
     if args.limit:
-        configs_2d = configs_2d[:args.limit]
+        configs_2d = configs_2d[: args.limit]
     save_configs_to_csv(configs_2d, args.output_2d, "2D")
-    
+
     print("Generating 3D model configurations...")
     configs_3d = generate_3d_configs(mode=args.mode)
     if args.limit:
-        configs_3d = configs_3d[:args.limit]
+        configs_3d = configs_3d[: args.limit]
     save_configs_to_csv(configs_3d, args.output_3d, "3D")
-    
-    print(f"\nTotal configurations: {len(configs_2d)} 2D + {len(configs_3d)} 3D = {len(configs_2d) + len(configs_3d)}")
+
+    print(
+        f"\nTotal configurations: {len(configs_2d)} 2D + {len(configs_3d)} 3D = {len(configs_2d) + len(configs_3d)}"
+    )
     print("\nTo use these configurations:")
     print("  Update generate_test_dataset.sh to read from these CSV files")
 
+
 if __name__ == "__main__":
     main()
diff --git a/test_data/miopen_to_csv.py b/test_data/miopen_to_csv.py
index 3292584548..d6a85e1e3f 100644
--- a/test_data/miopen_to_csv.py
+++ b/test_data/miopen_to_csv.py
@@ -18,301 +18,428 @@ import csv
 import re
 import os
 
+
 def parse_miopen_command(command_line):
     """
     Parse MIOpen driver command line into parameter dictionary
-    
+
     Example input:
     ./bin/MIOpenDriver conv -n 4 -c 3 -H 224 -W 224 -k 64 -y 3 -x 3 -p 1 -q 1 -u 1 -v 1 -l 1 -j 1 -m conv -g 1 -F 1 -t 1
-    
+
     Returns dict with parsed parameters or None if parsing fails
     """
-    if not command_line.strip().startswith('./bin/MIOpenDriver conv'):
+    if not command_line.strip().startswith("./bin/MIOpenDriver conv"):
         return None
-    
+
     # Extract parameters using regex
     params = {}
-    
+
     # Parameter mapping: flag -> description
     # Support both short (-D) and long (--in_d) parameter formats
     param_patterns = {
-        'n': r'-n\s+(\d+)',      # batch size
-        'c': r'-c\s+(\d+)',      # input channels  
-        'k': r'-k\s+(\d+)',      # output channels
-        'H': r'-H\s+(\d+)',      # input height
-        'W': r'-W\s+(\d+)',      # input width
-        'D': r'(?:-D|--in_d)\s+(\d+)',      # input depth (3D only) - supports both -D and --in_d
-        'y': r'-y\s+(\d+)',      # kernel height
-        'x': r'-x\s+(\d+)',      # kernel width  
-        'z': r'(?:-z|--fil_d)\s+(\d+)',      # kernel depth (3D only) - supports both -z and --fil_d
-        'u': r'-u\s+(\d+)',      # stride height
-        'v': r'-v\s+(\d+)',      # stride width
-        'w': r'(?:-w|--conv_stride_d)\s+(\d+)',      # stride depth (3D only) - supports both -w and --conv_stride_d
-        'p': r'-p\s+(\d+)',      # pad height
-        'q': r'-q\s+(\d+)',      # pad width
-        's': r'(?:-s|--pad_d)\s+(\d+)',      # pad depth (3D only) - supports both -s and --pad_d
-        'l': r'-l\s+(\d+)',      # dilation height
-        'j': r'-j\s+(\d+)',      # dilation width
-        'r': r'(?:-r|--dilation_d)\s+(\d+)',      # dilation depth (3D only) - supports both -r and --dilation_d
-        'g': r'-g\s+(\d+)',      # groups
-        'F': r'-F\s+(\d+)',      # direction (1=fwd, 2=bwd_weight, 4=bwd_data)
+        "n": r"-n\s+(\d+)",  # batch size
+        "c": r"-c\s+(\d+)",  # input channels
+        "k": r"-k\s+(\d+)",  # output channels
+        "H": r"-H\s+(\d+)",  # input height
+        "W": r"-W\s+(\d+)",  # input width
+        "D": r"(?:-D|--in_d)\s+(\d+)",  # input depth (3D only) - supports both -D and --in_d
+        "y": r"-y\s+(\d+)",  # kernel height
+        "x": r"-x\s+(\d+)",  # kernel width
+        "z": r"(?:-z|--fil_d)\s+(\d+)",  # kernel depth (3D only) - supports both -z and --fil_d
+        "u": r"-u\s+(\d+)",  # stride height
+        "v": r"-v\s+(\d+)",  # stride width
+        "w": r"(?:-w|--conv_stride_d)\s+(\d+)",  # stride depth (3D only) - supports both -w and --conv_stride_d
+        "p": r"-p\s+(\d+)",  # pad height
+        "q": r"-q\s+(\d+)",  # pad width
+        "s": r"(?:-s|--pad_d)\s+(\d+)",  # pad depth (3D only) - supports both -s and --pad_d
+        "l": r"-l\s+(\d+)",  # dilation height
+        "j": r"-j\s+(\d+)",  # dilation width
+        "r": r"(?:-r|--dilation_d)\s+(\d+)",  # dilation depth (3D only) - supports both -r and --dilation_d
+        "g": r"-g\s+(\d+)",  # groups
+        "F": r"-F\s+(\d+)",  # direction (1=fwd, 2=bwd_weight, 4=bwd_data)
     }
-    
+
     for param, pattern in param_patterns.items():
         match = re.search(pattern, command_line)
         if match:
             params[param] = int(match.group(1))
-    
+
     return params if params else None
 
+
 def miopen_to_conv_param(miopen_params):
     """
     Convert MIOpen parameters to CK ConvParam format
-    
+
     Returns dictionary in CSV format or None if conversion fails
     """
     if not miopen_params:
         return None
-    
+
     # Determine if 2D or 3D convolution
-    is_3d = 'D' in miopen_params or 'z' in miopen_params or 'w' in miopen_params or 'r' in miopen_params or 's' in miopen_params
-    
+    is_3d = (
+        "D" in miopen_params
+        or "z" in miopen_params
+        or "w" in miopen_params
+        or "r" in miopen_params
+        or "s" in miopen_params
+    )
+
     # Extract basic parameters with defaults
     ndim = 3 if is_3d else 2
-    groups = miopen_params.get('g', 1)
-    batch_size = miopen_params.get('n', 1)
+    groups = miopen_params.get("g", 1)
+    batch_size = miopen_params.get("n", 1)
     # MIOpen uses total channels (C*G), CK uses channels per group
-    out_channels_total = miopen_params.get('k', 64)
-    in_channels_total = miopen_params.get('c', 3)
+    out_channels_total = miopen_params.get("k", 64)
+    in_channels_total = miopen_params.get("c", 3)
     out_channels = out_channels_total // groups  # CK format: channels per group
-    in_channels = in_channels_total // groups    # CK format: channels per group
-    
+    in_channels = in_channels_total // groups  # CK format: channels per group
+
     if is_3d:
         # 3D convolution
-        kernel_d = miopen_params.get('z', 3)
-        kernel_h = miopen_params.get('y', 3)
-        kernel_w = miopen_params.get('x', 3)
-        
-        input_d = miopen_params.get('D', 16)
-        input_h = miopen_params.get('H', 32)
-        input_w = miopen_params.get('W', 32)
-        
-        stride_d = miopen_params.get('w', 1)
-        stride_h = miopen_params.get('u', 1)
-        stride_w = miopen_params.get('v', 1)
-        
-        dilation_d = miopen_params.get('r', 1)
-        dilation_h = miopen_params.get('l', 1)
-        dilation_w = miopen_params.get('j', 1)
-        
-        pad_d = miopen_params.get('s', 0)
-        pad_h = miopen_params.get('p', 0)
-        pad_w = miopen_params.get('q', 0)
-        
+        kernel_d = miopen_params.get("z", 3)
+        kernel_h = miopen_params.get("y", 3)
+        kernel_w = miopen_params.get("x", 3)
+
+        input_d = miopen_params.get("D", 16)
+        input_h = miopen_params.get("H", 32)
+        input_w = miopen_params.get("W", 32)
+
+        stride_d = miopen_params.get("w", 1)
+        stride_h = miopen_params.get("u", 1)
+        stride_w = miopen_params.get("v", 1)
+
+        dilation_d = miopen_params.get("r", 1)
+        dilation_h = miopen_params.get("l", 1)
+        dilation_w = miopen_params.get("j", 1)
+
+        pad_d = miopen_params.get("s", 0)
+        pad_h = miopen_params.get("p", 0)
+        pad_w = miopen_params.get("q", 0)
+
         # Calculate output dimensions
-        output_d = (input_d + 2 * pad_d - dilation_d * (kernel_d - 1) - 1) // stride_d + 1
-        output_h = (input_h + 2 * pad_h - dilation_h * (kernel_h - 1) - 1) // stride_h + 1
-        output_w = (input_w + 2 * pad_w - dilation_w * (kernel_w - 1) - 1) // stride_w + 1
-        
+        output_d = (
+            input_d + 2 * pad_d - dilation_d * (kernel_d - 1) - 1
+        ) // stride_d + 1
+        output_h = (
+            input_h + 2 * pad_h - dilation_h * (kernel_h - 1) - 1
+        ) // stride_h + 1
+        output_w = (
+            input_w + 2 * pad_w - dilation_w * (kernel_w - 1) - 1
+        ) // stride_w + 1
+
         # Skip invalid configurations
         if output_d <= 0 or output_h <= 0 or output_w <= 0:
             return None
-        
-        direction = miopen_params.get('F', 1)  # 1=fwd, 2=bwd_weight, 4=bwd_data
-        direction_name = {1: 'fwd', 2: 'bwd_weight', 4: 'bwd_data'}.get(direction, 'fwd')
-        
+
+        direction = miopen_params.get("F", 1)  # 1=fwd, 2=bwd_weight, 4=bwd_data
+        direction_name = {1: "fwd", 2: "bwd_weight", 4: "bwd_data"}.get(
+            direction, "fwd"
+        )
+
         return {
-            'NDim': ndim,
-            'Groups': groups,
-            'BatchSize': batch_size,
-            'OutChannels': out_channels,
-            'InChannels': in_channels,
-            'KernelD': kernel_d, 'KernelH': kernel_h, 'KernelW': kernel_w,
-            'InputD': input_d, 'InputH': input_h, 'InputW': input_w,
-            'OutputD': output_d, 'OutputH': output_h, 'OutputW': output_w,
-            'StrideD': stride_d, 'StrideH': stride_h, 'StrideW': stride_w,
-            'DilationD': dilation_d, 'DilationH': dilation_h, 'DilationW': dilation_w,
-            'LeftPadD': pad_d, 'LeftPadH': pad_h, 'LeftPadW': pad_w,
-            'RightPadD': pad_d, 'RightPadH': pad_h, 'RightPadW': pad_w,
-            'TestName': f'MIOpen_3D_{direction_name}'
+            "NDim": ndim,
+            "Groups": groups,
+            "BatchSize": batch_size,
+            "OutChannels": out_channels,
+            "InChannels": in_channels,
+            "KernelD": kernel_d,
+            "KernelH": kernel_h,
+            "KernelW": kernel_w,
+            "InputD": input_d,
+            "InputH": input_h,
+            "InputW": input_w,
+            "OutputD": output_d,
+            "OutputH": output_h,
+            "OutputW": output_w,
+            "StrideD": stride_d,
+            "StrideH": stride_h,
+            "StrideW": stride_w,
+            "DilationD": dilation_d,
+            "DilationH": dilation_h,
+            "DilationW": dilation_w,
+            "LeftPadD": pad_d,
+            "LeftPadH": pad_h,
+            "LeftPadW": pad_w,
+            "RightPadD": pad_d,
+            "RightPadH": pad_h,
+            "RightPadW": pad_w,
+            "TestName": f"MIOpen_3D_{direction_name}",
         }
-    
+
     else:
         # 2D convolution
-        kernel_h = miopen_params.get('y', 3)
-        kernel_w = miopen_params.get('x', 3)
-        
-        input_h = miopen_params.get('H', 32)
-        input_w = miopen_params.get('W', 32)
-        
-        stride_h = miopen_params.get('u', 1)
-        stride_w = miopen_params.get('v', 1)
-        
-        dilation_h = miopen_params.get('l', 1)
-        dilation_w = miopen_params.get('j', 1)
-        
-        pad_h = miopen_params.get('p', 0)
-        pad_w = miopen_params.get('q', 0)
-        
+        kernel_h = miopen_params.get("y", 3)
+        kernel_w = miopen_params.get("x", 3)
+
+        input_h = miopen_params.get("H", 32)
+        input_w = miopen_params.get("W", 32)
+
+        stride_h = miopen_params.get("u", 1)
+        stride_w = miopen_params.get("v", 1)
+
+        dilation_h = miopen_params.get("l", 1)
+        dilation_w = miopen_params.get("j", 1)
+
+        pad_h = miopen_params.get("p", 0)
+        pad_w = miopen_params.get("q", 0)
+
         # Calculate output dimensions
-        output_h = (input_h + 2 * pad_h - dilation_h * (kernel_h - 1) - 1) // stride_h + 1
-        output_w = (input_w + 2 * pad_w - dilation_w * (kernel_w - 1) - 1) // stride_w + 1
-        
+        output_h = (
+            input_h + 2 * pad_h - dilation_h * (kernel_h - 1) - 1
+        ) // stride_h + 1
+        output_w = (
+            input_w + 2 * pad_w - dilation_w * (kernel_w - 1) - 1
+        ) // stride_w + 1
+
         # Skip invalid configurations
         if output_h <= 0 or output_w <= 0:
             return None
-        
-        direction = miopen_params.get('F', 1)
-        direction_name = {1: 'fwd', 2: 'bwd_weight', 4: 'bwd_data'}.get(direction, 'fwd')
-        
+
+        direction = miopen_params.get("F", 1)
+        direction_name = {1: "fwd", 2: "bwd_weight", 4: "bwd_data"}.get(
+            direction, "fwd"
+        )
+
         return {
-            'NDim': ndim,
-            'Groups': groups,
-            'BatchSize': batch_size,
-            'OutChannels': out_channels,
-            'InChannels': in_channels,
-            'KernelH': kernel_h, 'KernelW': kernel_w,
-            'InputH': input_h, 'InputW': input_w,
-            'OutputH': output_h, 'OutputW': output_w,
-            'StrideH': stride_h, 'StrideW': stride_w,
-            'DilationH': dilation_h, 'DilationW': dilation_w,
-            'LeftPadH': pad_h, 'LeftPadW': pad_w,
-            'RightPadH': pad_h, 'RightPadW': pad_w,
-            'TestName': f'MIOpen_2D_{direction_name}'
+            "NDim": ndim,
+            "Groups": groups,
+            "BatchSize": batch_size,
+            "OutChannels": out_channels,
+            "InChannels": in_channels,
+            "KernelH": kernel_h,
+            "KernelW": kernel_w,
+            "InputH": input_h,
+            "InputW": input_w,
+            "OutputH": output_h,
+            "OutputW": output_w,
+            "StrideH": stride_h,
+            "StrideW": stride_w,
+            "DilationH": dilation_h,
+            "DilationW": dilation_w,
+            "LeftPadH": pad_h,
+            "LeftPadW": pad_w,
+            "RightPadH": pad_h,
+            "RightPadW": pad_w,
+            "TestName": f"MIOpen_2D_{direction_name}",
         }
 
+
 def write_csv_cases(test_cases, output_file, ndim):
     """Write test cases to CSV file"""
     if not test_cases:
         print(f"No {ndim}D test cases to write")
         return
-    
+
     print(f"Writing {len(test_cases)} {ndim}D test cases to {output_file}")
-    
+
     # Define CSV headers based on dimension
     if ndim == 2:
-        headers = ['NDim', 'Groups', 'BatchSize', 'OutChannels', 'InChannels',
-                  'KernelH', 'KernelW', 'InputH', 'InputW', 'OutputH', 'OutputW',
-                  'StrideH', 'StrideW', 'DilationH', 'DilationW', 
-                  'LeftPadH', 'LeftPadW', 'RightPadH', 'RightPadW', 'TestName']
+        headers = [
+            "NDim",
+            "Groups",
+            "BatchSize",
+            "OutChannels",
+            "InChannels",
+            "KernelH",
+            "KernelW",
+            "InputH",
+            "InputW",
+            "OutputH",
+            "OutputW",
+            "StrideH",
+            "StrideW",
+            "DilationH",
+            "DilationW",
+            "LeftPadH",
+            "LeftPadW",
+            "RightPadH",
+            "RightPadW",
+            "TestName",
+        ]
     else:  # 3D
-        headers = ['NDim', 'Groups', 'BatchSize', 'OutChannels', 'InChannels',
-                  'KernelD', 'KernelH', 'KernelW', 'InputD', 'InputH', 'InputW', 
-                  'OutputD', 'OutputH', 'OutputW', 'StrideD', 'StrideH', 'StrideW',
-                  'DilationD', 'DilationH', 'DilationW', 
-                  'LeftPadD', 'LeftPadH', 'LeftPadW', 'RightPadD', 'RightPadH', 'RightPadW', 'TestName']
-    
-    with open(output_file, 'w', newline='') as csvfile:
+        headers = [
+            "NDim",
+            "Groups",
+            "BatchSize",
+            "OutChannels",
+            "InChannels",
+            "KernelD",
+            "KernelH",
+            "KernelW",
+            "InputD",
+            "InputH",
+            "InputW",
+            "OutputD",
+            "OutputH",
+            "OutputW",
+            "StrideD",
+            "StrideH",
+            "StrideW",
+            "DilationD",
+            "DilationH",
+            "DilationW",
+            "LeftPadD",
+            "LeftPadH",
+            "LeftPadW",
+            "RightPadD",
+            "RightPadH",
+            "RightPadW",
+            "TestName",
+        ]
+
+    with open(output_file, "w", newline="") as csvfile:
         # Write header comment
         csvfile.write(f"# {ndim}D Convolution Test Cases from MIOpen Commands\n")
         csvfile.write(f"# Generated {len(test_cases)} test cases\n")
-        
+
         writer = csv.DictWriter(csvfile, fieldnames=headers)
         writer.writeheader()
-        
+
         for test_case in test_cases:
             # Only write fields that exist in headers
             filtered_case = {k: v for k, v in test_case.items() if k in headers}
             writer.writerow(filtered_case)
 
+
 def main():
-    parser = argparse.ArgumentParser(description='Convert MIOpen commands to CSV test cases')
-    
-    parser.add_argument('--input', type=str, required=True,
-                       help='Input file with MIOpen driver commands')
-    parser.add_argument('--output', type=str,
-                       help='Output CSV file (for mixed 2D/3D cases)')
-    parser.add_argument('--output-2d', type=str, default='miopen_conv_2d.csv',
-                       help='Output CSV file for 2D cases')
-    parser.add_argument('--output-3d', type=str, default='miopen_conv_3d.csv',
-                       help='Output CSV file for 3D cases')
-    parser.add_argument('--filter-duplicates', action='store_true',
-                       help='Remove duplicate test cases')
-    parser.add_argument('--model-name', type=str, default='MIOpen',
-                       help='Model name to use in test case names (default: MIOpen)')
-    
+    parser = argparse.ArgumentParser(
+        description="Convert MIOpen commands to CSV test cases"
+    )
+
+    parser.add_argument(
+        "--input",
+        type=str,
+        required=True,
+        help="Input file with MIOpen driver commands",
+    )
+    parser.add_argument(
+        "--output", type=str, help="Output CSV file (for mixed 2D/3D cases)"
+    )
+    parser.add_argument(
+        "--output-2d",
+        type=str,
+        default="miopen_conv_2d.csv",
+        help="Output CSV file for 2D cases",
+    )
+    parser.add_argument(
+        "--output-3d",
+        type=str,
+        default="miopen_conv_3d.csv",
+        help="Output CSV file for 3D cases",
+    )
+    parser.add_argument(
+        "--filter-duplicates", action="store_true", help="Remove duplicate test cases"
+    )
+    parser.add_argument(
+        "--model-name",
+        type=str,
+        default="MIOpen",
+        help="Model name to use in test case names (default: MIOpen)",
+    )
+
     args = parser.parse_args()
-    
+
     if not os.path.exists(args.input):
         print(f"ERROR: Input file not found: {args.input}")
         return 1
-    
+
     print(f"Parsing MIOpen commands from {args.input}...")
-    
+
     test_cases_2d = []
     test_cases_3d = []
     total_lines = 0
     parsed_lines = 0
-    
-    with open(args.input, 'r') as f:
+
+    with open(args.input, "r") as f:
         for line_num, line in enumerate(f, 1):
             total_lines += 1
             line = line.strip()
-            
+
             # Skip empty lines and non-MIOpen commands
             # Handle both direct commands and logged commands with MIOpen prefix
             if not line:
                 continue
-            
+
             # Extract the actual MIOpenDriver command from logged format
-            if 'MIOpenDriver conv' in line:
+            if "MIOpenDriver conv" in line:
                 # Extract command after finding MIOpenDriver
-                command_start = line.find('./bin/MIOpenDriver conv')
+                command_start = line.find("./bin/MIOpenDriver conv")
                 if command_start != -1:
                     line = line[command_start:]
                 else:
                     # Handle cases where path might be different - create standard format
-                    driver_start = line.find('MIOpenDriver conv')
+                    driver_start = line.find("MIOpenDriver conv")
                     if driver_start != -1:
-                        line = './bin/' + line[driver_start:]
+                        line = "./bin/" + line[driver_start:]
                     else:
                         continue
-            elif not line.startswith('./bin/MIOpenDriver conv'):
+            elif not line.startswith("./bin/MIOpenDriver conv"):
                 continue
-            
+
             try:
                 # Parse MIOpen command
                 miopen_params = parse_miopen_command(line)
                 if not miopen_params:
                     continue
-                
+
                 # Convert to ConvParam format
                 conv_param = miopen_to_conv_param(miopen_params)
                 if not conv_param:
                     continue
-                
+
                 # Add model name to test name
-                conv_param['TestName'] = f"{args.model_name}_{conv_param['NDim']}D_fwd"
-                
+                conv_param["TestName"] = f"{args.model_name}_{conv_param['NDim']}D_fwd"
+
                 # Separate 2D and 3D cases
-                if conv_param['NDim'] == 2:
+                if conv_param["NDim"] == 2:
                     test_cases_2d.append(conv_param)
                 else:
                     test_cases_3d.append(conv_param)
-                
+
                 parsed_lines += 1
-                
+
             except Exception as e:
                 print(f"WARNING: Failed to parse line {line_num}: {e}")
                 continue
-    
+
     print(f"Processed {total_lines} lines, parsed {parsed_lines} commands")
     print(f"Found {len(test_cases_2d)} 2D cases, {len(test_cases_3d)} 3D cases")
-    
+
     # Remove duplicates if requested
     if args.filter_duplicates:
         # Simple duplicate removal based on key parameters
         def make_key(case):
-            if case['NDim'] == 2:
-                return (case['Groups'], case['BatchSize'], case['OutChannels'], case['InChannels'],
-                       case['KernelH'], case['KernelW'], case['InputH'], case['InputW'],
-                       case['StrideH'], case['StrideW'])
+            if case["NDim"] == 2:
+                return (
+                    case["Groups"],
+                    case["BatchSize"],
+                    case["OutChannels"],
+                    case["InChannels"],
+                    case["KernelH"],
+                    case["KernelW"],
+                    case["InputH"],
+                    case["InputW"],
+                    case["StrideH"],
+                    case["StrideW"],
+                )
             else:
-                return (case['Groups'], case['BatchSize'], case['OutChannels'], case['InChannels'],
-                       case['KernelD'], case['KernelH'], case['KernelW'], 
-                       case['InputD'], case['InputH'], case['InputW'],
-                       case['StrideD'], case['StrideH'], case['StrideW'])
-        
+                return (
+                    case["Groups"],
+                    case["BatchSize"],
+                    case["OutChannels"],
+                    case["InChannels"],
+                    case["KernelD"],
+                    case["KernelH"],
+                    case["KernelW"],
+                    case["InputD"],
+                    case["InputH"],
+                    case["InputW"],
+                    case["StrideD"],
+                    case["StrideH"],
+                    case["StrideW"],
+                )
+
         seen_2d = set()
         unique_2d = []
         for case in test_cases_2d:
@@ -320,7 +447,7 @@ def main():
             if key not in seen_2d:
                 seen_2d.add(key)
                 unique_2d.append(case)
-        
+
         seen_3d = set()
         unique_3d = []
         for case in test_cases_3d:
@@ -328,11 +455,13 @@ def main():
             if key not in seen_3d:
                 seen_3d.add(key)
                 unique_3d.append(case)
-        
-        print(f"After deduplication: {len(unique_2d)} 2D cases, {len(unique_3d)} 3D cases")
+
+        print(
+            f"After deduplication: {len(unique_2d)} 2D cases, {len(unique_3d)} 3D cases"
+        )
         test_cases_2d = unique_2d
         test_cases_3d = unique_3d
-    
+
     # Write output files
     if args.output:
         # Write mixed cases to single file
@@ -340,14 +469,36 @@ def main():
         if all_cases:
             print(f"Writing {len(all_cases)} total cases to {args.output}")
             # Use 2D headers for mixed file, extend as needed
-            mixed_headers = ['NDim', 'Groups', 'BatchSize', 'OutChannels', 'InChannels',
-                           'KernelH', 'KernelW', 'InputH', 'InputW', 'OutputH', 'OutputW',
-                           'StrideH', 'StrideW', 'DilationH', 'DilationW', 
-                           'LeftPadH', 'LeftPadW', 'RightPadH', 'RightPadW', 'TestName']
-            
-            with open(args.output, 'w', newline='') as csvfile:
-                csvfile.write(f"# Mixed 2D/3D Convolution Test Cases from MIOpen Commands\n")
-                writer = csv.DictWriter(csvfile, fieldnames=mixed_headers, extrasaction='ignore')
+            mixed_headers = [
+                "NDim",
+                "Groups",
+                "BatchSize",
+                "OutChannels",
+                "InChannels",
+                "KernelH",
+                "KernelW",
+                "InputH",
+                "InputW",
+                "OutputH",
+                "OutputW",
+                "StrideH",
+                "StrideW",
+                "DilationH",
+                "DilationW",
+                "LeftPadH",
+                "LeftPadW",
+                "RightPadH",
+                "RightPadW",
+                "TestName",
+            ]
+
+            with open(args.output, "w", newline="") as csvfile:
+                csvfile.write(
+                    "# Mixed 2D/3D Convolution Test Cases from MIOpen Commands\n"
+                )
+                writer = csv.DictWriter(
+                    csvfile, fieldnames=mixed_headers, extrasaction="ignore"
+                )
                 writer.writeheader()
                 for case in all_cases:
                     writer.writerow(case)
@@ -355,12 +506,13 @@ def main():
         # Write separate files for 2D and 3D
         if test_cases_2d:
             write_csv_cases(test_cases_2d, args.output_2d, 2)
-        
+
         if test_cases_3d:
             write_csv_cases(test_cases_3d, args.output_3d, 3)
-    
+
     print("Conversion completed!")
     return 0
 
+
 if __name__ == "__main__":
     exit(main())
diff --git a/test_data/run_model_with_miopen.py b/test_data/run_model_with_miopen.py
index 596f6a4a37..9eee3b53fb 100644
--- a/test_data/run_model_with_miopen.py
+++ b/test_data/run_model_with_miopen.py
@@ -7,13 +7,12 @@ PyTorch Model Runner with MIOpen Command Logging using torchvision models
 
 Usage:
     MIOPEN_ENABLE_LOGGING_CMD=1 python3 run_model_with_miopen.py --model resnet18 2> miopen_commands.txt
-    
+
 Available 2D models: alexnet, vgg11, vgg16, resnet18, resnet50, mobilenet_v2, etc.
 Available 3D models: r3d_18, mc3_18, r2plus1d_18
 """
 
 import torch
-import torch.nn as nn
 import torchvision.models as models
 import torchvision.models.video as video_models
 import argparse
@@ -21,94 +20,145 @@ import os
 
 # Define available models
 MODELS_2D = [
-    'alexnet', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19', 'vgg19_bn',
-    'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152',
-    'resnext50_32x4d', 'resnext101_32x8d', 'resnext101_64x4d',
-    'wide_resnet50_2', 'wide_resnet101_2',
-    'densenet121', 'densenet161', 'densenet169', 'densenet201',
-    'inception_v3', 'googlenet',
-    'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0',
-    'mobilenet_v2', 'mobilenet_v3_large', 'mobilenet_v3_small',
-    'mnasnet0_5', 'mnasnet0_75', 'mnasnet1_0', 'mnasnet1_3',
-    'squeezenet1_0', 'squeezenet1_1'
+    "alexnet",
+    "vgg11",
+    "vgg11_bn",
+    "vgg13",
+    "vgg13_bn",
+    "vgg16",
+    "vgg16_bn",
+    "vgg19",
+    "vgg19_bn",
+    "resnet18",
+    "resnet34",
+    "resnet50",
+    "resnet101",
+    "resnet152",
+    "resnext50_32x4d",
+    "resnext101_32x8d",
+    "resnext101_64x4d",
+    "wide_resnet50_2",
+    "wide_resnet101_2",
+    "densenet121",
+    "densenet161",
+    "densenet169",
+    "densenet201",
+    "inception_v3",
+    "googlenet",
+    "shufflenet_v2_x0_5",
+    "shufflenet_v2_x1_0",
+    "shufflenet_v2_x1_5",
+    "shufflenet_v2_x2_0",
+    "mobilenet_v2",
+    "mobilenet_v3_large",
+    "mobilenet_v3_small",
+    "mnasnet0_5",
+    "mnasnet0_75",
+    "mnasnet1_0",
+    "mnasnet1_3",
+    "squeezenet1_0",
+    "squeezenet1_1",
 ]
 
-MODELS_3D = [
-    'r3d_18', 'mc3_18', 'r2plus1d_18'
-]
+MODELS_3D = ["r3d_18", "mc3_18", "r2plus1d_18"]
 
 ALL_MODELS = MODELS_2D + MODELS_3D
 
+
 def main():
-    parser = argparse.ArgumentParser(description='PyTorch Model Runner with MIOpen Command Logging')
-    
+    parser = argparse.ArgumentParser(
+        description="PyTorch Model Runner with MIOpen Command Logging"
+    )
+
     # Model selection
-    parser.add_argument('--model', choices=ALL_MODELS, default='resnet18', 
-                       help='Model to run')
-    
+    parser.add_argument(
+        "--model", choices=ALL_MODELS, default="resnet18", help="Model to run"
+    )
+
     # Input tensor dimensions
-    parser.add_argument('--batch-size', type=int, default=4, 
-                       help='Batch size')
-    parser.add_argument('--channels', type=int, default=3, 
-                       help='Input channels (e.g., 3 for RGB, 1 for grayscale)')
-    parser.add_argument('--height', type=int, default=224, 
-                       help='Input height')
-    parser.add_argument('--width', type=int, default=224, 
-                       help='Input width')
-    parser.add_argument('--input-size', type=int, 
-                       help='Input size (sets both height and width to same value)')
-    parser.add_argument('--temporal-size', type=int, default=16, 
-                       help='Temporal dimension for 3D models')
-    
+    parser.add_argument("--batch-size", type=int, default=4, help="Batch size")
+    parser.add_argument(
+        "--channels",
+        type=int,
+        default=3,
+        help="Input channels (e.g., 3 for RGB, 1 for grayscale)",
+    )
+    parser.add_argument("--height", type=int, default=224, help="Input height")
+    parser.add_argument("--width", type=int, default=224, help="Input width")
+    parser.add_argument(
+        "--input-size",
+        type=int,
+        help="Input size (sets both height and width to same value)",
+    )
+    parser.add_argument(
+        "--temporal-size", type=int, default=16, help="Temporal dimension for 3D models"
+    )
+
     # Device and precision
-    parser.add_argument('--device', choices=['cuda', 'cpu', 'auto'], default='auto',
-                       help='Device to run on')
-    parser.add_argument('--precision', choices=['fp32', 'fp16', 'bf16'], default='fp32',
-                       help='Floating point precision')
-    
-    
+    parser.add_argument(
+        "--device",
+        choices=["cuda", "cpu", "auto"],
+        default="auto",
+        help="Device to run on",
+    )
+    parser.add_argument(
+        "--precision",
+        choices=["fp32", "fp16", "bf16"],
+        default="fp32",
+        help="Floating point precision",
+    )
+
     # Output control
-    parser.add_argument('--quiet', action='store_true',
-                       help='Suppress output except errors')
-    parser.add_argument('--verbose', action='store_true',
-                       help='Verbose output')
-    
+    parser.add_argument(
+        "--quiet", action="store_true", help="Suppress output except errors"
+    )
+    parser.add_argument("--verbose", action="store_true", help="Verbose output")
+
     args = parser.parse_args()
-    
+
     # Handle input-size override
     if args.input_size:
         args.height = args.input_size
         args.width = args.input_size
-    
+
     # Check MIOpen logging
-    if not os.environ.get('MIOPEN_ENABLE_LOGGING_CMD') and not args.quiet:
+    if not os.environ.get("MIOPEN_ENABLE_LOGGING_CMD") and not args.quiet:
         print("WARNING: Set MIOPEN_ENABLE_LOGGING_CMD=1 to capture commands")
-    
+
     # Device selection
-    if args.device == 'auto':
-        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    if args.device == "auto":
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     else:
         device = torch.device(args.device)
-    
+
     # Check if actually running on GPU
-    if device.type == 'cpu':
+    if device.type == "cpu":
         import sys
-        print(f"WARNING: Running on CPU, MIOpen commands will not be generated!", file=sys.stderr)
+
+        print(
+            "WARNING: Running on CPU, MIOpen commands will not be generated!",
+            file=sys.stderr,
+        )
         print(f"CUDA/ROCm available: {torch.cuda.is_available()}", file=sys.stderr)
         if torch.cuda.is_available():
             print(f"GPU device count: {torch.cuda.device_count()}", file=sys.stderr)
-            print(f"GPU name: {torch.cuda.get_device_name(0) if torch.cuda.device_count() > 0 else 'N/A'}", file=sys.stderr)
+            print(
+                f"GPU name: {torch.cuda.get_device_name(0) if torch.cuda.device_count() > 0 else 'N/A'}",
+                file=sys.stderr,
+            )
         # Continue anyway for testing purposes
-    
+
     if not args.quiet:
         print(f"Using device: {device}")
-    
+
     # Create model using torchvision
     if args.model in MODELS_3D:
         # 3D Video models
         model = getattr(video_models, args.model)(weights=None)
         # 3D input: (batch, channels, temporal, height, width)
-        input_tensor = torch.randn(args.batch_size, args.channels, args.temporal_size, args.height, args.width)
+        input_tensor = torch.randn(
+            args.batch_size, args.channels, args.temporal_size, args.height, args.width
+        )
         if not args.quiet:
             print(f"3D model: {args.model}")
             print(f"Input shape: {input_tensor.shape} (B, C, T, H, W)")
@@ -116,34 +166,37 @@ def main():
         # 2D Image models
         model = getattr(models, args.model)(weights=None)
         # 2D input: (batch, channels, height, width)
-        input_tensor = torch.randn(args.batch_size, args.channels, args.height, args.width)
+        input_tensor = torch.randn(
+            args.batch_size, args.channels, args.height, args.width
+        )
         if not args.quiet:
             print(f"2D model: {args.model}")
             print(f"Input shape: {input_tensor.shape} (B, C, H, W)")
-    
+
     # Set precision
-    if args.precision == 'fp16':
+    if args.precision == "fp16":
         model = model.half()
         input_tensor = input_tensor.half()
-    elif args.precision == 'bf16':
+    elif args.precision == "bf16":
         model = model.bfloat16()
         input_tensor = input_tensor.bfloat16()
-    
+
     model = model.to(device)
     input_tensor = input_tensor.to(device)
-    
+
     if not args.quiet:
         print(f"Running {args.model} model...")
-    
+
     # Run inference
     model.eval()
     with torch.no_grad():
         output = model(input_tensor)
         if not args.quiet:
             print(f"Output shape: {output.shape}")
-    
+
     if not args.quiet:
         print("Done! MIOpen commands logged to stderr")
 
+
 if __name__ == "__main__":
     main()
diff --git a/tile_engine/ops/gemm/codegen_utils.py b/tile_engine/ops/gemm/codegen_utils.py
index 98595933b8..186ebf2d02 100644
--- a/tile_engine/ops/gemm/codegen_utils.py
+++ b/tile_engine/ops/gemm/codegen_utils.py
@@ -170,11 +170,11 @@ warp_tile_supported_combinations = {
             [16, 16, 128],
             [32, 32, 64],
         ],
-        "fp8_bf8_fp16":  [
+        "fp8_bf8_fp16": [
             [16, 16, 128],
             [32, 32, 64],
         ],
-        "bf8_fp8_fp16":  [
+        "bf8_fp8_fp16": [
             [16, 16, 128],
             [32, 32, 64],
         ],
diff --git a/tile_engine/ops/gemm/validation_utils.py b/tile_engine/ops/gemm/validation_utils.py
index c0e109bf11..3f66ef2714 100644
--- a/tile_engine/ops/gemm/validation_utils.py
+++ b/tile_engine/ops/gemm/validation_utils.py
@@ -107,32 +107,32 @@ WARP_TILE_SUPPORTED_COMBINATIONS = {
         "fp16_fp16_fp16": [
             [16, 16, 16],
         ],
-    },    
+    },
 }
 
 # Supported warp tile combinations for different GPU architectures and data types
 WARP_SUPPORTED_COMBINATIONS = {
     "gfx90a": [
-        [1, 4, 1], 
-        [2, 2, 1], 
+        [1, 4, 1],
+        [2, 2, 1],
         [4, 1, 1],
     ],
     "gfx942": [
-        [1, 4, 1], 
-        [2, 2, 1], 
+        [1, 4, 1],
+        [2, 2, 1],
         [4, 1, 1],
     ],
     "gfx950": [
-        [1, 4, 1], 
-        [2, 2, 1], 
+        [1, 4, 1],
+        [2, 2, 1],
         [4, 1, 1],
     ],
     "gfx1201": [
-        [2, 4, 1], 
-        [1, 8, 1], 
-        [8, 1, 1], 
+        [2, 4, 1],
+        [1, 8, 1],
+        [8, 1, 1],
         [4, 2, 1],
-    ],    
+    ],
 }
 
 # Unsupported trait combinations
@@ -186,14 +186,14 @@ def is_trait_combination_valid(pipeline: str, epilogue: str, scheduler: str) ->
 
 
 def validate_warp_configuration(
-    warp_m: int, 
-    warp_n: int, 
+    warp_m: int,
+    warp_n: int,
     warp_k: int,
     gpu_name: str = None,
 ) -> bool:
     """Validate warp configuration."""
     if gpu_name is None:
-        gpu_name = get_gpu_name_by_id(0)    
+        gpu_name = get_gpu_name_by_id(0)
 
     current_combination = [warp_m, warp_n, warp_k]
 
@@ -205,11 +205,8 @@ def validate_warp_configuration(
 
     # Check if current combination is in the allowed list
     if current_combination not in allowed_combinations:
-        error_msg = (
-            f"Invalid warp tile combination: {current_combination} not in allowed list. "
-        )
         return False
-                
+
     return True