From bb66c2af3e0abe5714cf914de5d9d6f438a050eb Mon Sep 17 00:00:00 2001 From: Yanxing-Shi Date: Wed, 21 May 2025 09:47:57 +0000 Subject: [PATCH] disable warning output & enable default config --- tile_engine/ops/gemm/CMakeLists.txt | 4 +- tile_engine/ops/gemm/benchmark_gemm.cpp | 1 - tile_engine/ops/gemm/benchmark_gemm.hpp | 9 +---- .../ops/gemm/configs/default_config.json | 39 ++++++++++--------- tile_engine/ops/gemm/gemm_instance_builder.py | 38 ++++++++++++------ 5 files changed, 50 insertions(+), 41 deletions(-) diff --git a/tile_engine/ops/gemm/CMakeLists.txt b/tile_engine/ops/gemm/CMakeLists.txt index 0952f12a94..72bf1aa8a4 100644 --- a/tile_engine/ops/gemm/CMakeLists.txt +++ b/tile_engine/ops/gemm/CMakeLists.txt @@ -3,7 +3,7 @@ execute_process( COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/gemm_instance_builder.py --working_path ${CMAKE_CURRENT_BINARY_DIR} - --config_json ${CMAKE_CURRENT_LIST_DIR}/configs/user_provided_config.json + # --config_json ${CMAKE_CURRENT_LIST_DIR}/configs/user_provided_config.json --list_blobs RESULT_VARIABLE ret ) @@ -31,7 +31,7 @@ add_custom_command( OUTPUT ${GEMM_CODEGEN_BLOBS} COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/gemm_instance_builder.py --working_path ${CMAKE_CURRENT_BINARY_DIR} - --config_json ${CMAKE_CURRENT_LIST_DIR}/configs/user_provided_config.json + # --config_json ${CMAKE_CURRENT_LIST_DIR}/configs/user_provided_config.json --gen_blobs ) diff --git a/tile_engine/ops/gemm/benchmark_gemm.cpp b/tile_engine/ops/gemm/benchmark_gemm.cpp index 874939f4b6..cd4df27de5 100644 --- a/tile_engine/ops/gemm/benchmark_gemm.cpp +++ b/tile_engine/ops/gemm/benchmark_gemm.cpp @@ -6,7 +6,6 @@ #include #include "ck_tile/host.hpp" - #include "gemm_profiler.hpp" #include "gemm_host_api.hpp" #include "benchmark_gemm.hpp" diff --git a/tile_engine/ops/gemm/benchmark_gemm.hpp b/tile_engine/ops/gemm/benchmark_gemm.hpp index 941e969920..c82de1a07c 100644 --- a/tile_engine/ops/gemm/benchmark_gemm.hpp +++ b/tile_engine/ops/gemm/benchmark_gemm.hpp @@ -128,7 +128,7 @@ struct Setting std::string csv_filename_; }; -std::string get_rocm_version() +inline std::string get_rocm_version() { std::ifstream version_file("/opt/rocm/.info/version"); if(version_file.is_open()) @@ -185,13 +185,6 @@ bool compare(ck_tile::index_t K, } /// @brief Function to get the kernel output with reference implementation on CPU/GPU -template void gemm_host_reference(int verify, ck_tile::HostTensor& a_m_k, ck_tile::HostTensor& b_k_n, diff --git a/tile_engine/ops/gemm/configs/default_config.json b/tile_engine/ops/gemm/configs/default_config.json index e2e05e57d4..bac4d4a4d4 100644 --- a/tile_engine/ops/gemm/configs/default_config.json +++ b/tile_engine/ops/gemm/configs/default_config.json @@ -33,32 +33,36 @@ }, "tile_config": { "tile_m": { - "max": 256, - "min": 128, + "max": 512, + "min": 64, "step": 64, "exclude": [] }, "tile_n": { - "max": 256, - "min": 128, - "step": 64, + "max": 512, + "min": 64, + "step": 32, "exclude": [] }, "tile_k": { - "max": 256, - "min": 128, - "step": 64, + "max": 512, + "min": 64, + "step": 32, "exclude": [] }, "warp_m": { - "max": 4, - "min": 1, - "step": 1 + "values": [ + 4, + 2, + 1 + ] }, "warp_n": { - "max": 4, - "min": 1, - "step": 1 + "values": [ + 4, + 2, + 1 + ] }, "warp_k": { "values": [ @@ -71,8 +75,7 @@ 8, 16, 32, - 64, - 128 + 64 ] }, "warp_tile_n": { @@ -81,13 +84,11 @@ 8, 16, 32, - 64, - 128 + 64 ] }, "warp_tile_k": { "values": [ - 4, 8, 16, 32, diff --git a/tile_engine/ops/gemm/gemm_instance_builder.py b/tile_engine/ops/gemm/gemm_instance_builder.py index 961b70584b..2e6a32a5d9 100755 --- a/tile_engine/ops/gemm/gemm_instance_builder.py +++ b/tile_engine/ops/gemm/gemm_instance_builder.py @@ -32,6 +32,9 @@ from codegen_utils import ( get_gpu_name_by_id ) import logging +import time + +logging.basicConfig(level=logging.INFO) class GemmCodeGenerator: @@ -99,7 +102,7 @@ class GemmCodeGenerator: ) self.valid_trait_names.append(trait_name) else: - logging.warning( + logging.debug( f"Invalid combination: {pipeline}-{epilogue}-{scheduler}" ) @@ -334,7 +337,7 @@ struct GemmKernel {{ f"warp_k({warp_k}) * warp_tile_k({warp_tile_k})") if invalid_params: - logging.warning( + logging.debug( f"Trait: [{trait}], Invalid warp configuratio: {', '.join(invalid_params)}. " f"Parameter combination: warp=({warp_m},{warp_n},{warp_k}), " f"warp_tile=({warp_tile_m},{warp_tile_n},{warp_tile_k})" @@ -354,7 +357,7 @@ struct GemmKernel {{ f"tile_k({tile_k}) % [{warp_k}x{warp_tile_k}] = {tile_k % (warp_k * warp_tile_k)}") if alignment_issues: - logging.warning( + logging.debug( f"Trait: [{trait}], Dimension alignment failed: {', '.join(alignment_issues)}. " f"Tile dimensions {tile_m}x{tile_n}x{tile_k} must be divisible by " f"[warp]: {warp_m}x{warp_n}x{warp_k} x [warp_tile]: {warp_tile_m}x{warp_tile_n}x{warp_tile_k}" @@ -370,7 +373,7 @@ struct GemmKernel {{ max_tile_size = 2**16 if pipeline == "compv4" else 2**15 if total_tile_in_lds > max_tile_size: - logging.warning( + logging.debug( f"LDS capacity exceeded [{trait}]: Total required {total_tile_in_lds:,}B ({total_tile_in_lds/1024:.1f}KB) > " f"maximum allowed {max_tile_size:,}B ({max_tile_size/1024}KB). Breakdown:\n" f"- Matrix A ({self.config.problem.datatype_map['matrix_a']}): {tile_m}x{tile_k} = {matrix_a_size:,}B\n" @@ -385,18 +388,18 @@ struct GemmKernel {{ gpu_name = get_gpu_name_by_id(0) gpu_warp_tile_key = warp_tile_supported_combinations.get(gpu_name, {}) if not gpu_warp_tile_key: - logging.warning( + logging.debug( f"Trait: [{trait}], No valid warp tile combinations found for {gpu_name}/{warp_tile_key}, skip this check.") return False allowed_combinations = gpu_warp_tile_key.get(warp_tile_key, []) if not allowed_combinations: - logging.warning( + logging.debug( f"Trait: [{trait}], No valid warp tile combinations found for {gpu_name}/{warp_tile_key}, skip this check.") return False if current_combination not in allowed_combinations: - logging.warning( + logging.debug( f"Trait: [{trait}], Invalid warp combination: {current_combination} not in allowed list. " f"Valid combinations for data type '{warp_tile_key}': {allowed_combinations}" ) @@ -408,18 +411,31 @@ struct GemmKernel {{ def get_tile_value(tile_param): return tile_param.generate_candidates( ) if isinstance(tile_param, RangeConfigParam) else tile_param.values - tile_params = set(itertools.product( + tile_group = list(itertools.product( get_tile_value(self.config.tile_config.tile_m), get_tile_value(self.config.tile_config.tile_n), - get_tile_value(self.config.tile_config.tile_k), + get_tile_value(self.config.tile_config.tile_k) + )) + + warp_group = list(itertools.product( get_tile_value(self.config.tile_config.warp_m), get_tile_value(self.config.tile_config.warp_n), - get_tile_value(self.config.tile_config.warp_k), + get_tile_value(self.config.tile_config.warp_k) + )) + + warp_tile_group = list(itertools.product( get_tile_value(self.config.tile_config.warp_tile_m), get_tile_value(self.config.tile_config.warp_tile_n), - get_tile_value(self.config.tile_config.warp_tile_k), + get_tile_value(self.config.tile_config.warp_tile_k) )) + tile_params = { + t + w + wt + for t in tile_group + for w in warp_group + for wt in warp_tile_group + } + for trait in self.valid_trait_names: tile_valid_params = list( filter(lambda t: self.is_tile_valid(t, trait), tile_params))