From c5faecd894c2d3122ae2bc29fe98da54874426a7 Mon Sep 17 00:00:00 2001 From: "assistant-librarian[bot]" Date: Wed, 17 Dec 2025 17:16:17 +0000 Subject: [PATCH] Merge commit '92653168c2b276d4467320f5bdff5ec6cbddf4e6' into develop --- CHANGELOG.md | 25 ++-- Jenkinsfile | 41 +++++- codegen/CMakeLists.txt | 1 + example/ck_tile/01_fmha/CMakeLists.txt | 2 +- .../ck_tile/01_fmha/codegen/ops/fmha_fwd.py | 20 ++- .../core/arch/amd_buffer_addressing.hpp | 58 ++++++++- .../arch/amd_buffer_addressing_builtins.hpp | 58 ++++++++- include/ck_tile/core/numeric/vector_type.hpp | 1 + .../ops/fmha/pipeline/tile_fmha_shape.hpp | 2 + script/cmake-ck-dev.sh | 8 +- script/monitor_sccache_during_build.sh | 119 ++++++++++++++++++ 11 files changed, 309 insertions(+), 26 deletions(-) create mode 100644 script/monitor_sccache_during_build.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index a69ce2260e..d9fad8c6d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,21 +17,22 @@ Documentation for Composable Kernel available at [https://rocm.docs.amd.com/proj ## Composable Kernel 1.2.0 for ROCm 7.2.0 ### Added +* Added support for fp8 dynamic tensor-wise quantization of fp8 fmha fwd kernel. * Added support for bf16 data type to grouped_gemm and grouped_gemm_preshuffle. * Added Col-Col-Row-Col layout support for aquant mode in blockscale GEMM. -* Added support for mixed precision fp8 x bf8 universal GEMM and weight preshuffle GEMM -* Added a compute async pipeline in the CK TILE universal GEMM on gfx950 -* Added support for B Tensor type pk_int4_t in the CK TILE weight preshuffle GEMM. +* Added support for mixed precision fp8 x bf8 universal GEMM and weight preshuffle GEMM. +* Added a compute async pipeline in the CK Tile universal GEMM on gfx950. +* Added support for B Tensor type `pk_int4_t` in the CK Tile weight preshuffle GEMM. * Added the new api to load different memory sizes to SGPR. -* Added support for B Tensor Preshuffle in CK TILE Grouped GEMM. +* Added support for B Tensor preshuffle in CK Tile grouped GEMM. * Added a basic copy kernel example and supporting documentation for new CK Tile developers. -* Added support for grouped_gemm kernels to perform multi_d elementwise operation. -* Added support for Multiple ABD GEMM +* Added support for grouped GEMM kernels to perform Multi D elementwise operation. +* Added support for multiple ABD GEMM. * Added benchmarking support for tile engine GEMM Multi D. -* Added block scaling support in CK_TILE GEMM, allowing flexible use of quantization matrices from either A or B operands. -* Added the row-wise column-wise quantization for CK_TILE GEMM & CK_TILE Grouped GEMM. -* Added support for f32 to FMHA (fwd/bwd). -* Added tensor-wise quantization for CK_TILE GEMM. +* Added block scaling support in CK Tile GEMM, allowing flexible use of quantization matrices from either A or B operands. +* Added the row-wise column-wise quantization for CK Tile GEMM and CK Tile grouped GEMM. +* Added support for f32 to FMHA (forward and backward). +* Added tensor-wise quantization for CK Tile GEMM. * Added support for batched contraction kernel. * Added WMMA (gfx12) support for FMHA. * Added pooling kernel in CK_TILE @@ -41,7 +42,7 @@ Documentation for Composable Kernel available at [https://rocm.docs.amd.com/proj ### Changed -* Removed `BlockSize` in `make_kernel` and `CShuffleEpilogueProblem` to support Wave32 in CK_TILE (#2594) +* Removed `BlockSize` in `make_kernel` and `CShuffleEpilogueProblem` to support Wave32 in CK Tile (#2594) * Added an optional template parameter `Arch` (`gfx9_t`, `gfx12_t` etc.) to `make_kernel` to support linking multiple object files that have the same kernel compiled for different architectures. * FMHA examples and tests can be built for multiple architectures (gfx9, gfx950, gfx12) at the same time. @@ -91,7 +92,7 @@ Documentation for Composable Kernel available at [https://rocm.docs.amd.com/proj ### Optimized * Optimize the gemm multiply multiply preshuffle & lds bypass with Pack of KGroup and better instruction layout. -* Added Vectorize Transpose optimization for CK Tile +* Added Vectorize Transpose optimization for CK Tile * Added the asynchronous copy for gfx950 ### Changed diff --git a/Jenkinsfile b/Jenkinsfile index 2a1d1fd904..cf4f13eff1 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -622,8 +622,45 @@ def cmake_build(Map conf=[:]){ echo cmd dir("build"){ - //build CK - sh cmd + // Start sccache monitoring + if(check_host() && params.USE_SCCACHE && "${env.CK_SCCACHE}" != "null" && "${invocation_tag}" != "") { + sh """ + chmod +x ../script/monitor_sccache_during_build.sh + mkdir -p logs + export SCCACHE_C_CUSTOM_CACHE_BUSTER="${invocation_tag}" + ../script/monitor_sccache_during_build.sh build_monitor & + MONITOR_PID=\$! + echo "Monitor PID: \$MONITOR_PID" + echo \$MONITOR_PID > monitor.pid + """ + } + try { + //build CK + sh cmd + } catch (Exception buildError) { + echo "Build failed: ${buildError.getMessage()}" + throw buildError + } finally { + // Stop sccache monitoring + if(check_host() && params.USE_SCCACHE && "${env.CK_SCCACHE}" != "null" && "${invocation_tag}" != "") { + sh """ + # Stop monitoring + if [ -f monitor.pid ]; then + MONITOR_PID=\$(cat monitor.pid) + kill \$MONITOR_PID 2>/dev/null || echo "Monitor already stopped" + rm -f monitor.pid + fi + """ + + // Archive the monitoring logs + try { + archiveArtifacts artifacts: "logs/*monitor*.log", allowEmptyArchive: true + } catch (Exception e) { + echo "Could not archive sccache monitoring logs: ${e.getMessage()}" + } + } + } + //run tests except when NO_CK_BUILD or BUILD_LEGACY_OS are set if(!setup_args.contains("NO_CK_BUILD") && !params.BUILD_LEGACY_OS){ sh "python3 ../script/ninja_json_converter.py .ninja_log --legacy-format --output ck_build_trace_${check_arch_name()}.json" diff --git a/codegen/CMakeLists.txt b/codegen/CMakeLists.txt index 22d8e58d10..69a6a71de2 100644 --- a/codegen/CMakeLists.txt +++ b/codegen/CMakeLists.txt @@ -15,6 +15,7 @@ configure_file(${CK_ROOT}/include/ck/config.h.in ${CK_ROOT}/include/ck/config.h) find_package(ROCM) include(ROCMInstallTargets) include(ROCMTest) +list(APPEND CMAKE_PREFIX_PATH /opt/rocm $ENV{ROCM_PATH}) find_package(hiprtc REQUIRED) rocm_setup_version(VERSION 1.0) diff --git a/example/ck_tile/01_fmha/CMakeLists.txt b/example/ck_tile/01_fmha/CMakeLists.txt index 9c81207361..fbd6551091 100644 --- a/example/ck_tile/01_fmha/CMakeLists.txt +++ b/example/ck_tile/01_fmha/CMakeLists.txt @@ -47,7 +47,7 @@ set(FMHA_FWD_CODE_GEN_COMMON_ARGS ${CMAKE_CURRENT_LIST_DIR}/generate.py --targets ${FMHA_TARGETS_ARG} --api ${FMHA_FWD_APIS} - --optdim 32,64,128,256 + --optdim 32,64,80,128,256 # --filter fmha_fwd... ) set(FMHA_BWD_CODE_GEN_COMMON_ARGS diff --git a/example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py b/example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py index 4d6900a802..0cffb2642c 100644 --- a/example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py +++ b/example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py @@ -40,7 +40,16 @@ DTYPE_BITS = { "bf8": 8, } -K0_MAX_SUBMAX_MAP = {32: 32, 48: 48, 64: 64, 96: 128, 128: 128, 192: 192, 256: 256} +K0_MAX_SUBMAX_MAP = { + 32: 32, + 48: 48, + 64: 64, + 80: 96, + 96: 128, + 128: 128, + 192: 192, + 256: 256, +} FMHA_FWD_KERNEL_HEADER = """// SPDX-License-Identifier: MIT // Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.\n @@ -930,6 +939,7 @@ class KernelComponentFactoryGfx9(CompatibilityRuleFactoryGfx9): ( 64, 64) : [FmhaFwdTileSize( 16, 32, 64, 64, 32, 64, 1, 1, 1, 1, 1, 1, 16, 16, 32, 16, 16, 32, -1), FmhaFwdTileSize( 32, 32, 64, 64, 32, 64, 1, 1, 1, 1, 1, 1, 32, 32, 16, 32, 32, 16, -1), FmhaFwdTileSize(128, 64, 32, 64, 32, 64, 4, 1, 1, 4, 1, 1, 32, 32, 16, 32, 32, 16, -1)], + ( 80, 96) : [FmhaFwdTileSize(128, 128, 16, 96, 32, 80, 4, 1, 1, 4, 1, 1, 32, 32, 16, 32, 32, 16, -1)], ( 96, 128) : [FmhaFwdTileSize(128, 128, 32, 128, 32, 96, 4, 1, 1, 4, 1, 1, 32, 32, 16, 32, 32, 16, -1)], (128, 128) : [FmhaFwdTileSize( 16, 32, 64, 128, 32, 128, 1, 1, 1, 1, 1, 1, 16, 16, 32, 16, 16, 32, -1), FmhaFwdTileSize( 32, 32, 128, 128, 32, 128, 1, 1, 1, 1, 1, 1, 32, 32, 16, 32, 32, 16, -1), @@ -1014,8 +1024,12 @@ class KernelComponentFactoryGfx9(CompatibilityRuleFactoryGfx9): ["no"], ["f", "t"], ): - pipelines.append(FmhaFwdPipeline("qr_async", "row", "t", "f", "t", "t", logits, bias, "f", "f", qscale, mask, "f", "f", sink)) # fmt: skip - pipelines.append(FmhaFwdPipeline("qr_async", "row", "t", "t", "t", "t", logits, bias, "f", "f", qscale, mask, "f", "f", sink)) # fmt: skip + if hdim == 64: + pipelines.append(FmhaFwdPipeline("qr", "row", "t", "f", "t", "t", logits, bias, "f", "f", qscale, mask, "f", "f", sink)) # fmt: skip + pipelines.append(FmhaFwdPipeline("qr", "row", "t", "t", "t", "t", logits, bias, "f", "f", qscale, mask, "f", "f", sink)) # fmt: skip + else: + pipelines.append(FmhaFwdPipeline("qr_async", "row", "t", "f", "t", "t", logits, bias, "f", "f", qscale, mask, "f", "f", sink)) # fmt: skip + pipelines.append(FmhaFwdPipeline("qr_async", "row", "t", "t", "t", "t", logits, bias, "f", "f", qscale, mask, "f", "f", sink)) # fmt: skip elif dtype in ["fp8", "fp8fp16", "bf8"]: # TODO pass diff --git a/include/ck_tile/core/arch/amd_buffer_addressing.hpp b/include/ck_tile/core/arch/amd_buffer_addressing.hpp index 9c2ce62856..9f79bdbee6 100644 --- a/include/ck_tile/core/arch/amd_buffer_addressing.hpp +++ b/include/ck_tile/core/arch/amd_buffer_addressing.hpp @@ -1121,6 +1121,20 @@ llvm_amdgcn_raw_buffer_load_i32x2(int32x4_t srsrc, index_t soffset, index_t glc_slc) __asm("llvm.amdgcn.raw.buffer.load.v2i32"); +// dwordx3 - use union to convert between int32x3 and fp16/bf16 types +union dwordx3_union +{ + int32_t as_i32[3]; + fp16_t as_fp16[6]; + bf16_t as_bf16[6]; +}; + +CK_TILE_DEVICE_EXTERN int32x3_t +llvm_amdgcn_raw_buffer_load_i32x3(int32x4_t srsrc, + index_t voffset, + index_t soffset, + index_t glc_slc) __asm("llvm.amdgcn.raw.buffer.load.v3i32"); + CK_TILE_DEVICE_EXTERN int32x4_t llvm_amdgcn_raw_buffer_load_i32x4(int32x4_t srsrc, index_t voffset, @@ -1540,9 +1554,9 @@ CK_TILE_DEVICE thread_buffer amd_buffer_load_impl(int32x4_t src_wave_buffe (std::is_same::value && (N == 1 || N == 2 || N == 4 || N == 8)) || (std::is_same::value && (N == 1 || N == 2 || N == 4 || N == 8 || N == 16)) || (std::is_same::value && - (N == 1 || N == 2 || N == 4 || N == 8 || N == 16 || N == 32)) || + (N == 1 || N == 2 || N == 4 || N == 6 || N == 8 || N == 16 || N == 32)) || (std::is_same::value && - (N == 1 || N == 2 || N == 4 || N == 8 || N == 16 || N == 32)) || + (N == 1 || N == 2 || N == 4 || N == 6 || N == 8 || N == 16 || N == 32)) || (std::is_same::value && (N == 1 || N == 2 || N == 4 || N == 8 || N == 16)) || (std::is_same::value && (N == 1 || N == 2 || N == 4 || N == 8 || N == 16)) || @@ -1659,6 +1673,26 @@ CK_TILE_DEVICE thread_buffer amd_buffer_load_impl(int32x4_t src_wave_buffe src_wave_addr_offset, static_cast(coherence))); } + else if constexpr(N == 6) + { + // N = 6: load as dwordx3 (12 bytes = 6 fp16), using buffer_load_dwordx3 instruction + int32x3_t tmp_i32x3 = + llvm_amdgcn_raw_buffer_load_i32x3(src_wave_buffer_resource, + src_thread_addr_offset, + src_wave_addr_offset, + static_cast(coherence)); + + // Use union to reinterpret int32x3 as fp16x6 + dwordx3_union tmp_union; + tmp_union.as_i32[0] = tmp_i32x3[0]; + tmp_union.as_i32[1] = tmp_i32x3[1]; + tmp_union.as_i32[2] = tmp_i32x3[2]; + + thread_buffer result; + static_for<0, N, 1>{}([&](auto i) { result[i] = tmp_union.as_fp16[i]; }); + + return result; + } else if constexpr(N == 8) { // use fp32 load to mimic fp16 load @@ -1744,6 +1778,26 @@ CK_TILE_DEVICE thread_buffer amd_buffer_load_impl(int32x4_t src_wave_buffe src_wave_addr_offset, static_cast(coherence))); } + else if constexpr(N == 6) + { + // N = 6: load as dwordx3 (12 bytes = 6 bf16), using buffer_load_dwordx3 instruction + int32x3_t tmp_i32x3 = + llvm_amdgcn_raw_buffer_load_i32x3(src_wave_buffer_resource, + src_thread_addr_offset, + src_wave_addr_offset, + static_cast(coherence)); + + // Use union to reinterpret int32x3 as bf16x6 + dwordx3_union tmp_union; + tmp_union.as_i32[0] = tmp_i32x3[0]; + tmp_union.as_i32[1] = tmp_i32x3[1]; + tmp_union.as_i32[2] = tmp_i32x3[2]; + + thread_buffer result; + static_for<0, N, 1>{}([&](auto i) { result[i] = tmp_union.as_bf16[i]; }); + + return result; + } else if constexpr(N == 8) { int32x4_t tmp = llvm_amdgcn_raw_buffer_load_i32x4(src_wave_buffer_resource, diff --git a/include/ck_tile/core/arch/amd_buffer_addressing_builtins.hpp b/include/ck_tile/core/arch/amd_buffer_addressing_builtins.hpp index 764df83539..4627b249d6 100644 --- a/include/ck_tile/core/arch/amd_buffer_addressing_builtins.hpp +++ b/include/ck_tile/core/arch/amd_buffer_addressing_builtins.hpp @@ -989,6 +989,20 @@ llvm_amdgcn_raw_buffer_load_i32x2(int32x4_t srsrc, index_t soffset, index_t glc_slc) __asm("llvm.amdgcn.raw.buffer.load.v2i32"); +// dwordx3 - use union to convert between int32x3 and fp16/bf16 types +union dwordx3_union +{ + int32_t as_i32[3]; + fp16_t as_fp16[6]; + bf16_t as_bf16[6]; +}; + +CK_TILE_DEVICE_EXTERN int32x3_t +llvm_amdgcn_raw_buffer_load_i32x3(int32x4_t srsrc, + index_t voffset, + index_t soffset, + index_t glc_slc) __asm("llvm.amdgcn.raw.buffer.load.v3i32"); + CK_TILE_DEVICE_EXTERN int32x4_t llvm_amdgcn_raw_buffer_load_i32x4(int32x4_t srsrc, index_t voffset, @@ -1408,9 +1422,9 @@ CK_TILE_DEVICE thread_buffer amd_buffer_load_impl(int32x4_t src_wave_buffe (std::is_same::value && (N == 1 || N == 2 || N == 4 || N == 8)) || (std::is_same::value && (N == 1 || N == 2 || N == 4 || N == 8 || N == 16)) || (std::is_same::value && - (N == 1 || N == 2 || N == 4 || N == 8 || N == 16 || N == 32)) || + (N == 1 || N == 2 || N == 4 || N == 6 || N == 8 || N == 16 || N == 32)) || (std::is_same::value && - (N == 1 || N == 2 || N == 4 || N == 8 || N == 16 || N == 32)) || + (N == 1 || N == 2 || N == 4 || N == 6 || N == 8 || N == 16 || N == 32)) || (std::is_same::value && (N == 1 || N == 2 || N == 4 || N == 8 || N == 16)) || (std::is_same::value && (N == 1 || N == 2 || N == 4 || N == 8 || N == 16)) || @@ -1529,6 +1543,26 @@ CK_TILE_DEVICE thread_buffer amd_buffer_load_impl(int32x4_t src_wave_buffe src_wave_addr_offset, static_cast(coherence))); } + else if constexpr(N == 6) + { + // N = 6: load as dwordx3 (12 bytes = 6 fp16), using buffer_load_dwordx3 instruction + int32x3_t tmp_i32x3 = + llvm_amdgcn_raw_buffer_load_i32x3(src_wave_buffer_resource, + src_thread_addr_offset, + src_wave_addr_offset, + static_cast(coherence)); + + // Use union to reinterpret int32x3 as fp16x6 + dwordx3_union tmp_union; + tmp_union.as_i32[0] = tmp_i32x3[0]; + tmp_union.as_i32[1] = tmp_i32x3[1]; + tmp_union.as_i32[2] = tmp_i32x3[2]; + + thread_buffer result; + static_for<0, N, 1>{}([&](auto i) { result[i] = tmp_union.as_fp16[i]; }); + + return result; + } else { // N >= 8: build from fp32x4 chunks @@ -1571,6 +1605,26 @@ CK_TILE_DEVICE thread_buffer amd_buffer_load_impl(int32x4_t src_wave_buffe src_wave_addr_offset, static_cast(coherence))); } + else if constexpr(N == 6) + { + // N = 6: load as dwordx3 (12 bytes = 6 bf16), using buffer_load_dwordx3 instruction + int32x3_t tmp_i32x3 = + llvm_amdgcn_raw_buffer_load_i32x3(src_wave_buffer_resource, + src_thread_addr_offset, + src_wave_addr_offset, + static_cast(coherence)); + + // Use union to reinterpret int32x3 as bf16x6 + dwordx3_union tmp_union; + tmp_union.as_i32[0] = tmp_i32x3[0]; + tmp_union.as_i32[1] = tmp_i32x3[1]; + tmp_union.as_i32[2] = tmp_i32x3[2]; + + thread_buffer result; + static_for<0, N, 1>{}([&](auto i) { result[i] = tmp_union.as_bf16[i]; }); + + return result; + } else { // N >= 8: build from fp32x4 chunks diff --git a/include/ck_tile/core/numeric/vector_type.hpp b/include/ck_tile/core/numeric/vector_type.hpp index 6921210b34..90ddc2a56e 100644 --- a/include/ck_tile/core/numeric/vector_type.hpp +++ b/include/ck_tile/core/numeric/vector_type.hpp @@ -152,6 +152,7 @@ using bf16x64_t = bfloat16_t __attribute__((ext_vector_type(64))); // i32 // using int32_t = ... using int32x2_t = int32_t __attribute__((ext_vector_type(2))); +using int32x3_t = int32_t __attribute__((ext_vector_type(3))); using int32x4_t = int32_t __attribute__((ext_vector_type(4))); using int32x8_t = int32_t __attribute__((ext_vector_type(8))); using int32x16_t = int32_t __attribute__((ext_vector_type(16))); diff --git a/include/ck_tile/ops/fmha/pipeline/tile_fmha_shape.hpp b/include/ck_tile/ops/fmha/pipeline/tile_fmha_shape.hpp index ee5238869f..4045e31b17 100644 --- a/include/ck_tile/ops/fmha/pipeline/tile_fmha_shape.hpp +++ b/include/ck_tile/ops/fmha/pipeline/tile_fmha_shape.hpp @@ -12,6 +12,8 @@ static CK_TILE_HOST_DEVICE constexpr index_t ceil_to_qualified_tile_length() { if constexpr(Headdim == 48) return 48; + else if constexpr(Headdim == 80) + return 96; else if constexpr(Headdim == 96) return 128; else if constexpr(Headdim == 160) diff --git a/script/cmake-ck-dev.sh b/script/cmake-ck-dev.sh index 9643af1de0..31d724deb6 100755 --- a/script/cmake-ck-dev.sh +++ b/script/cmake-ck-dev.sh @@ -25,7 +25,7 @@ GPU_TARGETS="gfx908;gfx90a;gfx942" if [ $# -ge 1 ]; then case "$1" in gfx*) - GPU_TARGETS=$1 + GPU_TARGETS="$1" shift 1 echo "GPU targets provided: $GPU_TARGETS" REST_ARGS=("$@") @@ -44,8 +44,8 @@ cmake -D CMAKE_CXX_FLAGS="-ftemplate-backtrace-limit=0 -fPIE -Wno-gnu-line-marker -fbracket-depth=512" \ -D CMAKE_BUILD_TYPE=Release \ -D BUILD_DEV=ON \ --D GPU_TARGETS=$GPU_TARGETS \ +-D GPU_TARGETS="$GPU_TARGETS" \ -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \ -D USE_BITINT_EXTENSION_INT4=OFF \ -"${REST_ARGS[@]}" \ \ -${MY_PROJECT_SOURCE} +"${REST_ARGS[@]}" \ +"${MY_PROJECT_SOURCE}" diff --git a/script/monitor_sccache_during_build.sh b/script/monitor_sccache_during_build.sh new file mode 100644 index 0000000000..3f52d73c4c --- /dev/null +++ b/script/monitor_sccache_during_build.sh @@ -0,0 +1,119 @@ +#!/bin/bash +# Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +# Continuous monitoring script for sccache during builds +# Usage: ./monitor_sccache_during_build.sh [log_prefix] & + +LOG_PREFIX=${1:-"sccache_monitor"} + +# Include stage name in log filename if available +STAGE_SUFFIX="" +if [ -n "${STAGE_NAME}" ]; then + # Convert stage name to filename-safe format (replace spaces and special chars with underscores) + STAGE_SAFE=$(echo "${STAGE_NAME}" | sed 's/[^a-zA-Z0-9]/_/g' | sed 's/__*/_/g' | sed 's/^_\|_$//g') + STAGE_SUFFIX="_${STAGE_SAFE}" +fi + +MONITOR_LOG="logs/${LOG_PREFIX}_$(date +%Y%m%d_%H%M%S)${STAGE_SUFFIX}.log" +MONITOR_INTERVAL=30 # seconds + +echo "Starting sccache monitoring - logging to $MONITOR_LOG" +echo "Monitor interval: $MONITOR_INTERVAL seconds" + +# Function to log with timestamp +log_with_timestamp() { + echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$MONITOR_LOG" +} + +# Function to get sccache stats safely +get_sccache_stats() { + if command -v sccache &> /dev/null; then + sccache --show-stats 2>/dev/null || echo "sccache stats unavailable" + else + echo "sccache command not found" + fi +} + +# Function to check if sccache server is running +is_sccache_running() { + if command -v sccache &> /dev/null; then + sccache --show-stats &> /dev/null + return $? + else + return 1 + fi +} + +# Function to test Redis connectivity +test_redis_connectivity() { + # Use SCCACHE_REDIS if set, otherwise construct from CK_SCCACHE + local REDIS_URL="" + if [ -n "${SCCACHE_REDIS}" ]; then + REDIS_URL="${SCCACHE_REDIS}" + elif [ -n "${CK_SCCACHE}" ]; then + REDIS_URL="redis://${CK_SCCACHE}" + fi + + if [ -n "${REDIS_URL}" ]; then + local start_time=$(date +%s%N) + local response=$(timeout 5 redis-cli -u "${REDIS_URL}" ping 2>&1) || response="TIMEOUT" + local end_time=$(date +%s%N) + local latency=$(( (end_time - start_time) / 1000000 )) + echo "Redis: $response (${latency}ms)" + else + echo "Redis: No Redis URL available" + fi +} + +# Gets the last sccache stats before exiting +cleanup() { + log_with_timestamp "=== FINAL SCCACHE STATS EXIT ===" + log_with_timestamp "$(get_sccache_stats)" + echo "=== CONTINUOUS MONITORING STOPPED ===" + # List monitoring logs + echo "=== MONITORING LOGS ===" + ls -la logs/*monitor*.log 2>/dev/null || echo "No monitoring logs found" +} +trap cleanup EXIT + +log_with_timestamp "=== SCCACHE MONITORING STARTED ===" +log_with_timestamp "PID: $$" +log_with_timestamp "Node: ${NODE_NAME:-$(hostname)}" +log_with_timestamp "Stage: ${STAGE_NAME:-unknown}" +log_with_timestamp "WORKSPACE_PATH: ${WORKSPACE:-not set}" +log_with_timestamp "SCCACHE_C_CUSTOM_CACHE_BUSTER: ${SCCACHE_C_CUSTOM_CACHE_BUSTER:-not set}" +log_with_timestamp "CK_SCCACHE: ${CK_SCCACHE:-not set}" + +# Initial state +log_with_timestamp "=== INITIAL STATE ===" +# Reset sscache stats +sccache --zero-stats +log_with_timestamp "$(get_sccache_stats) $(test_redis_connectivity)" + +# Monitor loop +while true; do + sleep $MONITOR_INTERVAL + + # Check if sccache server is still running + if ! is_sccache_running; then + log_with_timestamp "WARNING: sccache server not running!" + fi + + # Get current stats + current_stats=$(get_sccache_stats) + redis_status=$(test_redis_connectivity) + + # Log current cache hit information + log_with_timestamp "$(get_sccache_stats) $(test_redis_connectivity)" + + # Check for Redis latency issues + if echo "$redis_status" | grep -E "[0-9]{3,}" > /dev/null; then # >100ms latency + log_with_timestamp "HIGH REDIS LATENCY detected" + fi + + # Check for Redis connection failures + if echo "$redis_status" | grep -E "(TIMEOUT|Connection refused|No route)" > /dev/null; then + log_with_timestamp "REDIS CONNECTION FAILURE detected" + fi +done \ No newline at end of file