Merge commit '92653168c2b276d4467320f5bdff5ec6cbddf4e6' into develop

This commit is contained in:
assistant-librarian[bot]
2025-12-17 17:16:17 +00:00
parent 28b493d331
commit c5faecd894
11 changed files with 309 additions and 26 deletions

View File

@@ -17,21 +17,22 @@ Documentation for Composable Kernel available at [https://rocm.docs.amd.com/proj
## Composable Kernel 1.2.0 for ROCm 7.2.0
### Added
* Added support for fp8 dynamic tensor-wise quantization of fp8 fmha fwd kernel.
* Added support for bf16 data type to grouped_gemm and grouped_gemm_preshuffle.
* Added Col-Col-Row-Col layout support for aquant mode in blockscale GEMM.
* Added support for mixed precision fp8 x bf8 universal GEMM and weight preshuffle GEMM
* Added a compute async pipeline in the CK TILE universal GEMM on gfx950
* Added support for B Tensor type pk_int4_t in the CK TILE weight preshuffle GEMM.
* Added support for mixed precision fp8 x bf8 universal GEMM and weight preshuffle GEMM.
* Added a compute async pipeline in the CK Tile universal GEMM on gfx950.
* Added support for B Tensor type `pk_int4_t` in the CK Tile weight preshuffle GEMM.
* Added the new api to load different memory sizes to SGPR.
* Added support for B Tensor Preshuffle in CK TILE Grouped GEMM.
* Added support for B Tensor preshuffle in CK Tile grouped GEMM.
* Added a basic copy kernel example and supporting documentation for new CK Tile developers.
* Added support for grouped_gemm kernels to perform multi_d elementwise operation.
* Added support for Multiple ABD GEMM
* Added support for grouped GEMM kernels to perform Multi D elementwise operation.
* Added support for multiple ABD GEMM.
* Added benchmarking support for tile engine GEMM Multi D.
* Added block scaling support in CK_TILE GEMM, allowing flexible use of quantization matrices from either A or B operands.
* Added the row-wise column-wise quantization for CK_TILE GEMM & CK_TILE Grouped GEMM.
* Added support for f32 to FMHA (fwd/bwd).
* Added tensor-wise quantization for CK_TILE GEMM.
* Added block scaling support in CK Tile GEMM, allowing flexible use of quantization matrices from either A or B operands.
* Added the row-wise column-wise quantization for CK Tile GEMM and CK Tile grouped GEMM.
* Added support for f32 to FMHA (forward and backward).
* Added tensor-wise quantization for CK Tile GEMM.
* Added support for batched contraction kernel.
* Added WMMA (gfx12) support for FMHA.
* Added pooling kernel in CK_TILE
@@ -41,7 +42,7 @@ Documentation for Composable Kernel available at [https://rocm.docs.amd.com/proj
### Changed
* Removed `BlockSize` in `make_kernel` and `CShuffleEpilogueProblem` to support Wave32 in CK_TILE (#2594)
* Removed `BlockSize` in `make_kernel` and `CShuffleEpilogueProblem` to support Wave32 in CK Tile (#2594)
* Added an optional template parameter `Arch` (`gfx9_t`, `gfx12_t` etc.) to `make_kernel` to support linking multiple object files that have the same kernel compiled for different architectures.
* FMHA examples and tests can be built for multiple architectures (gfx9, gfx950, gfx12) at the same time.
@@ -91,7 +92,7 @@ Documentation for Composable Kernel available at [https://rocm.docs.amd.com/proj
### Optimized
* Optimize the gemm multiply multiply preshuffle & lds bypass with Pack of KGroup and better instruction layout.
* Added Vectorize Transpose optimization for CK Tile
* Added Vectorize Transpose optimization for CK Tile
* Added the asynchronous copy for gfx950
### Changed

41
Jenkinsfile vendored
View File

@@ -622,8 +622,45 @@ def cmake_build(Map conf=[:]){
echo cmd
dir("build"){
//build CK
sh cmd
// Start sccache monitoring
if(check_host() && params.USE_SCCACHE && "${env.CK_SCCACHE}" != "null" && "${invocation_tag}" != "") {
sh """
chmod +x ../script/monitor_sccache_during_build.sh
mkdir -p logs
export SCCACHE_C_CUSTOM_CACHE_BUSTER="${invocation_tag}"
../script/monitor_sccache_during_build.sh build_monitor &
MONITOR_PID=\$!
echo "Monitor PID: \$MONITOR_PID"
echo \$MONITOR_PID > monitor.pid
"""
}
try {
//build CK
sh cmd
} catch (Exception buildError) {
echo "Build failed: ${buildError.getMessage()}"
throw buildError
} finally {
// Stop sccache monitoring
if(check_host() && params.USE_SCCACHE && "${env.CK_SCCACHE}" != "null" && "${invocation_tag}" != "") {
sh """
# Stop monitoring
if [ -f monitor.pid ]; then
MONITOR_PID=\$(cat monitor.pid)
kill \$MONITOR_PID 2>/dev/null || echo "Monitor already stopped"
rm -f monitor.pid
fi
"""
// Archive the monitoring logs
try {
archiveArtifacts artifacts: "logs/*monitor*.log", allowEmptyArchive: true
} catch (Exception e) {
echo "Could not archive sccache monitoring logs: ${e.getMessage()}"
}
}
}
//run tests except when NO_CK_BUILD or BUILD_LEGACY_OS are set
if(!setup_args.contains("NO_CK_BUILD") && !params.BUILD_LEGACY_OS){
sh "python3 ../script/ninja_json_converter.py .ninja_log --legacy-format --output ck_build_trace_${check_arch_name()}.json"

View File

@@ -15,6 +15,7 @@ configure_file(${CK_ROOT}/include/ck/config.h.in ${CK_ROOT}/include/ck/config.h)
find_package(ROCM)
include(ROCMInstallTargets)
include(ROCMTest)
list(APPEND CMAKE_PREFIX_PATH /opt/rocm $ENV{ROCM_PATH})
find_package(hiprtc REQUIRED)
rocm_setup_version(VERSION 1.0)

View File

@@ -47,7 +47,7 @@ set(FMHA_FWD_CODE_GEN_COMMON_ARGS
${CMAKE_CURRENT_LIST_DIR}/generate.py
--targets ${FMHA_TARGETS_ARG}
--api ${FMHA_FWD_APIS}
--optdim 32,64,128,256
--optdim 32,64,80,128,256
# --filter fmha_fwd...
)
set(FMHA_BWD_CODE_GEN_COMMON_ARGS

View File

@@ -40,7 +40,16 @@ DTYPE_BITS = {
"bf8": 8,
}
K0_MAX_SUBMAX_MAP = {32: 32, 48: 48, 64: 64, 96: 128, 128: 128, 192: 192, 256: 256}
K0_MAX_SUBMAX_MAP = {
32: 32,
48: 48,
64: 64,
80: 96,
96: 128,
128: 128,
192: 192,
256: 256,
}
FMHA_FWD_KERNEL_HEADER = """// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.\n
@@ -930,6 +939,7 @@ class KernelComponentFactoryGfx9(CompatibilityRuleFactoryGfx9):
( 64, 64) : [FmhaFwdTileSize( 16, 32, 64, 64, 32, 64, 1, 1, 1, 1, 1, 1, 16, 16, 32, 16, 16, 32, -1),
FmhaFwdTileSize( 32, 32, 64, 64, 32, 64, 1, 1, 1, 1, 1, 1, 32, 32, 16, 32, 32, 16, -1),
FmhaFwdTileSize(128, 64, 32, 64, 32, 64, 4, 1, 1, 4, 1, 1, 32, 32, 16, 32, 32, 16, -1)],
( 80, 96) : [FmhaFwdTileSize(128, 128, 16, 96, 32, 80, 4, 1, 1, 4, 1, 1, 32, 32, 16, 32, 32, 16, -1)],
( 96, 128) : [FmhaFwdTileSize(128, 128, 32, 128, 32, 96, 4, 1, 1, 4, 1, 1, 32, 32, 16, 32, 32, 16, -1)],
(128, 128) : [FmhaFwdTileSize( 16, 32, 64, 128, 32, 128, 1, 1, 1, 1, 1, 1, 16, 16, 32, 16, 16, 32, -1),
FmhaFwdTileSize( 32, 32, 128, 128, 32, 128, 1, 1, 1, 1, 1, 1, 32, 32, 16, 32, 32, 16, -1),
@@ -1014,8 +1024,12 @@ class KernelComponentFactoryGfx9(CompatibilityRuleFactoryGfx9):
["no"],
["f", "t"],
):
pipelines.append(FmhaFwdPipeline("qr_async", "row", "t", "f", "t", "t", logits, bias, "f", "f", qscale, mask, "f", "f", sink)) # fmt: skip
pipelines.append(FmhaFwdPipeline("qr_async", "row", "t", "t", "t", "t", logits, bias, "f", "f", qscale, mask, "f", "f", sink)) # fmt: skip
if hdim == 64:
pipelines.append(FmhaFwdPipeline("qr", "row", "t", "f", "t", "t", logits, bias, "f", "f", qscale, mask, "f", "f", sink)) # fmt: skip
pipelines.append(FmhaFwdPipeline("qr", "row", "t", "t", "t", "t", logits, bias, "f", "f", qscale, mask, "f", "f", sink)) # fmt: skip
else:
pipelines.append(FmhaFwdPipeline("qr_async", "row", "t", "f", "t", "t", logits, bias, "f", "f", qscale, mask, "f", "f", sink)) # fmt: skip
pipelines.append(FmhaFwdPipeline("qr_async", "row", "t", "t", "t", "t", logits, bias, "f", "f", qscale, mask, "f", "f", sink)) # fmt: skip
elif dtype in ["fp8", "fp8fp16", "bf8"]:
# TODO
pass

View File

@@ -1121,6 +1121,20 @@ llvm_amdgcn_raw_buffer_load_i32x2(int32x4_t srsrc,
index_t soffset,
index_t glc_slc) __asm("llvm.amdgcn.raw.buffer.load.v2i32");
// dwordx3 - use union to convert between int32x3 and fp16/bf16 types
union dwordx3_union
{
int32_t as_i32[3];
fp16_t as_fp16[6];
bf16_t as_bf16[6];
};
CK_TILE_DEVICE_EXTERN int32x3_t
llvm_amdgcn_raw_buffer_load_i32x3(int32x4_t srsrc,
index_t voffset,
index_t soffset,
index_t glc_slc) __asm("llvm.amdgcn.raw.buffer.load.v3i32");
CK_TILE_DEVICE_EXTERN int32x4_t
llvm_amdgcn_raw_buffer_load_i32x4(int32x4_t srsrc,
index_t voffset,
@@ -1540,9 +1554,9 @@ CK_TILE_DEVICE thread_buffer<T, N> amd_buffer_load_impl(int32x4_t src_wave_buffe
(std::is_same<T, double>::value && (N == 1 || N == 2 || N == 4 || N == 8)) ||
(std::is_same<T, float>::value && (N == 1 || N == 2 || N == 4 || N == 8 || N == 16)) ||
(std::is_same<T, fp16_t>::value &&
(N == 1 || N == 2 || N == 4 || N == 8 || N == 16 || N == 32)) ||
(N == 1 || N == 2 || N == 4 || N == 6 || N == 8 || N == 16 || N == 32)) ||
(std::is_same<T, bf16_t>::value &&
(N == 1 || N == 2 || N == 4 || N == 8 || N == 16 || N == 32)) ||
(N == 1 || N == 2 || N == 4 || N == 6 || N == 8 || N == 16 || N == 32)) ||
(std::is_same<T, int32_t>::value &&
(N == 1 || N == 2 || N == 4 || N == 8 || N == 16)) ||
(std::is_same<T, fp8_t>::value && (N == 1 || N == 2 || N == 4 || N == 8 || N == 16)) ||
@@ -1659,6 +1673,26 @@ CK_TILE_DEVICE thread_buffer<T, N> amd_buffer_load_impl(int32x4_t src_wave_buffe
src_wave_addr_offset,
static_cast<index_t>(coherence)));
}
else if constexpr(N == 6)
{
// N = 6: load as dwordx3 (12 bytes = 6 fp16), using buffer_load_dwordx3 instruction
int32x3_t tmp_i32x3 =
llvm_amdgcn_raw_buffer_load_i32x3(src_wave_buffer_resource,
src_thread_addr_offset,
src_wave_addr_offset,
static_cast<index_t>(coherence));
// Use union to reinterpret int32x3 as fp16x6
dwordx3_union tmp_union;
tmp_union.as_i32[0] = tmp_i32x3[0];
tmp_union.as_i32[1] = tmp_i32x3[1];
tmp_union.as_i32[2] = tmp_i32x3[2];
thread_buffer<fp16_t, N> result;
static_for<0, N, 1>{}([&](auto i) { result[i] = tmp_union.as_fp16[i]; });
return result;
}
else if constexpr(N == 8)
{
// use fp32 load to mimic fp16 load
@@ -1744,6 +1778,26 @@ CK_TILE_DEVICE thread_buffer<T, N> amd_buffer_load_impl(int32x4_t src_wave_buffe
src_wave_addr_offset,
static_cast<index_t>(coherence)));
}
else if constexpr(N == 6)
{
// N = 6: load as dwordx3 (12 bytes = 6 bf16), using buffer_load_dwordx3 instruction
int32x3_t tmp_i32x3 =
llvm_amdgcn_raw_buffer_load_i32x3(src_wave_buffer_resource,
src_thread_addr_offset,
src_wave_addr_offset,
static_cast<index_t>(coherence));
// Use union to reinterpret int32x3 as bf16x6
dwordx3_union tmp_union;
tmp_union.as_i32[0] = tmp_i32x3[0];
tmp_union.as_i32[1] = tmp_i32x3[1];
tmp_union.as_i32[2] = tmp_i32x3[2];
thread_buffer<bf16_t, N> result;
static_for<0, N, 1>{}([&](auto i) { result[i] = tmp_union.as_bf16[i]; });
return result;
}
else if constexpr(N == 8)
{
int32x4_t tmp = llvm_amdgcn_raw_buffer_load_i32x4(src_wave_buffer_resource,

View File

@@ -989,6 +989,20 @@ llvm_amdgcn_raw_buffer_load_i32x2(int32x4_t srsrc,
index_t soffset,
index_t glc_slc) __asm("llvm.amdgcn.raw.buffer.load.v2i32");
// dwordx3 - use union to convert between int32x3 and fp16/bf16 types
union dwordx3_union
{
int32_t as_i32[3];
fp16_t as_fp16[6];
bf16_t as_bf16[6];
};
CK_TILE_DEVICE_EXTERN int32x3_t
llvm_amdgcn_raw_buffer_load_i32x3(int32x4_t srsrc,
index_t voffset,
index_t soffset,
index_t glc_slc) __asm("llvm.amdgcn.raw.buffer.load.v3i32");
CK_TILE_DEVICE_EXTERN int32x4_t
llvm_amdgcn_raw_buffer_load_i32x4(int32x4_t srsrc,
index_t voffset,
@@ -1408,9 +1422,9 @@ CK_TILE_DEVICE thread_buffer<T, N> amd_buffer_load_impl(int32x4_t src_wave_buffe
(std::is_same<T, double>::value && (N == 1 || N == 2 || N == 4 || N == 8)) ||
(std::is_same<T, float>::value && (N == 1 || N == 2 || N == 4 || N == 8 || N == 16)) ||
(std::is_same<T, fp16_t>::value &&
(N == 1 || N == 2 || N == 4 || N == 8 || N == 16 || N == 32)) ||
(N == 1 || N == 2 || N == 4 || N == 6 || N == 8 || N == 16 || N == 32)) ||
(std::is_same<T, bf16_t>::value &&
(N == 1 || N == 2 || N == 4 || N == 8 || N == 16 || N == 32)) ||
(N == 1 || N == 2 || N == 4 || N == 6 || N == 8 || N == 16 || N == 32)) ||
(std::is_same<T, int32_t>::value &&
(N == 1 || N == 2 || N == 4 || N == 8 || N == 16)) ||
(std::is_same<T, fp8_t>::value && (N == 1 || N == 2 || N == 4 || N == 8 || N == 16)) ||
@@ -1529,6 +1543,26 @@ CK_TILE_DEVICE thread_buffer<T, N> amd_buffer_load_impl(int32x4_t src_wave_buffe
src_wave_addr_offset,
static_cast<index_t>(coherence)));
}
else if constexpr(N == 6)
{
// N = 6: load as dwordx3 (12 bytes = 6 fp16), using buffer_load_dwordx3 instruction
int32x3_t tmp_i32x3 =
llvm_amdgcn_raw_buffer_load_i32x3(src_wave_buffer_resource,
src_thread_addr_offset,
src_wave_addr_offset,
static_cast<index_t>(coherence));
// Use union to reinterpret int32x3 as fp16x6
dwordx3_union tmp_union;
tmp_union.as_i32[0] = tmp_i32x3[0];
tmp_union.as_i32[1] = tmp_i32x3[1];
tmp_union.as_i32[2] = tmp_i32x3[2];
thread_buffer<fp16_t, N> result;
static_for<0, N, 1>{}([&](auto i) { result[i] = tmp_union.as_fp16[i]; });
return result;
}
else
{
// N >= 8: build from fp32x4 chunks
@@ -1571,6 +1605,26 @@ CK_TILE_DEVICE thread_buffer<T, N> amd_buffer_load_impl(int32x4_t src_wave_buffe
src_wave_addr_offset,
static_cast<index_t>(coherence)));
}
else if constexpr(N == 6)
{
// N = 6: load as dwordx3 (12 bytes = 6 bf16), using buffer_load_dwordx3 instruction
int32x3_t tmp_i32x3 =
llvm_amdgcn_raw_buffer_load_i32x3(src_wave_buffer_resource,
src_thread_addr_offset,
src_wave_addr_offset,
static_cast<index_t>(coherence));
// Use union to reinterpret int32x3 as bf16x6
dwordx3_union tmp_union;
tmp_union.as_i32[0] = tmp_i32x3[0];
tmp_union.as_i32[1] = tmp_i32x3[1];
tmp_union.as_i32[2] = tmp_i32x3[2];
thread_buffer<bf16_t, N> result;
static_for<0, N, 1>{}([&](auto i) { result[i] = tmp_union.as_bf16[i]; });
return result;
}
else
{
// N >= 8: build from fp32x4 chunks

View File

@@ -152,6 +152,7 @@ using bf16x64_t = bfloat16_t __attribute__((ext_vector_type(64)));
// i32
// using int32_t = ...
using int32x2_t = int32_t __attribute__((ext_vector_type(2)));
using int32x3_t = int32_t __attribute__((ext_vector_type(3)));
using int32x4_t = int32_t __attribute__((ext_vector_type(4)));
using int32x8_t = int32_t __attribute__((ext_vector_type(8)));
using int32x16_t = int32_t __attribute__((ext_vector_type(16)));

View File

@@ -12,6 +12,8 @@ static CK_TILE_HOST_DEVICE constexpr index_t ceil_to_qualified_tile_length()
{
if constexpr(Headdim == 48)
return 48;
else if constexpr(Headdim == 80)
return 96;
else if constexpr(Headdim == 96)
return 128;
else if constexpr(Headdim == 160)

View File

@@ -25,7 +25,7 @@ GPU_TARGETS="gfx908;gfx90a;gfx942"
if [ $# -ge 1 ]; then
case "$1" in
gfx*)
GPU_TARGETS=$1
GPU_TARGETS="$1"
shift 1
echo "GPU targets provided: $GPU_TARGETS"
REST_ARGS=("$@")
@@ -44,8 +44,8 @@ cmake
-D CMAKE_CXX_FLAGS="-ftemplate-backtrace-limit=0 -fPIE -Wno-gnu-line-marker -fbracket-depth=512" \
-D CMAKE_BUILD_TYPE=Release \
-D BUILD_DEV=ON \
-D GPU_TARGETS=$GPU_TARGETS \
-D GPU_TARGETS="$GPU_TARGETS" \
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
-D USE_BITINT_EXTENSION_INT4=OFF \
"${REST_ARGS[@]}" \ \
${MY_PROJECT_SOURCE}
"${REST_ARGS[@]}" \
"${MY_PROJECT_SOURCE}"

View File

@@ -0,0 +1,119 @@
#!/bin/bash
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT
# Continuous monitoring script for sccache during builds
# Usage: ./monitor_sccache_during_build.sh [log_prefix] &
LOG_PREFIX=${1:-"sccache_monitor"}
# Include stage name in log filename if available
STAGE_SUFFIX=""
if [ -n "${STAGE_NAME}" ]; then
# Convert stage name to filename-safe format (replace spaces and special chars with underscores)
STAGE_SAFE=$(echo "${STAGE_NAME}" | sed 's/[^a-zA-Z0-9]/_/g' | sed 's/__*/_/g' | sed 's/^_\|_$//g')
STAGE_SUFFIX="_${STAGE_SAFE}"
fi
MONITOR_LOG="logs/${LOG_PREFIX}_$(date +%Y%m%d_%H%M%S)${STAGE_SUFFIX}.log"
MONITOR_INTERVAL=30 # seconds
echo "Starting sccache monitoring - logging to $MONITOR_LOG"
echo "Monitor interval: $MONITOR_INTERVAL seconds"
# Function to log with timestamp
log_with_timestamp() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$MONITOR_LOG"
}
# Function to get sccache stats safely
get_sccache_stats() {
if command -v sccache &> /dev/null; then
sccache --show-stats 2>/dev/null || echo "sccache stats unavailable"
else
echo "sccache command not found"
fi
}
# Function to check if sccache server is running
is_sccache_running() {
if command -v sccache &> /dev/null; then
sccache --show-stats &> /dev/null
return $?
else
return 1
fi
}
# Function to test Redis connectivity
test_redis_connectivity() {
# Use SCCACHE_REDIS if set, otherwise construct from CK_SCCACHE
local REDIS_URL=""
if [ -n "${SCCACHE_REDIS}" ]; then
REDIS_URL="${SCCACHE_REDIS}"
elif [ -n "${CK_SCCACHE}" ]; then
REDIS_URL="redis://${CK_SCCACHE}"
fi
if [ -n "${REDIS_URL}" ]; then
local start_time=$(date +%s%N)
local response=$(timeout 5 redis-cli -u "${REDIS_URL}" ping 2>&1) || response="TIMEOUT"
local end_time=$(date +%s%N)
local latency=$(( (end_time - start_time) / 1000000 ))
echo "Redis: $response (${latency}ms)"
else
echo "Redis: No Redis URL available"
fi
}
# Gets the last sccache stats before exiting
cleanup() {
log_with_timestamp "=== FINAL SCCACHE STATS EXIT ==="
log_with_timestamp "$(get_sccache_stats)"
echo "=== CONTINUOUS MONITORING STOPPED ==="
# List monitoring logs
echo "=== MONITORING LOGS ==="
ls -la logs/*monitor*.log 2>/dev/null || echo "No monitoring logs found"
}
trap cleanup EXIT
log_with_timestamp "=== SCCACHE MONITORING STARTED ==="
log_with_timestamp "PID: $$"
log_with_timestamp "Node: ${NODE_NAME:-$(hostname)}"
log_with_timestamp "Stage: ${STAGE_NAME:-unknown}"
log_with_timestamp "WORKSPACE_PATH: ${WORKSPACE:-not set}"
log_with_timestamp "SCCACHE_C_CUSTOM_CACHE_BUSTER: ${SCCACHE_C_CUSTOM_CACHE_BUSTER:-not set}"
log_with_timestamp "CK_SCCACHE: ${CK_SCCACHE:-not set}"
# Initial state
log_with_timestamp "=== INITIAL STATE ==="
# Reset sscache stats
sccache --zero-stats
log_with_timestamp "$(get_sccache_stats) $(test_redis_connectivity)"
# Monitor loop
while true; do
sleep $MONITOR_INTERVAL
# Check if sccache server is still running
if ! is_sccache_running; then
log_with_timestamp "WARNING: sccache server not running!"
fi
# Get current stats
current_stats=$(get_sccache_stats)
redis_status=$(test_redis_connectivity)
# Log current cache hit information
log_with_timestamp "$(get_sccache_stats) $(test_redis_connectivity)"
# Check for Redis latency issues
if echo "$redis_status" | grep -E "[0-9]{3,}" > /dev/null; then # >100ms latency
log_with_timestamp "HIGH REDIS LATENCY detected"
fi
# Check for Redis connection failures
if echo "$redis_status" | grep -E "(TIMEOUT|Connection refused|No route)" > /dev/null; then
log_with_timestamp "REDIS CONNECTION FAILURE detected"
fi
done