From 18d37379d264f5b08e409ca3b99b3fd4c24f67cc Mon Sep 17 00:00:00 2001 From: Qinghua Zhou Date: Sat, 16 May 2026 23:23:30 +0000 Subject: [PATCH] Tighten NVML IPC domain hash lookup Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/core/utils_internal.cc | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/src/core/utils_internal.cc b/src/core/utils_internal.cc index 2e620b66..adbf8e5b 100644 --- a/src/core/utils_internal.cc +++ b/src/core/utils_internal.cc @@ -208,30 +208,18 @@ uint64_t getFabricHash(const nvmlGpuFabricInfo_t& fabricInfo) { bool tryGetNvmlIpcDomainHash(uint64_t& ipcDomainHash) { // Use the current CUDA device; callers must set the rank's device before querying. int deviceId; - if (cudaGetDevice(&deviceId) != cudaSuccess) { - return false; - } - char pciBusId[] = "00000000:00:00.0"; - if (cudaDeviceGetPCIBusId(pciBusId, sizeof(pciBusId), deviceId) != cudaSuccess) { + if (cudaGetDevice(&deviceId) != cudaSuccess || + cudaDeviceGetPCIBusId(pciBusId, sizeof(pciBusId), deviceId) != cudaSuccess) { return false; } static NvmlState nvml; - if (!nvml.isInitialized()) { - return false; - } - nvmlDevice_t nvmlDevice; - if (nvmlDeviceGetHandleByPciBusId_v2(pciBusId, &nvmlDevice) != NVML_SUCCESS) { - return false; - } - nvmlGpuFabricInfo_t fabricInfo = {}; - if (nvmlDeviceGetGpuFabricInfo(nvmlDevice, &fabricInfo) != NVML_SUCCESS) { - return false; - } - if (fabricInfo.state != NVML_GPU_FABRIC_STATE_COMPLETED || fabricInfo.status != NVML_SUCCESS) { + if (!nvml.isInitialized() || nvmlDeviceGetHandleByPciBusId_v2(pciBusId, &nvmlDevice) != NVML_SUCCESS || + nvmlDeviceGetGpuFabricInfo(nvmlDevice, &fabricInfo) != NVML_SUCCESS || + fabricInfo.state != NVML_GPU_FABRIC_STATE_COMPLETED || fabricInfo.status != NVML_SUCCESS) { return false; }