550.67

2026-02-02 22:47:25 +00:00 · 2024-03-19 16:56:28 +01:00
parent 12933b2d3c
commit 3bf16b890c
78 changed files with 1400 additions and 590 deletions
--- a/src/nvidia/Makefile
+++ b/src/nvidia/Makefile
@@ -90,6 +90,7 @@ ifeq ($(TARGET_ARCH),aarch64)
  CFLAGS += -mgeneral-regs-only
  CFLAGS += -march=armv8-a
  CFLAGS += -mstrict-align
+  CFLAGS += -ffixed-x18
  CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mno-outline-atomics)
 endif

--- a/src/nvidia/arch/nvalloc/unix/src/os-hypervisor.c
+++ b/src/nvidia/arch/nvalloc/unix/src/os-hypervisor.c
@@ -74,7 +74,7 @@ NV_STATUS hypervisorInjectInterrupt_IMPL
    NV_STATUS status = NV_ERR_NOT_SUPPORTED;

    if (pVgpuNsIntr->pVgpuVfioRef)
-        status = osVgpuInjectInterrupt(pVgpuNsIntr->pVgpuVfioRef);
+        return NV_ERR_NOT_SUPPORTED;
    else
    {
        if (pVgpuNsIntr->guestMSIAddr && pVgpuNsIntr->guestMSIData)
@@ -142,14 +142,22 @@ static NV_STATUS get_available_instances(

                swizzIdInUseMask = kmigmgrGetSwizzIdInUseMask(pGpu, pKernelMIGManager);

+                if (!vgpuTypeInfo->gpuInstanceSize)
+                {
+                    // Query for a non MIG vgpuType
+                    NV_PRINTF(LEVEL_INFO, "%s Query for a non MIG vGPU type \n",
+                              __FUNCTION__);
+                    rmStatus = NV_OK;
+                    goto exit;
+                }
+
                rmStatus = kvgpumgrGetPartitionFlag(vgpuTypeInfo->vgpuTypeId,
                                                   &partitionFlag);
                if (rmStatus != NV_OK)
                {
                    // Query for a non MIG vgpuType
-                    NV_PRINTF(LEVEL_ERROR, "%s Query for a non MIG vGPU type \n",
+                    NV_PRINTF(LEVEL_ERROR, "%s failed to get partition flags.\n",
                              __FUNCTION__);
-                    rmStatus = NV_OK;
                    goto exit;
                }

@@ -192,7 +200,7 @@ static NV_STATUS get_available_instances(
                if (vgpuTypeInfo->gpuInstanceSize)
                {
                    // Query for a MIG vgpuType
-                    NV_PRINTF(LEVEL_ERROR, "%s Query for a MIG vGPU type \n",
+                    NV_PRINTF(LEVEL_INFO, "%s Query for a MIG vGPU type \n",
                              __FUNCTION__);
                    rmStatus = NV_OK;
                    goto exit;
--- a/src/nvidia/generated/g_gpu_nvoc.h
+++ b/src/nvidia/generated/g_gpu_nvoc.h
@@ -1255,6 +1255,7 @@ struct OBJGPU {
    TMR_EVENT *pVideoTimerEvent;
    NVENC_SESSION_LIST nvencSessionList;
    NvU32 encSessionStatsReportingState;
+    NvBool bNvEncSessionDataProcessingWorkItemPending;
    NVFBC_SESSION_LIST nvfbcSessionList;
    struct OBJVASPACE *pFabricVAS;
    NvBool bPipelinedPteMemEnabled;
--- a/src/nvidia/generated/g_nv_name_released.h
+++ b/src/nvidia/generated/g_nv_name_released.h
@@ -1014,6 +1014,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
    { 0x2702, 0x0000, 0x0000, "NVIDIA GeForce RTX 4080 SUPER" },
    { 0x2704, 0x0000, 0x0000, "NVIDIA GeForce RTX 4080" },
    { 0x2705, 0x0000, 0x0000, "NVIDIA GeForce RTX 4070 Ti SUPER" },
+    { 0x2709, 0x0000, 0x0000, "NVIDIA GeForce RTX 4070" },
    { 0x2717, 0x0000, 0x0000, "NVIDIA GeForce RTX 4090 Laptop GPU" },
    { 0x2730, 0x0000, 0x0000, "NVIDIA RTX 5000 Ada Generation Laptop GPU" },
    { 0x2757, 0x0000, 0x0000, "NVIDIA GeForce RTX 4090 Laptop GPU" },
@@ -1021,6 +1022,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
    { 0x2782, 0x0000, 0x0000, "NVIDIA GeForce RTX 4070 Ti" },
    { 0x2783, 0x0000, 0x0000, "NVIDIA GeForce RTX 4070 SUPER" },
    { 0x2786, 0x0000, 0x0000, "NVIDIA GeForce RTX 4070" },
+    { 0x2788, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Ti" },
    { 0x27A0, 0x0000, 0x0000, "NVIDIA GeForce RTX 4080 Laptop GPU" },
    { 0x27B0, 0x16fa, 0x1028, "NVIDIA RTX 4000 SFF Ada Generation" },
    { 0x27B0, 0x16fa, 0x103c, "NVIDIA RTX 4000 SFF Ada Generation" },
@@ -1043,6 +1045,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
    { 0x27FB, 0x0000, 0x0000, "NVIDIA RTX 3500 Ada Generation Embedded GPU" },
    { 0x2803, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Ti" },
    { 0x2805, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Ti" },
+    { 0x2808, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060" },
    { 0x2820, 0x0000, 0x0000, "NVIDIA GeForce RTX 4070 Laptop GPU" },
    { 0x2838, 0x0000, 0x0000, "NVIDIA RTX 3000 Ada Generation Laptop GPU" },
    { 0x2860, 0x0000, 0x0000, "NVIDIA GeForce RTX 4070 Laptop GPU" },
--- a/src/nvidia/generated/g_spdm_nvoc.h
+++ b/src/nvidia/generated/g_spdm_nvoc.h
@@ -7,7 +7,7 @@ extern "C" {
 #endif

 /*
- * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -108,6 +108,9 @@ struct Spdm {
    NvU32 sessionMsgCount;
    PTMR_EVENT pHeartbeatEvent;
    NvU32 heartbeatPeriodSec;
+    NvU8 *pTransportBuffer;
+    NvU32 transportBufferSize;
+    NvU32 pendingResponseSize;
 };

 #ifndef __NVOC_CLASS_Spdm_TYPEDEF__
--- a/src/nvidia/generated/g_vgpuconfigapi_nvoc.h
+++ b/src/nvidia/generated/g_vgpuconfigapi_nvoc.h
@@ -7,7 +7,7 @@ extern "C" {
 #endif

 /*
- * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
--- a/src/nvidia/inc/kernel/gpu/gsp/message_queue_priv.h
+++ b/src/nvidia/inc/kernel/gpu/gsp/message_queue_priv.h
@@ -103,4 +103,24 @@ typedef struct MESSAGE_QUEUE_COLLECTION
 #define GSP_MSG_QUEUE_HEADER_SIZE                                   RM_PAGE_SIZE
 #define GSP_MSG_QUEUE_HEADER_ALIGN                                             4   // 2 ^ 4 = 16

+/*!
+ * Calculate 32-bit checksum: XOR the data together as 64-bit words, then XOR
+ * the high and low 32-bit halves of the result. uLen is asserted non-zero.
+ * This routine assumes that the data is padded out with zeros to the next
+ * 8-byte alignment, and it is OK to read past the end to the 8-byte alignment.
+ */
+static NV_INLINE NvU32 _checkSum32(void *pData, NvU32 uLen)
+{
+    NvU64 *p        = (NvU64 *)pData;
+    NvU64 *pEnd     = (NvU64 *)((NvUPtr)pData + uLen);
+    NvU64  checkSum = 0;
+
+    NV_ASSERT_CHECKED(uLen > 0);
+
+    while (p < pEnd)
+        checkSum ^= *p++;
+
+    return NvU64_HI32(checkSum) ^ NvU64_LO32(checkSum);
+}
+
 #endif // _MESSAGE_QUEUE_PRIV_H_
--- a/src/nvidia/src/kernel/gpu/bif/arch/maxwell/kernel_bif_gm107.c
+++ b/src/nvidia/src/kernel/gpu/bif/arch/maxwell/kernel_bif_gm107.c
@@ -585,6 +585,13 @@ kbifRestorePcieConfigRegisters_GM107
    NvU64     timeStampStart;
    NvU64     timeStampEnd;

+    if (pKernelBif->xveRegmapRef[0].bufBootConfigSpace == NULL)
+    {
+        NV_PRINTF(LEVEL_ERROR, "Config space buffer is NULL!\n");
+        NV_ASSERT(0);
+        return NV_ERR_OBJECT_NOT_FOUND;
+    }
+
    // Restore pcie config space for function 0
    status = _kbifRestorePcieConfigRegisters_GM107(pGpu, pKernelBif,
                                                   &pKernelBif->xveRegmapRef[0]);
--- a/src/nvidia/src/kernel/gpu/device.c
+++ b/src/nvidia/src/kernel/gpu/device.c
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
--- a/src/nvidia/src/kernel/gpu/fsp/kern_fsp.c
+++ b/src/nvidia/src/kernel/gpu/fsp/kern_fsp.c
@@ -259,32 +259,50 @@ kfspPollForQueueEmpty_IMPL
    KernelFsp *pKernelFsp
 )
 {
+    NV_STATUS status = NV_OK;
    RMTIMEOUT timeout;

-    gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, GPU_TIMEOUT_FLAGS_OSTIMER | GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
+    gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout,
+        GPU_TIMEOUT_FLAGS_OSTIMER |
+        GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);

    while (!kfspIsQueueEmpty(pGpu, pKernelFsp))
    {
        //
-        // For now we assume that any response from FSP before RM message send is complete
-        // indicates an error and we should abort.
+        // For now we assume that any response from FSP before RM message
+        // send is complete indicates an error and we should abort.
+        //
+        // Ongoing discussion on usefulness of this check. Bug to be filed.
        //
        if (!kfspIsMsgQueueEmpty(pGpu, pKernelFsp))
        {
            kfspReadMessage(pGpu, pKernelFsp, NULL, 0);
-            NV_PRINTF(LEVEL_ERROR, "Received error message from FSP while waiting for CMDQ to be empty.\n");
-            return NV_ERR_GENERIC;
+            NV_PRINTF(LEVEL_ERROR,
+                "Received error message from FSP while waiting for CMDQ to be empty.\n");
+            status = NV_ERR_GENERIC;
+            break;
        }

-        if (gpuCheckTimeout(pGpu, &timeout) == NV_ERR_TIMEOUT)
-        {
-            NV_PRINTF(LEVEL_ERROR, "Timed out waiting for FSP command queue to be empty.\n");
-            return NV_ERR_TIMEOUT;
-        }
        osSpinLoop();
+
+        status = gpuCheckTimeout(pGpu, &timeout);
+        if (status != NV_OK)
+        {
+            if ((status == NV_ERR_TIMEOUT) &&
+                kfspIsQueueEmpty(pGpu, pKernelFsp))
+            {
+                status = NV_OK;
+            }
+            else
+            {
+                NV_PRINTF(LEVEL_ERROR,
+                    "Timed out waiting for FSP command queue to be empty.\n");
+            }
+            break;
+        }
    }

-    return NV_OK;
+    return status;
 }

 /*!
--- a/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c
+++ b/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c
@@ -846,6 +846,14 @@ _kgspRpcEventIsGpuDegradedCallback
    OBJRPC  *pRpc
 )
 {
+    RPC_PARAMS(nvlink_is_gpu_degraded, _v17_00);
+    KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu);
+    NV2080_CTRL_NVLINK_IS_GPU_DEGRADED_PARAMS_v17_00 *dest = &rpc_params->params;
+
+    if(dest->bIsGpuDegraded)
+    {
+        knvlinkSetDegradedMode(pGpu, pKernelNvlink, dest->linkId);
+    }
 }

 static void
--- a/src/nvidia/src/kernel/gpu/gsp/message_queue_cpu.c
+++ b/src/nvidia/src/kernel/gpu/gsp/message_queue_cpu.c
@@ -476,24 +476,6 @@ void GspMsgQueuesCleanup(MESSAGE_QUEUE_COLLECTION **ppMQCollection)
    *ppMQCollection = NULL;
 }

-/*!
- * Calculate 32-bit checksum
- *
- * This routine assumes that the data is padded out with zeros to the next
- * 8-byte alignment, and it is OK to read past the end to the 8-byte alignment.
- */
-static NV_INLINE NvU32 _checkSum32(void *pData, NvU32 uLen)
-{
-    NvU64 *p        = (NvU64 *)pData;
-    NvU64 *pEnd     = (NvU64 *)((NvUPtr)pData + uLen);
-    NvU64  checkSum = 0;
-
-    while (p < pEnd)
-        checkSum ^= *p++;
-
-    return NvU64_HI32(checkSum) ^ NvU64_LO32(checkSum);
-}
-
 /*!
 * GspMsgQueueSendCommand
 *
@@ -532,7 +514,7 @@ NV_STATUS GspMsgQueueSendCommand(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)

    pCQE->seqNum    = pMQI->txSeqNum;
    pCQE->elemCount = GSP_MSG_QUEUE_BYTES_TO_ELEMENTS(uElementSize);
-    pCQE->checkSum  = 0;
+    pCQE->checkSum  = 0; // The checkSum field is included in the checksum calculation, so zero it.

    if (gpuIsCCFeatureEnabled(pGpu))
    {
@@ -666,7 +648,8 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
    NvU32       nRetries;
    NvU32       nMaxRetries  = 3;
    NvU32       nElements    = 1;  // Assume record fits in one queue element for now.
-    NvU32       uElementSize = 0;
+    NvU32       uElementSize;
+    NvU32       checkSum;
    NvU32       seqMismatchDiff = NV_U32_MAX;
    NV_STATUS   nvStatus     = NV_OK;

@@ -717,15 +700,23 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
        // Retry if checksum fails.
        if (gpuIsCCFeatureEnabled(pGpu))
        {
-            // In Confidential Compute scenario, checksum includes complete element range.
-            if (_checkSum32(pMQI->pCmdQueueElement, (nElements * GSP_MSG_QUEUE_ELEMENT_SIZE_MIN)) != 0)
-            {
-                NV_PRINTF(LEVEL_ERROR, "Bad checksum.\n");
-                nvStatus = NV_ERR_INVALID_DATA;
-                continue;
-            }
+            //
+            // In the Confidential Compute scenario, the actual message length
+            // is inside the encrypted payload, and we can't access it before
+            // decryption, therefore the checksum encompasses the whole element
+            // range. This makes checksum verification significantly slower
+            // because messages are typically much smaller than element size.
+            //
+            checkSum = _checkSum32(pMQI->pCmdQueueElement,
+                                   (nElements * GSP_MSG_QUEUE_ELEMENT_SIZE_MIN));
        } else
-        if (_checkSum32(pMQI->pCmdQueueElement, uElementSize) != 0)
+        {
+            checkSum = _checkSum32(pMQI->pCmdQueueElement,
+                                   (GSP_MSG_QUEUE_ELEMENT_HDR_SIZE +
+                                    pMQI->pCmdQueueElement->rpc.length));
+        }
+
+        if (checkSum != 0)
        {
            NV_PRINTF(LEVEL_ERROR, "Bad checksum.\n");
            nvStatus = NV_ERR_INVALID_DATA;
--- a/src/nvidia/src/kernel/gpu/mem_sys/kern_mem_sys.c
+++ b/src/nvidia/src/kernel/gpu/mem_sys/kern_mem_sys.c
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
--- a/src/nvidia/src/kernel/gpu/mem_sys/kern_mem_sys_ctrl.c
+++ b/src/nvidia/src/kernel/gpu/mem_sys/kern_mem_sys_ctrl.c
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -475,11 +475,14 @@ _kmemsysGetFbInfos
                        // It will be zero unless VGA display memory is reserved
                        if (pKernelMemorySystem->fbOverrideStartKb != 0)
                        {
+                            status = NV_OK;
                            data = NvU64_LO32(pKernelMemorySystem->fbOverrideStartKb);
-                            NV_ASSERT(((NvU64) data << 10ULL) == pKernelMemorySystem->fbOverrideStartKb);
+                            NV_ASSERT_OR_ELSE((NvU64) data == pKernelMemorySystem->fbOverrideStartKb,
+                                              status = NV_ERR_INVALID_DATA);
+                            
                        }
-					    else
-				    	{
+                        else
+                        {
                            //
                            // Returns start of heap in kbytes. This is zero unless
                            // VGA display memory is reserved.
--- a/src/nvidia/src/kernel/gpu/nvenc/nvencsession.c
+++ b/src/nvidia/src/kernel/gpu/nvenc/nvencsession.c
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2012-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2012-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -159,6 +159,7 @@ nvencsessionConstruct_IMPL
        (listCount(&(pGpu->nvencSessionList)) == 1))
    {
        // Register 1Hz timer callback for this GPU.
+        pGpu->bNvEncSessionDataProcessingWorkItemPending = NV_FALSE;
        status = osSchedule1HzCallback(pGpu,
                                       _gpuNvEncSessionDataProcessingCallback,
                                       NULL,
@@ -379,8 +380,7 @@ _gpuNvEncSessionProcessBuffer(POBJGPU pGpu, NvencSession *pNvencSession)
    portMemFree(pLocalSessionInfoBuffer);
 }

-static void
-_gpuNvEncSessionDataProcessingCallback(POBJGPU pGpu, void *data)
+static void _gpuNvEncSessionDataProcessing(OBJGPU *pGpu)
 {
    PNVENC_SESSION_LIST_ITEM  pNvencSessionListItem;
    PNVENC_SESSION_LIST_ITEM  pNvencSessionListItemNext;
@@ -416,3 +416,46 @@ _gpuNvEncSessionDataProcessingCallback(POBJGPU pGpu, void *data)
        }
    }
 }
+
+static void _gpuNvEncSessionDataProcessingWorkItem(NvU32 gpuInstance, void *pArgs)
+{
+    OBJGPU *pGpu;
+    // Runs as a queued work item: look up the GPU for gpuInstance (pArgs is unused).
+    pGpu = gpumgrGetGpu(gpuInstance);
+    if (pGpu == NULL)
+    {
+        NV_PRINTF(LEVEL_ERROR, "NVENC Sessions GPU instance is invalid\n");
+        return;
+    }
+    // Do the processing, then clear the pending flag that the callback tests before queuing.
+    _gpuNvEncSessionDataProcessing(pGpu);
+    pGpu->bNvEncSessionDataProcessingWorkItemPending = NV_FALSE;
+}
+
+static void
+_gpuNvEncSessionDataProcessingCallback(POBJGPU pGpu, void *data)
+{
+    NV_STATUS   status;
+    // Defer the processing to a work item; do nothing while a queued one is still pending.
+    if (!pGpu->bNvEncSessionDataProcessingWorkItemPending)
+    {
+        status = osQueueWorkItemWithFlags(pGpu,
+                                          _gpuNvEncSessionDataProcessingWorkItem,
+                                          NULL,
+                                          OS_QUEUE_WORKITEM_FLAGS_LOCK_SEMA
+                                          | OS_QUEUE_WORKITEM_FLAGS_LOCK_GPU_GROUP_DEVICE_RW);
+        if (status != NV_OK)
+        {
+            NV_PRINTF(LEVEL_ERROR,
+                      "NVENC session queuing async callback failed, status=%x\n",
+                      status);
+            // Queuing failed, so fall back to doing the NVENC session data
+            // processing directly from this callback.
+            _gpuNvEncSessionDataProcessing(pGpu);
+        }
+        else
+        {
+            pGpu->bNvEncSessionDataProcessingWorkItemPending = NV_TRUE;
+        }
+    }
+}
--- a/src/nvidia/src/kernel/gpu/nvlink/kernel_nvlinkcorelibtrain.c
+++ b/src/nvidia/src/kernel/gpu/nvlink/kernel_nvlinkcorelibtrain.c
@@ -1034,6 +1034,7 @@ knvlinkCoreShutdownDeviceLinks_IMPL
    OBJSYS      *pSys  = SYS_GET_INSTANCE();
    NvU32        count = 0;
    NvU32        linkId;
+    NvlStatus    status = NV_OK;

    // Skip link shutdown where fabric manager is present, for nvlink version bellow 4.0
    if ((pKernelNvlink->ipVerNvlink < NVLINK_VERSION_40 &&
@@ -1096,13 +1097,23 @@ knvlinkCoreShutdownDeviceLinks_IMPL
    // Trigger laneshutdown through core lib if shutdown is supported
    if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED) && (count > 0))
    {
-        if (nvlink_lib_powerdown_links_from_active_to_off(
-                        pLinks, count, NVLINK_STATE_CHANGE_SYNC))
+        status = nvlink_lib_powerdown_links_from_active_to_off(
+                        pLinks, count, NVLINK_STATE_CHANGE_SYNC);
+        if (status != NVL_SUCCESS)
        {
-            NV_PRINTF(LEVEL_ERROR, "Unable to turn off links for the GPU%d\n",
+            if (status == NVL_NOT_FOUND)
+            {
+                // Bug 4419022
+                NV_PRINTF(LEVEL_ERROR, "Need to shutdown all links unilaterally for GPU%d\n",
+                      pGpu->gpuInstance);
+            }
+            else
+            {
+                NV_PRINTF(LEVEL_ERROR, "Unable to turn off links for the GPU%d\n",
                      pGpu->gpuInstance);

-            return NV_ERR_INVALID_STATE;
+                return NV_ERR_INVALID_STATE;
+            }
        }
    }

--- a/src/nvidia/src/kernel/gpu/spdm/arch/hopper/spdm_gh100.c
+++ b/src/nvidia/src/kernel/gpu/spdm/arch/hopper/spdm_gh100.c
@@ -51,6 +51,14 @@
 // Regardless of whether Requester is configured to support these,
 // we only expect Responder to provide these capabilities.
 //
+
+//
+// TODO: SPDM_CAPABILITIES_FLAGS_GH100 and g_SpdmAlgoCheckTable_GH100 are the capability flags
+//       and attributes that GH100 expects to receive from the responder. Currently, there is
+//       only one responder, and it returns fixed capability flags and attributes.
+//       If we want to support different returned capabilities and attributes later, we need
+//       to refactor spdmCheckConnection_GH100().
+//
 #define SPDM_CAPABILITIES_FLAGS_GH100 \
        SPDM_GET_CAPABILITIES_RESPONSE_FLAGS_CERT_CAP       | \
        SPDM_GET_CAPABILITIES_RESPONSE_FLAGS_MEAS_CAP_SIG   | \
@@ -64,21 +72,6 @@
        SPDM_GET_CAPABILITIES_RESPONSE_FLAGS_HBEAT_CAP;

 /* ------------------------ Static Variables ------------------------------- */
-//
-// For transport functionality, we require access to the GPU and Spdm objects,
-// as well as additional state (temporary response buffer).
-//
-// However, libspdm transport layer is implemented via callbacks which currently
-// do not support passing any custom parameters, meaning we must use static variables
-// to access these objects. If we ever require multiple instances of the Spdm object,
-// this will be an issue.
-//
-static OBJGPU *g_pGpu                = NULL;
-static Spdm   *g_pSpdm               = NULL;
-static NvU8   *g_pTransportBuffer    = NULL;
-static NvU32   g_transportBufferSize = 0;
-static NvU32   g_pendingResponseSize = 0;
-
 static SPDM_ALGO_CHECK_ENTRY g_SpdmAlgoCheckTable_GH100[] =
 {
    { LIBSPDM_DATA_MEASUREMENT_SPEC,       SPDM_MEASUREMENT_SPECIFICATION_DMTF },
@@ -127,7 +120,6 @@ static libspdm_return_t _spdmSendMessageGsp(void *spdm_context, size_t message_s
 static libspdm_return_t _spdmReceiveMessageGsp(void *spdm_context, size_t *message_size,
                                               void **message, uint64_t timeout);

-
 /* ------------------------ Static Functions ------------------------------- */
 //
 // Hardcoding check for libspdm secured message callbacks version.
@@ -311,6 +303,8 @@ _spdmEncodeMessageGsp
    void                                *pSecuredMessageContext = NULL;
    NV_SPDM_DESC_HEADER                 *pNvSpdmDescHdr         = NULL;
    NvU32                                payloadSize            = 0;
+    Spdm                                *pSpdm                  = NULL;
+    size_t                               dataSize               = sizeof(void *);

    // Check libspdm parameters.
    if (spdm_context == NULL || message == NULL || message_size == 0 ||
@@ -332,6 +326,21 @@ _spdmEncodeMessageGsp
        return LIBSPDM_STATUS_INVALID_MSG_FIELD;
    }

+    status = libspdm_get_data(spdm_context, LIBSPDM_DATA_APP_CONTEXT_DATA,
+                              NULL, (void *)&pSpdm, &dataSize);
+
+    if (status != LIBSPDM_STATUS_SUCCESS)
+    {
+        NV_PRINTF(LEVEL_ERROR, ", spdmStatus != LIBSPDM_STATUS_SUCCESS \n ");
+        return status;
+    }
+
+    if (pSpdm == NULL)
+    {
+        NV_PRINTF(LEVEL_ERROR, " pSpdm == NULL, SPDM context probably corrupted !! \n ");
+        return LIBSPDM_STATUS_INVALID_STATE_LOCAL;
+    }
+
    // Initialize descriptor header.
    pNvSpdmDescHdr = (NV_SPDM_DESC_HEADER *)*transport_message;
    portMemSet(pNvSpdmDescHdr, 0, sizeof(NV_SPDM_DESC_HEADER));
@@ -401,7 +410,7 @@ _spdmEncodeMessageGsp
    }

    // Check final encrypted message size.
-    if (*transport_message_size > g_pSpdm->payloadBufferSize)
+    if (*transport_message_size > pSpdm->payloadBufferSize)
    {
        return LIBSPDM_STATUS_BUFFER_TOO_SMALL;
    }
@@ -432,6 +441,8 @@ _spdmDecodeMessageGsp
    void                                  *pSecuredMessageContext = NULL;
    libspdm_return_t                       status                 = LIBSPDM_STATUS_SUCCESS;
    spdm_secured_message_a_data_header1_t *pSpdmSecuredMsgHdr     = NULL;
+    Spdm                                  *pSpdm                  = NULL;
+    size_t                                 dataSize               = sizeof(void *);

    // Check libspdm parameters.
    if (spdm_context == NULL || session_id == NULL || is_app_message == NULL ||
@@ -447,10 +458,25 @@ _spdmDecodeMessageGsp
        return LIBSPDM_STATUS_INVALID_PARAMETER;
    }

+    status = libspdm_get_data(spdm_context, LIBSPDM_DATA_APP_CONTEXT_DATA,
+                              NULL, (void *)&pSpdm, &dataSize);
+
+    if (status != LIBSPDM_STATUS_SUCCESS)
+    {
+        NV_PRINTF(LEVEL_ERROR, " spdmStatus != LIBSPDM_STATUS_SUCCESS \n ");
+        return status;
+    }
+
+    if (pSpdm == NULL)
+    {
+        NV_PRINTF(LEVEL_ERROR, " pSpdm == NULL, SPDM context probably corrupted !! \n ");
+        return LIBSPDM_STATUS_INVALID_STATE_LOCAL;
+    }
+
    // Retrieve NV-header from message, and perform basic validation.
    pNvSpdmDescHdr = (NV_SPDM_DESC_HEADER *)transport_message;
    if (transport_message_size < sizeof(NV_SPDM_DESC_HEADER) ||
-        transport_message_size > g_pSpdm->payloadBufferSize)
+        transport_message_size > pSpdm->payloadBufferSize)
    {
        return LIBSPDM_STATUS_INVALID_MSG_FIELD;
    }
@@ -566,11 +592,11 @@ _spdmSendMessageGsp
    uint64_t    timeout
 )
 {
-    NV_STATUS        nvStatus   = NV_OK;
-    libspdm_return_t spdmStatus = LIBSPDM_STATUS_SUCCESS;
-
-    // Ensure size is cleared to indicate no response pending in buffer yet
-    g_pendingResponseSize = 0;
+    NV_STATUS                   nvStatus   = NV_OK;
+    libspdm_return_t            spdmStatus = LIBSPDM_STATUS_SUCCESS;
+    Spdm                       *pSpdm      = NULL;
+    OBJGPU                     *pGpu       = NULL;
+    size_t                      dataSize   = sizeof(void *);

    // Check libspdm parameters.
    if (message_size == 0 || message == NULL)
@@ -578,23 +604,44 @@ _spdmSendMessageGsp
        return LIBSPDM_STATUS_INVALID_PARAMETER;
    }

-    if (g_pGpu == NULL || g_pSpdm == NULL)
+    spdmStatus = libspdm_get_data(spdm_context, LIBSPDM_DATA_APP_CONTEXT_DATA,
+                              NULL, (void *)&pSpdm, &dataSize);
+
+    if (spdmStatus != LIBSPDM_STATUS_SUCCESS)
    {
+        NV_PRINTF(LEVEL_ERROR,"  spdmStatus != LIBSPDM_STATUS_SUCCESS \n ");
+        return spdmStatus;
+    }
+
+    if (pSpdm == NULL)
+    {
+        NV_PRINTF(LEVEL_ERROR, " pSpdm == NULL, SPDM context probably corrupted !! \n ");
        return LIBSPDM_STATUS_INVALID_STATE_LOCAL;
    }

-    if (g_transportBufferSize < message_size)
+    pGpu = ENG_GET_GPU(pSpdm);
+
+    if (pGpu == NULL)
+    {
+        NV_PRINTF(LEVEL_ERROR, " pGpu == NULL, SPDM context probably corrupted !! \n ");
+        return LIBSPDM_STATUS_INVALID_STATE_LOCAL;
+    }
+
+    // Ensure size is cleared to indicate no response pending in buffer yet
+    pSpdm->pendingResponseSize = 0;
+
+    if (pSpdm->transportBufferSize < message_size)
    {
        return LIBSPDM_STATUS_BUFFER_TOO_SMALL;
    }

    // Fill transport buffer with message and send
-    g_pendingResponseSize = g_transportBufferSize;
-    portMemCopy(g_pTransportBuffer, g_transportBufferSize, message, message_size);
+    pSpdm->pendingResponseSize = pSpdm->transportBufferSize;
+    portMemCopy(pSpdm->pTransportBuffer, pSpdm->transportBufferSize, message, message_size);

-    nvStatus = spdmMessageProcess_HAL(g_pGpu, g_pSpdm,
-                                      g_pTransportBuffer, message_size,
-                                      g_pTransportBuffer, &g_pendingResponseSize);
+    nvStatus = spdmMessageProcess_HAL(pGpu, pSpdm,
+                                      pSpdm->pTransportBuffer, message_size,
+                                      pSpdm->pTransportBuffer, &pSpdm->pendingResponseSize);
    if (nvStatus != NV_OK)
    {
        spdmStatus = LIBSPDM_STATUS_SEND_FAIL;
@@ -603,7 +650,7 @@ _spdmSendMessageGsp
    if (spdmStatus != LIBSPDM_STATUS_SUCCESS)
    {
        // If message failed, size is cleared to indicate no response pending
-        g_pendingResponseSize = 0;
+        pSpdm->pendingResponseSize = 0;
    }

    return spdmStatus;
@@ -623,7 +670,9 @@ _spdmReceiveMessageGsp
    uint64_t   timeout
 )
 {
-    libspdm_return_t spdmStatus = LIBSPDM_STATUS_SUCCESS;
+    libspdm_return_t   spdmStatus = LIBSPDM_STATUS_SUCCESS;
+    Spdm              *pSpdm      = NULL;
+    size_t             dataSize   = sizeof(void *);

    // Check libspdm parameters.
    if (message_size == NULL || message == NULL || *message == NULL)
@@ -631,25 +680,36 @@ _spdmReceiveMessageGsp
        return LIBSPDM_STATUS_INVALID_PARAMETER;
    }

-    if (g_pGpu == NULL || g_pSpdm == NULL)
+    spdmStatus = libspdm_get_data(spdm_context, LIBSPDM_DATA_APP_CONTEXT_DATA,
+                              NULL, (void *)&pSpdm, &dataSize);
+
+    if (spdmStatus != LIBSPDM_STATUS_SUCCESS)
    {
-        return LIBSPDM_STATUS_INVALID_STATE_LOCAL;
+        NV_PRINTF(LEVEL_ERROR, " spdmStatus != LIBSPDM_STATUS_SUCCESS \n ");
+        return spdmStatus;
    }

+    if (pSpdm == NULL)
+    {
+        NV_PRINTF(LEVEL_ERROR, " pSpdm  == NULL, SPDM context probably corrupted !! \n ");
+        return LIBSPDM_STATUS_INVALID_STATE_LOCAL;
+    }
    // Basic validation to ensure we have a real response.
-    if (g_pendingResponseSize == 0 || g_pendingResponseSize > *message_size)
+    if (pSpdm->pendingResponseSize == 0 ||
+        pSpdm->pendingResponseSize > *message_size)
    {
        spdmStatus = LIBSPDM_STATUS_RECEIVE_FAIL;
        goto ErrorExit;
    }

-    portMemCopy(*message, *message_size, g_pTransportBuffer, g_pendingResponseSize);
-    *message_size = g_pendingResponseSize;
+    portMemCopy(*message, *message_size,
+                pSpdm->pTransportBuffer, pSpdm->pendingResponseSize);
+    *message_size = pSpdm->pendingResponseSize;

 ErrorExit:

    // Ensure size is cleared to indicate no response pending in buffer
-    g_pendingResponseSize = 0;
+    pSpdm->pendingResponseSize = 0;

    return spdmStatus;
 }
@@ -673,18 +733,14 @@ spdmDeviceInit_GH100
        return NV_ERR_INVALID_ARGUMENT;
    }

-    g_pGpu                = pGpu;
-    g_pSpdm               = pSpdm;
-    g_pendingResponseSize = 0;
-    g_pTransportBuffer    = portMemAllocNonPaged(pSpdm->payloadBufferSize);
-
-    if (g_pTransportBuffer == NULL)
+    pSpdm->pendingResponseSize = 0;
+    pSpdm->pTransportBuffer    = portMemAllocNonPaged(pSpdm->payloadBufferSize);
+    if (pSpdm->pTransportBuffer == NULL)
    {
-        g_transportBufferSize = 0;
+        pSpdm->transportBufferSize = 0;
        return NV_ERR_NO_MEMORY;
    }
-
-    g_transportBufferSize = pSpdm->payloadBufferSize;
+    pSpdm->transportBufferSize = pSpdm->payloadBufferSize;

    // Register transport layer functionality with library.
    libspdm_register_transport_layer_func(pSpdm->pLibspdmContext,
@@ -703,7 +759,6 @@ spdmDeviceInit_GH100
    return NV_OK;
 }

-
 /*!
 * To deinitialize the GSP SPDM Responder, we need to release the surface for
 * SPDM communication. GSP-RM will handle the rest.
@@ -717,10 +772,10 @@ spdmDeviceDeinit_GH100
 )
 {
    // Just-in-case, portMemFree handles NULL.
-    portMemFree(g_pTransportBuffer);
-    g_pTransportBuffer    = NULL;
-    g_transportBufferSize = 0;
-    g_pendingResponseSize = 0;
+    portMemFree(pSpdm->pTransportBuffer);
+    pSpdm->pTransportBuffer     = NULL;
+    pSpdm->transportBufferSize  = 0;
+    pSpdm->pendingResponseSize  = 0;

    return NV_OK;
 }
--- a/src/nvidia/src/kernel/gpu/spdm/spdm.c
+++ b/src/nvidia/src/kernel/gpu/spdm/spdm.c
@@ -432,6 +432,11 @@ spdmContextInit_IMPL

    libspdm_init_msg_log(pSpdm->pLibspdmContext, pSpdm->pMsgLog, pSpdm->msgLogMaxSize);

+
+    // Store SPDM object pointer to libspdm context
+    CHECK_SPDM_STATUS(libspdm_set_data(pSpdm->pLibspdmContext, LIBSPDM_DATA_APP_CONTEXT_DATA,
+                                       NULL, (void *)&pSpdm, sizeof(void *)));
+
    //
    // Perform any device-specific initialization. spdmDeviceInit is also
    // responsible for registering transport layer functions with libspdm.
--- a/src/nvidia/src/kernel/mem_mgr/mem_export.c
+++ b/src/nvidia/src/kernel/mem_mgr/mem_export.c
@@ -606,7 +606,8 @@ _memoryexportVerifyMem
    if (pGpu == NULL)
        return NV_OK;

-    if (pKernelMIGGpuInstance != NULL)
+    // MIG is about vidmem partitioning, so limit the check.
+    if ((pKernelMIGGpuInstance != NULL) && (addrSpace == ADDR_FBMEM))
    {
        if ((pKernelMIGGpuInstance->pMemoryPartitionHeap != pSrcMemory->pHeap))
            return NV_ERR_INVALID_OBJECT_PARENT;
--- a/src/nvidia/src/kernel/virtualization/kernel_vgpu_mgr.c
+++ b/src/nvidia/src/kernel/virtualization/kernel_vgpu_mgr.c
@@ -1396,15 +1396,9 @@ NvU32 kvgpumgrGetPgpuSubdevIdEncoding(OBJGPU *pGpu, NvU8 *pgpuString,
        return NV_U32_MAX;
    }

-    switch (chipID)
-    {
-        default:
-            // The encoding of the subdevice ID is its value converted to string
-            bytes = NvU32ToAsciiStr(subID, SUBDEVID_ENCODED_VALUE_SIZE,
+    // The encoding of the subdevice ID is its value converted to string
+    bytes = NvU32ToAsciiStr(subID, SUBDEVID_ENCODED_VALUE_SIZE,
                                    pgpuString, NV_FALSE);
-            break;
-    }
-
    return bytes;
 }