535.171.04

2026-01-30 04:59:46 +00:00 · 2024-03-21 14:22:31 +01:00
parent 044f70bbb8
commit c042c7903d
36 changed files with 691 additions and 265 deletions
--- a/src/nvidia/Makefile
+++ b/src/nvidia/Makefile
@@ -91,6 +91,7 @@ ifeq ($(TARGET_ARCH),aarch64)
  CFLAGS += -mgeneral-regs-only
  CFLAGS += -march=armv8-a
  CFLAGS += -mstrict-align
+  CFLAGS += -ffixed-x18
  CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mno-outline-atomics)
 endif

--- a/src/nvidia/arch/nvalloc/common/inc/dev_ctrl_defines.h
+++ b/src/nvidia/arch/nvalloc/common/inc/dev_ctrl_defines.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -77,6 +77,9 @@
 #define NV_CTRL_INTR_GPU_VECTOR_TO_SUBTREE(i) \
    ((NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(i)) / 2)

+// First index of doorbell which is controlled by VF
+#define NV_CTRL_INTR_GPU_DOORBELL_INDEX_VF_START 2048
+
 // The max number of leaf registers we expect
 #define NV_MAX_INTR_LEAVES 16

--- a/src/nvidia/generated/g_nv_name_released.h
+++ b/src/nvidia/generated/g_nv_name_released.h
@@ -1042,6 +1042,8 @@ static const CHIPS_RELEASED sChipsReleased[] = {
    { 0x28A0, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Laptop GPU" },
    { 0x28A1, 0x0000, 0x0000, "NVIDIA GeForce RTX 4050 Laptop GPU" },
    { 0x28B8, 0x0000, 0x0000, "NVIDIA RTX 2000 Ada Generation Laptop GPU" },
+    { 0x28B9, 0x0000, 0x0000, "NVIDIA RTX 1000 Ada Generation Laptop GPU" },
+    { 0x28BB, 0x0000, 0x0000, "NVIDIA RTX 500 Ada Generation Laptop GPU" },
    { 0x28E0, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Laptop GPU" },
    { 0x28E1, 0x0000, 0x0000, "NVIDIA GeForce RTX 4050 Laptop GPU" },
    { 0x28F8, 0x0000, 0x0000, "NVIDIA RTX 2000 Ada Generation Embedded GPU" },
--- a/src/nvidia/generated/g_vgpuconfigapi_nvoc.h
+++ b/src/nvidia/generated/g_vgpuconfigapi_nvoc.h
@@ -7,7 +7,7 @@ extern "C" {
 #endif

 /*
- * SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
--- a/src/nvidia/inc/kernel/gpu/gsp/message_queue_priv.h
+++ b/src/nvidia/inc/kernel/gpu/gsp/message_queue_priv.h
@@ -103,4 +103,24 @@ typedef struct MESSAGE_QUEUE_COLLECTION
 #define GSP_MSG_QUEUE_HEADER_SIZE                                   RM_PAGE_SIZE
 #define GSP_MSG_QUEUE_HEADER_ALIGN                                             4   // 2 ^ 4 = 16

+/*!
+ * Calculate a 32-bit checksum: XOR all 64-bit words of the buffer together,
+ * then fold the result by XORing its high and low 32-bit halves.
+ * The caller must zero-pad the data to the next 8-byte alignment; the final
+ * 64-bit read may extend past uLen up to that alignment. uLen > 0 is asserted.
+ */
+static NV_INLINE NvU32 _checkSum32(void *pData, NvU32 uLen)
+{
+    NvU64 *p        = (NvU64 *)pData;
+    NvU64 *pEnd     = (NvU64 *)((NvUPtr)pData + uLen); // byte-exact end; need not be 8-byte aligned
+    NvU64  checkSum = 0;
+
+    NV_ASSERT_CHECKED(uLen > 0);
+
+    while (p < pEnd) // when uLen is not a multiple of 8, the last read covers up to 7 bytes past pEnd
+        checkSum ^= *p++;
+
+    return NvU64_HI32(checkSum) ^ NvU64_LO32(checkSum);
+}
+
 #endif // _MESSAGE_QUEUE_PRIV_H_
--- a/src/nvidia/src/kernel/gpu/fsp/kern_fsp.c
+++ b/src/nvidia/src/kernel/gpu/fsp/kern_fsp.c
@@ -244,32 +244,50 @@ kfspPollForQueueEmpty_IMPL
    KernelFsp *pKernelFsp
 )
 {
+    NV_STATUS status = NV_OK;
    RMTIMEOUT timeout;

-    gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, GPU_TIMEOUT_FLAGS_OSTIMER | GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
+    gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout,
+        GPU_TIMEOUT_FLAGS_OSTIMER |
+        GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);

    while (!kfspIsQueueEmpty(pGpu, pKernelFsp))
    {
        //
-        // For now we assume that any response from FSP before RM message send is complete
-        // indicates an error and we should abort.
+        // For now we assume that any response from FSP before RM message
+        // send is complete indicates an error and we should abort.
+        //
+        // Ongoing discussion on usefulness of this check. Bug to be filed.
        //
        if (!kfspIsMsgQueueEmpty(pGpu, pKernelFsp))
        {
            kfspReadMessage(pGpu, pKernelFsp, NULL, 0);
-            NV_PRINTF(LEVEL_ERROR, "Received error message from FSP while waiting for CMDQ to be empty.\n");
-            return NV_ERR_GENERIC;
+            NV_PRINTF(LEVEL_ERROR,
+                "Received error message from FSP while waiting for CMDQ to be empty.\n");
+            status = NV_ERR_GENERIC;
+            break;
        }

-        if (gpuCheckTimeout(pGpu, &timeout) == NV_ERR_TIMEOUT)
-        {
-            NV_PRINTF(LEVEL_ERROR, "Timed out waiting for FSP command queue to be empty.\n");
-            return NV_ERR_TIMEOUT;
-        }
        osSpinLoop();
+
+        status = gpuCheckTimeout(pGpu, &timeout);
+        if (status != NV_OK)
+        {
+            if ((status == NV_ERR_TIMEOUT) &&
+                kfspIsQueueEmpty(pGpu, pKernelFsp))
+            {
+                status = NV_OK;
+            }
+            else
+            {
+                NV_PRINTF(LEVEL_ERROR,
+                    "Timed out waiting for FSP command queue to be empty.\n");
+            }
+            break;
+        }
    }

-    return NV_OK;
+    return status;
 }

 /*!
--- a/src/nvidia/src/kernel/gpu/gsp/message_queue_cpu.c
+++ b/src/nvidia/src/kernel/gpu/gsp/message_queue_cpu.c
@@ -476,24 +476,6 @@ void GspMsgQueuesCleanup(MESSAGE_QUEUE_COLLECTION **ppMQCollection)
    *ppMQCollection = NULL;
 }

-/*!
- * Calculate 32-bit checksum
- *
- * This routine assumes that the data is padded out with zeros to the next
- * 8-byte alignment, and it is OK to read past the end to the 8-byte alignment.
- */
-static NV_INLINE NvU32 _checkSum32(void *pData, NvU32 uLen)
-{
-    NvU64 *p        = (NvU64 *)pData;
-    NvU64 *pEnd     = (NvU64 *)((NvUPtr)pData + uLen);
-    NvU64  checkSum = 0;
-
-    while (p < pEnd)
-        checkSum ^= *p++;
-
-    return NvU64_HI32(checkSum) ^ NvU64_LO32(checkSum);
-}
-
 /*!
 * GspMsgQueueSendCommand
 *
@@ -533,7 +515,7 @@ NV_STATUS GspMsgQueueSendCommand(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)

    pCQE->seqNum    = pMQI->txSeqNum;
    pCQE->elemCount = GSP_MSG_QUEUE_BYTES_TO_ELEMENTS(uElementSize);
-    pCQE->checkSum  = 0;
+    pCQE->checkSum  = 0; // The checkSum field is included in the checksum calculation, so zero it.

    ConfidentialCompute *pCC = GPU_GET_CONF_COMPUTE(pGpu);
    if (pCC != NULL && pCC->getProperty(pCC, PDB_PROP_CONFCOMPUTE_ENCRYPT_ENABLED))
@@ -660,7 +642,8 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
    NvU32       nRetries;
    NvU32       nMaxRetries  = 3;
    NvU32       nElements    = 1;  // Assume record fits in one queue element for now.
-    NvU32       uElementSize = 0;
+    NvU32       uElementSize;
+    NvU32       checkSum;
    NvU32       seqMismatchDiff = NV_U32_MAX;
    NV_STATUS   nvStatus     = NV_OK;
    ConfidentialCompute *pCC = NULL;
@@ -713,15 +696,23 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
        pCC = GPU_GET_CONF_COMPUTE(pGpu);
        if (pCC != NULL && pCC->getProperty(pCC, PDB_PROP_CONFCOMPUTE_ENCRYPT_READY))
        {
-            // In Confidential Compute scenario, checksum includes complete element range.
-            if (_checkSum32(pMQI->pCmdQueueElement, (nElements * GSP_MSG_QUEUE_ELEMENT_SIZE_MIN)) != 0)
-            {
-                NV_PRINTF(LEVEL_ERROR, "Bad checksum.\n");
-                nvStatus = NV_ERR_INVALID_DATA;
-                continue;
-            }
+            //
+            // In the Confidential Compute scenario, the actual message length
+            // is inside the encrypted payload, and we can't access it before
+            // decryption, therefore the checksum encompasses the whole element
+            // range. This makes checksum verification significantly slower
+            // because messages are typically much smaller than element size.
+            //
+            checkSum = _checkSum32(pMQI->pCmdQueueElement,
+                                   (nElements * GSP_MSG_QUEUE_ELEMENT_SIZE_MIN));
        } else
-        if (_checkSum32(pMQI->pCmdQueueElement, uElementSize) != 0)
+        {
+            checkSum = _checkSum32(pMQI->pCmdQueueElement,
+                                   (GSP_MSG_QUEUE_ELEMENT_HDR_SIZE +
+                                    pMQI->pCmdQueueElement->rpc.length));
+        }
+
+        if (checkSum != 0)
        {
            NV_PRINTF(LEVEL_ERROR, "Bad checksum.\n");
            nvStatus = NV_ERR_INVALID_DATA;
--- a/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c
+++ b/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c
@@ -1587,6 +1587,7 @@ memdescFree
        }

        if (pMemDesc->_addressSpace != ADDR_FBMEM &&
+            pMemDesc->_addressSpace != ADDR_EGM &&
            pMemDesc->_addressSpace != ADDR_SYSMEM)
        {
            return;
@@ -1991,6 +1992,7 @@ memdescUnmap
    switch (pMemDesc->_addressSpace)
    {
        case ADDR_SYSMEM:
+        case ADDR_EGM:
        {
            osUnmapSystemMemory(pMemDesc, Kernel, ProcessId, Address, Priv);
            break;
--- a/src/nvidia/src/kernel/rmapi/mapping_cpu.c
+++ b/src/nvidia/src/kernel/rmapi/mapping_cpu.c
@@ -733,8 +733,9 @@ memUnmap_IMPL
        //
    }
    // System Memory case
-    else if ((pGpu == NULL) || ((memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM) &&
-                                 FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags)))
+    else if ((pGpu == NULL) || (((memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM)
+                                 || (memdescGetAddressSpace(pMemDesc) == ADDR_EGM)
+                                ) && FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags)))
    {
        if (FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags))
        {