535.171.04

This commit is contained in:
Bernhard Stoeckner
2024-03-21 14:22:31 +01:00
parent 044f70bbb8
commit c042c7903d
36 changed files with 691 additions and 265 deletions

View File

@@ -91,6 +91,7 @@ ifeq ($(TARGET_ARCH),aarch64)
CFLAGS += -mgeneral-regs-only
CFLAGS += -march=armv8-a
CFLAGS += -mstrict-align
CFLAGS += -ffixed-x18
CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mno-outline-atomics)
endif

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -77,6 +77,9 @@
#define NV_CTRL_INTR_GPU_VECTOR_TO_SUBTREE(i) \
((NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(i)) / 2)
// First index of doorbell which is controlled by VF
#define NV_CTRL_INTR_GPU_DOORBELL_INDEX_VF_START 2048
// The max number of leaf registers we expect
#define NV_MAX_INTR_LEAVES 16

View File

@@ -1042,6 +1042,8 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x28A0, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Laptop GPU" },
{ 0x28A1, 0x0000, 0x0000, "NVIDIA GeForce RTX 4050 Laptop GPU" },
{ 0x28B8, 0x0000, 0x0000, "NVIDIA RTX 2000 Ada Generation Laptop GPU" },
{ 0x28B9, 0x0000, 0x0000, "NVIDIA RTX 1000 Ada Generation Laptop GPU" },
{ 0x28BB, 0x0000, 0x0000, "NVIDIA RTX 500 Ada Generation Laptop GPU" },
{ 0x28E0, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Laptop GPU" },
{ 0x28E1, 0x0000, 0x0000, "NVIDIA GeForce RTX 4050 Laptop GPU" },
{ 0x28F8, 0x0000, 0x0000, "NVIDIA RTX 2000 Ada Generation Embedded GPU" },

View File

@@ -7,7 +7,7 @@ extern "C" {
#endif
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a

View File

@@ -103,4 +103,24 @@ typedef struct MESSAGE_QUEUE_COLLECTION
#define GSP_MSG_QUEUE_HEADER_SIZE RM_PAGE_SIZE
#define GSP_MSG_QUEUE_HEADER_ALIGN 4 // 2 ^ 4 = 16
/*!
 * Fold a buffer into a 32-bit XOR checksum.
 *
 * The buffer is walked as consecutive 64-bit words that are XORed into a
 * running 64-bit value; NvU64_HI32 and NvU64_LO32 of that value are then
 * XORed together to form the result.
 *
 * The caller must pad the data with zeros out to the next 8-byte alignment,
 * because the last word read may extend past uLen up to that alignment.
 *
 * @param[in] pData  Start of the data to checksum.
 * @param[in] uLen   Length of the data in bytes; checked to be non-zero
 *                   with NV_ASSERT_CHECKED.
 *
 * @return The 32-bit checksum of the data.
 */
static NV_INLINE NvU32 _checkSum32(void *pData, NvU32 uLen)
{
    NvU64       *pWord;
    NvU64 *const pLimit = (NvU64 *)((NvUPtr)pData + uLen);
    NvU64        accum  = 0;

    NV_ASSERT_CHECKED(uLen > 0);

    // Whole 64-bit words are consumed until the cursor reaches pLimit.
    for (pWord = (NvU64 *)pData; pWord < pLimit; pWord++)
    {
        accum ^= *pWord;
    }

    return NvU64_HI32(accum) ^ NvU64_LO32(accum);
}
#endif // _MESSAGE_QUEUE_PRIV_H_

View File

@@ -244,32 +244,50 @@ kfspPollForQueueEmpty_IMPL
KernelFsp *pKernelFsp
)
{
NV_STATUS status = NV_OK;
RMTIMEOUT timeout;
gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, GPU_TIMEOUT_FLAGS_OSTIMER | GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout,
GPU_TIMEOUT_FLAGS_OSTIMER |
GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
while (!kfspIsQueueEmpty(pGpu, pKernelFsp))
{
//
// For now we assume that any response from FSP before RM message send is complete
// indicates an error and we should abort.
// For now we assume that any response from FSP before RM message
// send is complete indicates an error and we should abort.
//
// Ongoing discussion on usefulness of this check. Bug to be filed.
//
if (!kfspIsMsgQueueEmpty(pGpu, pKernelFsp))
{
kfspReadMessage(pGpu, pKernelFsp, NULL, 0);
NV_PRINTF(LEVEL_ERROR, "Received error message from FSP while waiting for CMDQ to be empty.\n");
return NV_ERR_GENERIC;
NV_PRINTF(LEVEL_ERROR,
"Received error message from FSP while waiting for CMDQ to be empty.\n");
status = NV_ERR_GENERIC;
break;
}
if (gpuCheckTimeout(pGpu, &timeout) == NV_ERR_TIMEOUT)
{
NV_PRINTF(LEVEL_ERROR, "Timed out waiting for FSP command queue to be empty.\n");
return NV_ERR_TIMEOUT;
}
osSpinLoop();
status = gpuCheckTimeout(pGpu, &timeout);
if (status != NV_OK)
{
if ((status == NV_ERR_TIMEOUT) &&
kfspIsQueueEmpty(pGpu, pKernelFsp))
{
status = NV_OK;
}
else
{
NV_PRINTF(LEVEL_ERROR,
"Timed out waiting for FSP command queue to be empty.\n");
}
break;
}
}
return NV_OK;
return status;
}
/*!

View File

@@ -476,24 +476,6 @@ void GspMsgQueuesCleanup(MESSAGE_QUEUE_COLLECTION **ppMQCollection)
*ppMQCollection = NULL;
}
/*!
 * Compute a 32-bit XOR checksum over a buffer.
 *
 * The data is read as 64-bit words. Callers must zero-pad the data out to the
 * next 8-byte alignment, since the last word read may extend past uLen up to
 * that alignment.
 *
 * @param[in] pData  Start of the data to checksum.
 * @param[in] uLen   Length of the data in bytes.
 *
 * @return The 32-bit checksum of the data.
 */
static NV_INLINE NvU32 _checkSum32(void *pData, NvU32 uLen)
{
    NvU64 *pCur   = (NvU64 *)pData;
    NvU64 *pStop  = (NvU64 *)((NvUPtr)pData + uLen);
    NvU64  xorAll = 0;

    while (pCur < pStop)
    {
        xorAll ^= *pCur;
        pCur++;
    }

    // Collapse the 64-bit accumulator to 32 bits.
    return NvU64_LO32(xorAll) ^ NvU64_HI32(xorAll);
}
/*!
* GspMsgQueueSendCommand
*
@@ -533,7 +515,7 @@ NV_STATUS GspMsgQueueSendCommand(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
pCQE->seqNum = pMQI->txSeqNum;
pCQE->elemCount = GSP_MSG_QUEUE_BYTES_TO_ELEMENTS(uElementSize);
pCQE->checkSum = 0;
pCQE->checkSum = 0; // The checkSum field is included in the checksum calculation, so zero it.
ConfidentialCompute *pCC = GPU_GET_CONF_COMPUTE(pGpu);
if (pCC != NULL && pCC->getProperty(pCC, PDB_PROP_CONFCOMPUTE_ENCRYPT_ENABLED))
@@ -660,7 +642,8 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
NvU32 nRetries;
NvU32 nMaxRetries = 3;
NvU32 nElements = 1; // Assume record fits in one queue element for now.
NvU32 uElementSize = 0;
NvU32 uElementSize;
NvU32 checkSum;
NvU32 seqMismatchDiff = NV_U32_MAX;
NV_STATUS nvStatus = NV_OK;
ConfidentialCompute *pCC = NULL;
@@ -713,15 +696,23 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
pCC = GPU_GET_CONF_COMPUTE(pGpu);
if (pCC != NULL && pCC->getProperty(pCC, PDB_PROP_CONFCOMPUTE_ENCRYPT_READY))
{
// In Confidential Compute scenario, checksum includes complete element range.
if (_checkSum32(pMQI->pCmdQueueElement, (nElements * GSP_MSG_QUEUE_ELEMENT_SIZE_MIN)) != 0)
{
NV_PRINTF(LEVEL_ERROR, "Bad checksum.\n");
nvStatus = NV_ERR_INVALID_DATA;
continue;
}
//
// In the Confidential Compute scenario, the actual message length
// is inside the encrypted payload, and we can't access it before
// decryption, therefore the checksum encompasses the whole element
// range. This makes checksum verification significantly slower
// because messages are typically much smaller than element size.
//
checkSum = _checkSum32(pMQI->pCmdQueueElement,
(nElements * GSP_MSG_QUEUE_ELEMENT_SIZE_MIN));
} else
if (_checkSum32(pMQI->pCmdQueueElement, uElementSize) != 0)
{
checkSum = _checkSum32(pMQI->pCmdQueueElement,
(GSP_MSG_QUEUE_ELEMENT_HDR_SIZE +
pMQI->pCmdQueueElement->rpc.length));
}
if (checkSum != 0)
{
NV_PRINTF(LEVEL_ERROR, "Bad checksum.\n");
nvStatus = NV_ERR_INVALID_DATA;

View File

@@ -1587,6 +1587,7 @@ memdescFree
}
if (pMemDesc->_addressSpace != ADDR_FBMEM &&
pMemDesc->_addressSpace != ADDR_EGM &&
pMemDesc->_addressSpace != ADDR_SYSMEM)
{
return;
@@ -1991,6 +1992,7 @@ memdescUnmap
switch (pMemDesc->_addressSpace)
{
case ADDR_SYSMEM:
case ADDR_EGM:
{
osUnmapSystemMemory(pMemDesc, Kernel, ProcessId, Address, Priv);
break;

View File

@@ -733,8 +733,9 @@ memUnmap_IMPL
//
}
// System Memory case
else if ((pGpu == NULL) || ((memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM) &&
FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags)))
else if ((pGpu == NULL) || (((memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM)
|| (memdescGetAddressSpace(pMemDesc) == ADDR_EGM)
) && FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags)))
{
if (FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags))
{