mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2026-01-30 04:59:46 +00:00
535.171.04
This commit is contained in:
@@ -91,6 +91,7 @@ ifeq ($(TARGET_ARCH),aarch64)
|
||||
CFLAGS += -mgeneral-regs-only
|
||||
CFLAGS += -march=armv8-a
|
||||
CFLAGS += -mstrict-align
|
||||
CFLAGS += -ffixed-x18
|
||||
CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mno-outline-atomics)
|
||||
endif
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -77,6 +77,9 @@
|
||||
#define NV_CTRL_INTR_GPU_VECTOR_TO_SUBTREE(i) \
|
||||
((NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(i)) / 2)
|
||||
|
||||
// First index of doorbell which is controlled by VF
|
||||
#define NV_CTRL_INTR_GPU_DOORBELL_INDEX_VF_START 2048
|
||||
|
||||
// The max number of leaf registers we expect
|
||||
#define NV_MAX_INTR_LEAVES 16
|
||||
|
||||
|
||||
@@ -1042,6 +1042,8 @@ static const CHIPS_RELEASED sChipsReleased[] = {
|
||||
{ 0x28A0, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Laptop GPU" },
|
||||
{ 0x28A1, 0x0000, 0x0000, "NVIDIA GeForce RTX 4050 Laptop GPU" },
|
||||
{ 0x28B8, 0x0000, 0x0000, "NVIDIA RTX 2000 Ada Generation Laptop GPU" },
|
||||
{ 0x28B9, 0x0000, 0x0000, "NVIDIA RTX 1000 Ada Generation Laptop GPU" },
|
||||
{ 0x28BB, 0x0000, 0x0000, "NVIDIA RTX 500 Ada Generation Laptop GPU" },
|
||||
{ 0x28E0, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Laptop GPU" },
|
||||
{ 0x28E1, 0x0000, 0x0000, "NVIDIA GeForce RTX 4050 Laptop GPU" },
|
||||
{ 0x28F8, 0x0000, 0x0000, "NVIDIA RTX 2000 Ada Generation Embedded GPU" },
|
||||
|
||||
@@ -7,7 +7,7 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
|
||||
@@ -103,4 +103,24 @@ typedef struct MESSAGE_QUEUE_COLLECTION
|
||||
#define GSP_MSG_QUEUE_HEADER_SIZE RM_PAGE_SIZE
|
||||
#define GSP_MSG_QUEUE_HEADER_ALIGN 4 // 2 ^ 4 = 16
|
||||
|
||||
/*!
|
||||
* Calculate 32-bit checksum
|
||||
*
|
||||
* This routine assumes that the data is padded out with zeros to the next
|
||||
* 8-byte alignment, and it is OK to read past the end to the 8-byte alignment.
|
||||
*/
|
||||
static NV_INLINE NvU32 _checkSum32(void *pData, NvU32 uLen)
|
||||
{
|
||||
NvU64 *p = (NvU64 *)pData;
|
||||
NvU64 *pEnd = (NvU64 *)((NvUPtr)pData + uLen);
|
||||
NvU64 checkSum = 0;
|
||||
|
||||
NV_ASSERT_CHECKED(uLen > 0);
|
||||
|
||||
while (p < pEnd)
|
||||
checkSum ^= *p++;
|
||||
|
||||
return NvU64_HI32(checkSum) ^ NvU64_LO32(checkSum);
|
||||
}
|
||||
|
||||
#endif // _MESSAGE_QUEUE_PRIV_H_
|
||||
|
||||
@@ -244,32 +244,50 @@ kfspPollForQueueEmpty_IMPL
|
||||
KernelFsp *pKernelFsp
|
||||
)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
RMTIMEOUT timeout;
|
||||
|
||||
gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, GPU_TIMEOUT_FLAGS_OSTIMER | GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
|
||||
gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout,
|
||||
GPU_TIMEOUT_FLAGS_OSTIMER |
|
||||
GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
|
||||
|
||||
while (!kfspIsQueueEmpty(pGpu, pKernelFsp))
|
||||
{
|
||||
//
|
||||
// For now we assume that any response from FSP before RM message send is complete
|
||||
// indicates an error and we should abort.
|
||||
// For now we assume that any response from FSP before RM message
|
||||
// send is complete indicates an error and we should abort.
|
||||
//
|
||||
// Ongoing discussion on usefulness of this check. Bug to be filed.
|
||||
//
|
||||
if (!kfspIsMsgQueueEmpty(pGpu, pKernelFsp))
|
||||
{
|
||||
kfspReadMessage(pGpu, pKernelFsp, NULL, 0);
|
||||
NV_PRINTF(LEVEL_ERROR, "Received error message from FSP while waiting for CMDQ to be empty.\n");
|
||||
return NV_ERR_GENERIC;
|
||||
NV_PRINTF(LEVEL_ERROR,
|
||||
"Received error message from FSP while waiting for CMDQ to be empty.\n");
|
||||
status = NV_ERR_GENERIC;
|
||||
break;
|
||||
}
|
||||
|
||||
if (gpuCheckTimeout(pGpu, &timeout) == NV_ERR_TIMEOUT)
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR, "Timed out waiting for FSP command queue to be empty.\n");
|
||||
return NV_ERR_TIMEOUT;
|
||||
}
|
||||
osSpinLoop();
|
||||
|
||||
status = gpuCheckTimeout(pGpu, &timeout);
|
||||
if (status != NV_OK)
|
||||
{
|
||||
if ((status == NV_ERR_TIMEOUT) &&
|
||||
kfspIsQueueEmpty(pGpu, pKernelFsp))
|
||||
{
|
||||
status = NV_OK;
|
||||
}
|
||||
else
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR,
|
||||
"Timed out waiting for FSP command queue to be empty.\n");
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
return status;
|
||||
}
|
||||
|
||||
/*!
|
||||
|
||||
@@ -476,24 +476,6 @@ void GspMsgQueuesCleanup(MESSAGE_QUEUE_COLLECTION **ppMQCollection)
|
||||
*ppMQCollection = NULL;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Calculate 32-bit checksum
|
||||
*
|
||||
* This routine assumes that the data is padded out with zeros to the next
|
||||
* 8-byte alignment, and it is OK to read past the end to the 8-byte alignment.
|
||||
*/
|
||||
static NV_INLINE NvU32 _checkSum32(void *pData, NvU32 uLen)
|
||||
{
|
||||
NvU64 *p = (NvU64 *)pData;
|
||||
NvU64 *pEnd = (NvU64 *)((NvUPtr)pData + uLen);
|
||||
NvU64 checkSum = 0;
|
||||
|
||||
while (p < pEnd)
|
||||
checkSum ^= *p++;
|
||||
|
||||
return NvU64_HI32(checkSum) ^ NvU64_LO32(checkSum);
|
||||
}
|
||||
|
||||
/*!
|
||||
* GspMsgQueueSendCommand
|
||||
*
|
||||
@@ -533,7 +515,7 @@ NV_STATUS GspMsgQueueSendCommand(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
|
||||
|
||||
pCQE->seqNum = pMQI->txSeqNum;
|
||||
pCQE->elemCount = GSP_MSG_QUEUE_BYTES_TO_ELEMENTS(uElementSize);
|
||||
pCQE->checkSum = 0;
|
||||
pCQE->checkSum = 0; // The checkSum field is included in the checksum calculation, so zero it.
|
||||
|
||||
ConfidentialCompute *pCC = GPU_GET_CONF_COMPUTE(pGpu);
|
||||
if (pCC != NULL && pCC->getProperty(pCC, PDB_PROP_CONFCOMPUTE_ENCRYPT_ENABLED))
|
||||
@@ -660,7 +642,8 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
|
||||
NvU32 nRetries;
|
||||
NvU32 nMaxRetries = 3;
|
||||
NvU32 nElements = 1; // Assume record fits in one queue element for now.
|
||||
NvU32 uElementSize = 0;
|
||||
NvU32 uElementSize;
|
||||
NvU32 checkSum;
|
||||
NvU32 seqMismatchDiff = NV_U32_MAX;
|
||||
NV_STATUS nvStatus = NV_OK;
|
||||
ConfidentialCompute *pCC = NULL;
|
||||
@@ -713,15 +696,23 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
|
||||
pCC = GPU_GET_CONF_COMPUTE(pGpu);
|
||||
if (pCC != NULL && pCC->getProperty(pCC, PDB_PROP_CONFCOMPUTE_ENCRYPT_READY))
|
||||
{
|
||||
// In Confidential Compute scenario, checksum includes complete element range.
|
||||
if (_checkSum32(pMQI->pCmdQueueElement, (nElements * GSP_MSG_QUEUE_ELEMENT_SIZE_MIN)) != 0)
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR, "Bad checksum.\n");
|
||||
nvStatus = NV_ERR_INVALID_DATA;
|
||||
continue;
|
||||
}
|
||||
//
|
||||
// In the Confidential Compute scenario, the actual message length
|
||||
// is inside the encrypted payload, and we can't access it before
|
||||
// decryption, therefore the checksum encompasses the whole element
|
||||
// range. This makes checksum verification significantly slower
|
||||
// because messages are typically much smaller than element size.
|
||||
//
|
||||
checkSum = _checkSum32(pMQI->pCmdQueueElement,
|
||||
(nElements * GSP_MSG_QUEUE_ELEMENT_SIZE_MIN));
|
||||
} else
|
||||
if (_checkSum32(pMQI->pCmdQueueElement, uElementSize) != 0)
|
||||
{
|
||||
checkSum = _checkSum32(pMQI->pCmdQueueElement,
|
||||
(GSP_MSG_QUEUE_ELEMENT_HDR_SIZE +
|
||||
pMQI->pCmdQueueElement->rpc.length));
|
||||
}
|
||||
|
||||
if (checkSum != 0)
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR, "Bad checksum.\n");
|
||||
nvStatus = NV_ERR_INVALID_DATA;
|
||||
|
||||
@@ -1587,6 +1587,7 @@ memdescFree
|
||||
}
|
||||
|
||||
if (pMemDesc->_addressSpace != ADDR_FBMEM &&
|
||||
pMemDesc->_addressSpace != ADDR_EGM &&
|
||||
pMemDesc->_addressSpace != ADDR_SYSMEM)
|
||||
{
|
||||
return;
|
||||
@@ -1991,6 +1992,7 @@ memdescUnmap
|
||||
switch (pMemDesc->_addressSpace)
|
||||
{
|
||||
case ADDR_SYSMEM:
|
||||
case ADDR_EGM:
|
||||
{
|
||||
osUnmapSystemMemory(pMemDesc, Kernel, ProcessId, Address, Priv);
|
||||
break;
|
||||
|
||||
@@ -733,8 +733,9 @@ memUnmap_IMPL
|
||||
//
|
||||
}
|
||||
// System Memory case
|
||||
else if ((pGpu == NULL) || ((memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM) &&
|
||||
FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags)))
|
||||
else if ((pGpu == NULL) || (((memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM)
|
||||
|| (memdescGetAddressSpace(pMemDesc) == ADDR_EGM)
|
||||
) && FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags)))
|
||||
{
|
||||
if (FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags))
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user