535.274.02

Maneet Singh
2025-09-30 12:40:20 -07:00
parent 9c67f19366
commit 66ab8e8596
22 changed files with 318 additions and 97 deletions

View File

@@ -36,25 +36,25 @@
 // and then checked back in. You cannot make changes to these sections without
 // corresponding changes to the buildmeister script
 #ifndef NV_BUILD_BRANCH
-#define NV_BUILD_BRANCH r539_41
+#define NV_BUILD_BRANCH r539_56
 #endif
 #ifndef NV_PUBLIC_BRANCH
-#define NV_PUBLIC_BRANCH r539_41
+#define NV_PUBLIC_BRANCH r539_56
 #endif
 #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
-#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r539_41-927"
-#define NV_BUILD_CHANGELIST_NUM (36124219)
+#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r539_56-994"
+#define NV_BUILD_CHANGELIST_NUM (36497304)
 #define NV_BUILD_TYPE "Official"
-#define NV_BUILD_NAME "rel/gpu_drv/r535/r539_41-927"
-#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36124219)
+#define NV_BUILD_NAME "rel/gpu_drv/r535/r539_56-994"
+#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36497304)
 #else /* Windows builds */
-#define NV_BUILD_BRANCH_VERSION "r539_41-1"
-#define NV_BUILD_CHANGELIST_NUM (36117060)
+#define NV_BUILD_BRANCH_VERSION "r539_56-1"
+#define NV_BUILD_CHANGELIST_NUM (36476729)
 #define NV_BUILD_TYPE "Official"
-#define NV_BUILD_NAME "539.42"
-#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36117060)
+#define NV_BUILD_NAME "539.57"
+#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36476729)
 #define NV_BUILD_BRANCH_BASE_VERSION R535
 #endif
 // End buildmeister python edited section

View File

@@ -4,7 +4,7 @@
 #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
     (defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
-#define NV_VERSION_STRING "535.261.03"
+#define NV_VERSION_STRING "535.274.02"
 #else

View File

@@ -1253,25 +1253,25 @@ NvBool isLibosPreserveLogBufferFull(LIBOS_LOG_DECODE *pLogDecode, NvU32 gpuInstance)
 {
     NvU64 i = (NvU32)(pLogDecode->numLogBuffers);
     NvU32 tag = LIBOS_LOG_NVLOG_BUFFER_TAG(pLogDecode->sourceName, i * 2);
-    NVLOG_BUFFER_HANDLE handle = 0;
-    NV_STATUS status = nvlogGetBufferHandleFromTag(tag, &handle);
-    if (status != NV_OK)
-    {
-        return NV_FALSE;
-    }
-    NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[handle];
-    if (pNvLogBuffer == NULL)
-    {
-        return NV_FALSE;
-    }
-    if (FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) &&
-        DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance &&
-        (pNvLogBuffer->pos >= pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64)))
-    {
-        return NV_TRUE;
-    }
+    //
+    // Cannot use nvlogGetBufferHandleFromTag here since in multi GPU case,
+    // we can have multiple buffers with exact same tag, only differentiable
+    // from gpuInstance
+    //
+    for (i = 0; i < NVLOG_MAX_BUFFERS; i++)
+    {
+        if (NvLogLogger.pBuffers[i] != NULL)
+        {
+            NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[i];
+            if ((pNvLogBuffer->tag == tag) &&
+                (DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance) &&
+                FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) &&
+                (pNvLogBuffer->pos >= pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64)))
+            {
+                return NV_TRUE;
+            }
+        }
+    }
     return NV_FALSE;
@@ -1279,19 +1279,27 @@ NvBool isLibosPreserveLogBufferFull(LIBOS_LOG_DECODE *pLogDecode, NvU32 gpuInstance)
 static NvBool findPreservedNvlogBuffer(NvU32 tag, NvU32 gpuInstance, NVLOG_BUFFER_HANDLE *pHandle)
 {
-    NVLOG_BUFFER_HANDLE handle = 0;
-    NV_STATUS status = nvlogGetBufferHandleFromTag(tag, &handle);
-    if (status != NV_OK)
-        return NV_FALSE;
-    NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[handle];
-    if (FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) &&
-        DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance &&
-        (pNvLogBuffer->pos < pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64)))
-    {
-        *pHandle = handle;
-        return NV_TRUE;
-    }
+    NvU64 i;
+    //
+    // Cannot use nvlogGetBufferHandleFromTag here since in multi GPU case,
+    // we can have multiple buffers with exact same tag, only differentiable
+    // from gpuInstance
+    //
+    for (i = 0; i < NVLOG_MAX_BUFFERS; i++)
+    {
+        if (NvLogLogger.pBuffers[i] != NULL)
+        {
+            NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[i];
+            if ((pNvLogBuffer->tag == tag) &&
+                (DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance) &&
+                FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) &&
+                (pNvLogBuffer->pos < pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64)))
+            {
+                *pHandle = i;
+                return NV_TRUE;
+            }
+        }
+    }
     return NV_FALSE;
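
Both hunks replace a tag-only nvlogGetBufferHandleFromTag() lookup with a linear scan because, on a multi-GPU system, several NvLog buffers can carry the exact same tag and differ only in their GPU instance. A minimal standalone sketch of that disambiguation, using simplified stand-in types rather than the driver's NVLOG structures (Buffer, findBuffer, and MAX_BUFFERS are illustrative names):

#include <stddef.h>
#include <stdio.h>

#define MAX_BUFFERS 8

typedef struct
{
    unsigned tag;          /* the same tag can be registered once per GPU */
    unsigned gpuInstance;  /* the only field that disambiguates shared tags */
} Buffer;

static Buffer *gBuffers[MAX_BUFFERS];

/* Scan every slot and match on BOTH tag and gpuInstance, mirroring the
 * loop the commit introduces in place of the tag-only handle lookup. */
static int findBuffer(unsigned tag, unsigned gpuInstance, size_t *pIndex)
{
    size_t i;
    for (i = 0; i < MAX_BUFFERS; i++)
    {
        if (gBuffers[i] != NULL &&
            gBuffers[i]->tag == tag &&
            gBuffers[i]->gpuInstance == gpuInstance)
        {
            *pIndex = i;  /* the slot index doubles as the handle */
            return 1;
        }
    }
    return 0;
}

int main(void)
{
    Buffer gpu0 = { 0xCAFE, 0 };
    Buffer gpu1 = { 0xCAFE, 1 };  /* same tag as gpu0, different GPU */
    size_t idx;

    gBuffers[0] = &gpu0;
    gBuffers[1] = &gpu1;

    if (findBuffer(0xCAFE, 1, &idx))
        printf("GPU instance 1 owns slot %zu\n", idx);  /* prints slot 1 */
    return 0;
}

With a tag-only lookup the query above could just as well land on slot 0; matching the (tag, gpuInstance) pair makes the result deterministic.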

View File

@@ -772,23 +772,23 @@ static inline NV_STATUS intrRestoreIntrRegValue(OBJGPU *pGpu, struct Intr *pIntr
 #define intrRestoreIntrRegValue_HAL(pGpu, pIntr, arg0, arg1, arg2) intrRestoreIntrRegValue(pGpu, pIntr, arg0, arg1, arg2)
-static inline NV_STATUS intrTriggerCpuDoorbellForVF_46f6a7(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid) {
+static inline NV_STATUS intrTriggerCpuDoorbellForVF_46f6a7(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid, NvBool bRearmIntr) {
     return NV_ERR_NOT_SUPPORTED;
 }
-NV_STATUS intrTriggerCpuDoorbellForVF_TU102(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid);
+NV_STATUS intrTriggerCpuDoorbellForVF_TU102(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid, NvBool bRearmIntr);
 #ifdef __nvoc_intr_h_disabled
-static inline NV_STATUS intrTriggerCpuDoorbellForVF(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid) {
+static inline NV_STATUS intrTriggerCpuDoorbellForVF(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid, NvBool bRearmIntr) {
     NV_ASSERT_FAILED_PRECOMP("Intr was disabled!");
     return NV_ERR_NOT_SUPPORTED;
 }
 #else //__nvoc_intr_h_disabled
-#define intrTriggerCpuDoorbellForVF(pGpu, pIntr, gfid) intrTriggerCpuDoorbellForVF_46f6a7(pGpu, pIntr, gfid)
+#define intrTriggerCpuDoorbellForVF(pGpu, pIntr, gfid, bRearmIntr) intrTriggerCpuDoorbellForVF_46f6a7(pGpu, pIntr, gfid, bRearmIntr)
 #endif //__nvoc_intr_h_disabled
-#define intrTriggerCpuDoorbellForVF_HAL(pGpu, pIntr, gfid) intrTriggerCpuDoorbellForVF(pGpu, pIntr, gfid)
+#define intrTriggerCpuDoorbellForVF_HAL(pGpu, pIntr, gfid, bRearmIntr) intrTriggerCpuDoorbellForVF(pGpu, pIntr, gfid, bRearmIntr)
 NV_STATUS intrTriggerPrivDoorbell_TU102(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid);
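
This hunk threads a new bRearmIntr parameter through every layer of the NVOC-generated HAL plumbing: the default stub (_46f6a7), the TU102 prototype, and both dispatch macros. Because the macros forward their arguments textually, all layers must change arity together or the build breaks. A compressed sketch of the same pattern, with invented names (triggerDoorbell_*) and without the OBJGPU/Intr parameters the real functions take:

#include <stdio.h>

typedef int      NV_STATUS;
typedef unsigned NvU32;
typedef int      NvBool;
#define NV_OK                 0
#define NV_ERR_NOT_SUPPORTED  (-1)

/* Per-chip implementation: the widened signature carries the new flag. */
static NV_STATUS triggerDoorbell_TU102(NvU32 gfid, NvBool bRearmIntr)
{
    printf("doorbell: gfid=%u rearm=%d\n", gfid, bRearmIntr);
    return NV_OK;
}

/* Default stub for unsupported chips: same widened signature, so the
 * dispatch macro can target either one. */
static NV_STATUS triggerDoorbell_stub(NvU32 gfid, NvBool bRearmIntr)
{
    (void)gfid;
    (void)bRearmIntr;
    return NV_ERR_NOT_SUPPORTED;
}

/* The HAL macro must forward every argument; on a TU102 build it resolves
 * to the real function, mirroring the _HAL defines in the hunk. */
#define triggerDoorbell_HAL(gfid, bRearmIntr) triggerDoorbell_TU102(gfid, bRearmIntr)

int main(void)
{
    (void)triggerDoorbell_stub;  /* unused in this configuration */
    return (triggerDoorbell_HAL(3, 1) == NV_OK) ? 0 : 1;
}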

View File

@@ -76,6 +76,7 @@ struct THREAD_STATE_NODE
     */
     NvU32 threadSeqId;
     NvBool bValid;
+    NvBool bUsingHeap;
     THREAD_TIMEOUT_STATE timeout;
     NvU32 cpuNum;
     NvU32 flags;
@@ -199,6 +200,7 @@ void threadStateFreeISRLockless(THREAD_STATE_NODE *, OBJGPU*, NvU32);
 void threadStateInitISRAndDeferredIntHandler(THREAD_STATE_NODE *, OBJGPU*, NvU32);
 void threadStateFreeISRAndDeferredIntHandler(THREAD_STATE_NODE *, OBJGPU*, NvU32);
 void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags);
+THREAD_STATE_NODE* threadStateAlloc(NvU32 flags);
 void threadStateFree(THREAD_STATE_NODE *pThreadNode, NvU32 flags);
 NV_STATUS threadStateGetCurrent(THREAD_STATE_NODE **ppThreadNode, OBJGPU *pGpu);
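
The header pairs the new threadStateAlloc() entry point with a bUsingHeap flag in THREAD_STATE_NODE, so a node records whether it lives in caller-provided storage (threadStateInit) or on the heap (threadStateAlloc), and a single threadStateFree() can tear down both. A small sketch of that self-describing-allocation pattern, with hypothetical Node/nodeAlloc/nodeFree names:

#include <stdlib.h>
#include <string.h>

typedef struct Node
{
    int bUsingHeap;  /* records the allocation origin, like bUsingHeap above */
    int payload;
} Node;

/* One teardown path serves both stack- and heap-allocated nodes:
 * only nodes that record a heap origin are actually freed. */
static void nodeFree(Node *n)
{
    int wasHeap = n->bUsingHeap;
    /* ...shared cleanup of payload would go here... */
    if (wasHeap)
        free(n);
}

static Node *nodeAlloc(void)
{
    Node *n = malloc(sizeof(*n));
    if (n == NULL)
        return NULL;
    memset(n, 0, sizeof(*n));
    n->bUsingHeap = 1;
    return n;
}

int main(void)
{
    Node stackNode;                /* caller-provided storage */
    memset(&stackNode, 0, sizeof(stackNode));
    nodeFree(&stackNode);          /* no free(): bUsingHeap is 0 */

    Node *heapNode = nodeAlloc();  /* library-provided storage */
    if (heapNode != NULL)
        nodeFree(heapNode);        /* free(): bUsingHeap is 1 */
    return 0;
}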

View File

@@ -601,6 +601,110 @@ void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags)
     }
 }
+
+/**
+ *
+ * @brief Allocate a heap-based threadState
+ * @param[in] flags Thread state flags
+ *
+ * @return Heap-allocated THREAD_STATE_NODE* on success, NULL on failure
+ */
+THREAD_STATE_NODE* threadStateAlloc(NvU32 flags)
+{
+    THREAD_STATE_NODE *pHeapNode;
+    NV_STATUS rmStatus;
+    NvU64 funcAddr;
+
+    // Isrs should be using threadStateIsrInit().
+    NV_ASSERT((flags & (THREAD_STATE_FLAGS_IS_ISR_LOCKLESS |
+                        THREAD_STATE_FLAGS_IS_ISR |
+                        THREAD_STATE_FLAGS_DEFERRED_INT_HANDLER_RUNNING)) == 0);
+
+    // Check to see if ThreadState is enabled
+    if (!(threadStateDatabase.setupFlags & THREAD_STATE_SETUP_FLAGS_ENABLED))
+        return NULL;
+
+    // Allocate heap node directly
+    pHeapNode = portMemAllocNonPaged(sizeof(THREAD_STATE_NODE));
+    if (pHeapNode == NULL)
+        return NULL;
+
+    portMemSet(pHeapNode, 0, sizeof(*pHeapNode));
+    pHeapNode->threadSeqId = portAtomicIncrementU32(&threadStateDatabase.threadSeqCntr);
+    pHeapNode->cpuNum = osGetCurrentProcessorNumber();
+    pHeapNode->bUsingHeap = NV_TRUE;
+    pHeapNode->flags = flags;
+
+    //
+    // The thread state free callbacks are only supported in the non-ISR paths
+    // as they invoke memory allocation routines.
+    //
+    listInit(&pHeapNode->cbList, portMemAllocatorGetGlobalNonPaged());
+    pHeapNode->flags |= THREAD_STATE_FLAGS_STATE_FREE_CB_ENABLED;
+
+    rmStatus = _threadNodeInitTime(pHeapNode);
+    if (rmStatus == NV_OK)
+        pHeapNode->flags |= THREAD_STATE_FLAGS_TIMEOUT_INITED;
+
+    rmStatus = osGetCurrentThread(&pHeapNode->threadId);
+    if (rmStatus != NV_OK)
+        goto cleanup_heap;
+
+    NV_ASSERT_OR_GOTO(pHeapNode->cpuNum < threadStateDatabase.maxCPUs, cleanup_heap);
+
+    funcAddr = (NvU64) (NV_RETURN_ADDRESS());
+
+    portSyncSpinlockAcquire(threadStateDatabase.spinlock);
+    if (!mapInsertExisting(&threadStateDatabase.dbRoot, (NvU64)pHeapNode->threadId, pHeapNode))
+    {
+        rmStatus = NV_ERR_OBJECT_NOT_FOUND;
+        // Place in the Preempted List if threadId is already present in the API list
+        if (mapInsertExisting(&threadStateDatabase.dbRootPreempted, (NvU64)pHeapNode->threadId, pHeapNode))
+        {
+            pHeapNode->flags |= THREAD_STATE_FLAGS_PLACED_ON_PREEMPT_LIST;
+            pHeapNode->bValid = NV_TRUE;
+            rmStatus = NV_OK;
+        }
+        else
+        {
+            // Reset the threadId as insertion failed on both maps. bValid is already NV_FALSE
+            pHeapNode->threadId = 0;
+            portSyncSpinlockRelease(threadStateDatabase.spinlock);
+            goto cleanup_heap;
+        }
+    }
+    else
+    {
+        pHeapNode->bValid = NV_TRUE;
+        rmStatus = NV_OK;
+    }
+    _threadStateLogInitCaller(pHeapNode, funcAddr);
+    portSyncSpinlockRelease(threadStateDatabase.spinlock);
+
+    _threadStatePrintInfo(pHeapNode);
+    NV_ASSERT(rmStatus == NV_OK);
+
+    threadPriorityStateAlloc();
+
+    if (TLS_MIRROR_THREADSTATE)
+    {
+        THREAD_STATE_NODE **pTls = (THREAD_STATE_NODE **)tlsEntryAcquire(TLS_ENTRY_ID_THREADSTATE);
+        NV_ASSERT_OR_GOTO(pTls != NULL, cleanup_heap);
+        if (*pTls != NULL)
+        {
+            NV_PRINTF(LEVEL_WARNING,
+                "TLS: Nested threadState inits detected. Previous threadState node is %p, new is %p\n",
+                *pTls, pHeapNode);
+        }
+        *pTls = pHeapNode;
+    }
+
+    return pHeapNode;
+
+cleanup_heap:
+    portMemFree(pHeapNode);
+    return NULL;
+}
+
 /**
  * @brief Initialize a threadState for locked ISR and Bottom-half
  *
@@ -863,6 +967,12 @@ void threadStateFree(THREAD_STATE_NODE *pThreadNode, NvU32 flags)
                   r);
         }
     }
+
+    // Free heap memory if this node was heap-allocated
+    if (pThreadNode->bUsingHeap)
+    {
+        portMemFree(pThreadNode);
+    }
 }
 /**
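
One subtlety in the inserted function: under the spinlock it first tries the primary thread map (dbRoot) and, if the threadId is already present there, falls back to a second map (dbRootPreempted) before giving up, per the commit's own comment about the Preempted List. A rough standalone sketch of that two-map fallback, with simplified fixed-size maps (Map, mapInsert, and SLOTS are illustrative; the real code uses RM's map containers under threadStateDatabase.spinlock):

#include <stdio.h>

#define SLOTS 4

typedef struct
{
    unsigned long keys[SLOTS];
    int used[SLOTS];
} Map;

/* Refuse duplicate keys, like mapInsertExisting in the hunk. */
static int mapInsert(Map *m, unsigned long key)
{
    int freeSlot = -1;
    int i;
    for (i = 0; i < SLOTS; i++)
    {
        if (m->used[i] && m->keys[i] == key)
            return 0;  /* duplicate key: insertion refused */
        if (!m->used[i] && freeSlot < 0)
            freeSlot = i;
    }
    if (freeSlot < 0)
        return 0;
    m->used[freeSlot] = 1;
    m->keys[freeSlot] = key;
    return 1;
}

int main(void)
{
    Map api = {{0}, {0}}, preempted = {{0}, {0}};
    unsigned long threadId = 1234;

    /* First node for this thread lands in the API map. */
    int ok = mapInsert(&api, threadId);

    /* A second node with the same threadId is refused by the API map and
     * falls back to the preempted map, mirroring dbRoot/dbRootPreempted. */
    if (!mapInsert(&api, threadId))
        ok = mapInsert(&preempted, threadId);

    printf("%s\n", ok ? "inserted" : "failed");
    return 0;
}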

View File

@@ -231,6 +231,11 @@ memdescCreate
     allocSize = Size;
+
+    if (allocSize == 0)
+    {
+        return NV_ERR_INVALID_ARGUMENT;
+    }
+
     //
     // this memdesc may have gotten forced to sysmem if no carveout,
     // but for VPR it needs to be in vidmem, so check and re-direct here,
@@ -301,16 +306,7 @@ memdescCreate
     // (4k >> 12 = 1). This modification helps us to avoid overflow of variable
     // allocSize, in case caller of this function passes highest value of NvU64.
     //
-    // If allocSize is passed as 0, PageCount should be returned as 0.
-    //
-    if (allocSize == 0)
-    {
-        PageCount = 0;
-    }
-    else
-    {
-        PageCount = ((allocSize - 1) >> RM_PAGE_SHIFT) + 1;
-    }
+    PageCount = ((allocSize - 1) >> RM_PAGE_SHIFT) + 1;
     if (PhysicallyContiguous)
     {
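
The surviving comment explains why PageCount is computed as ((allocSize - 1) >> RM_PAGE_SHIFT) + 1: the usual (size + pageSize - 1) round-up would overflow for sizes near the top of NvU64. That form is only safe once allocSize == 0 is rejected up front, as the first hunk now does, because 0 - 1 would wrap and yield an enormous page count. A small self-contained check of the arithmetic:

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

#define RM_PAGE_SHIFT 12  /* 4 KiB pages, as in the comment above */

/* Round-up page count without the naive (size + pageSize - 1) sum, which
 * overflows near UINT64_MAX. Callers must reject size == 0 first, exactly
 * as the hunk adds at the top of memdescCreate: 0 - 1 would wrap. */
static uint64_t pageCount(uint64_t allocSize)
{
    return ((allocSize - 1) >> RM_PAGE_SHIFT) + 1;
}

int main(void)
{
    printf("%" PRIu64 "\n", pageCount(1));           /* 1 */
    printf("%" PRIu64 "\n", pageCount(4096));        /* 1: 4k >> 12 = 1 */
    printf("%" PRIu64 "\n", pageCount(4097));        /* 2 */
    printf("%" PRIu64 "\n", pageCount(UINT64_MAX));  /* 2^52, no overflow */
    return 0;
}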

View File

@@ -58,6 +58,11 @@ NV_STATUS stdmemValidateParams
         return NV_ERR_INVALID_ARGUMENT;
     }
+
+    if (pAllocData->size == 0)
+    {
+        return NV_ERR_INVALID_ARGUMENT;
+    }
+
     //
     // These flags don't do anything in this path. No mapping on alloc and
     // kernel map is controlled by TYPE

View File

@@ -3179,7 +3179,7 @@ cliresCtrlCmdNvdGetNvlogBufferInfo_IMPL
     }
     pBuffer = NvLogLogger.pBuffers[hBuffer];
-    NV_ASSERT_OR_RETURN(pBuffer != NULL, NV_ERR_OBJECT_NOT_FOUND);
+    NV_ASSERT_OR_ELSE(pBuffer != NULL, status = NV_ERR_OBJECT_NOT_FOUND; goto done);
     NvBool bPause = pParams->flags & DRF_DEF(0000, _CTRL_NVD_NVLOG_BUFFER_INFO_FLAGS, _PAUSE, _YES);
     nvlogPauseLoggingToBuffer(hBuffer, bPause);
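
The control call previously returned straight out of the function when the buffer lookup failed; switching to NV_ASSERT_OR_ELSE with goto done routes the failure through the function's shared exit label, so whatever cleanup lives at done: still runs. A generic sketch of why the goto form is safer once a function owns a resource (the resource here is a hypothetical stand-in, not the actual cleanup in cliresCtrlCmdNvdGetNvlogBufferInfo_IMPL):

#include <stdio.h>
#include <stdlib.h>

typedef int NV_STATUS;
#define NV_OK                    0
#define NV_ERR_GENERIC           (-1)
#define NV_ERR_OBJECT_NOT_FOUND  (-2)

static NV_STATUS lookupAndPause(int handle)
{
    NV_STATUS status = NV_OK;
    char *resource = malloc(16);  /* stand-in for a lock, refcount, etc. */
    if (resource == NULL)
        return NV_ERR_GENERIC;

    char *pBuffer = (handle == 42) ? resource : NULL;

    /* An early 'return' here would leak 'resource'. Jumping to the shared
     * exit label keeps a single cleanup path, which is what switching
     * NV_ASSERT_OR_RETURN to NV_ASSERT_OR_ELSE(...; goto done) buys. */
    if (pBuffer == NULL)
    {
        status = NV_ERR_OBJECT_NOT_FOUND;
        goto done;
    }

    printf("pausing logging on buffer %d\n", handle);

done:
    free(resource);  /* runs on success and failure alike */
    return status;
}

int main(void)
{
    return (lookupAndPause(7) == NV_ERR_OBJECT_NOT_FOUND &&
            lookupAndPause(42) == NV_OK) ? 0 : 1;
}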

View File

@@ -6289,7 +6289,7 @@ static NV_STATUS dupMemory(struct gpuDevice *device,
 {
     NV_STATUS status = NV_OK;
     nvGpuOpsLockSet acquiredLocks;
-    THREAD_STATE_NODE threadState;
+    THREAD_STATE_NODE *pThreadState;
     NvHandle dupedMemHandle;
     Memory *pMemory = NULL;
     PMEMORY_DESCRIPTOR pMemDesc = NULL;
@@ -6310,14 +6310,15 @@ static NV_STATUS dupMemory(struct gpuDevice *device,
     NV_ASSERT((flags == NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE) || (flags == NV04_DUP_HANDLE_FLAGS_NONE));
-    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
+    pThreadState = threadStateAlloc(THREAD_STATE_FLAGS_NONE);
+    if (!pThreadState)
+        return NV_ERR_NO_MEMORY;
     // RS-TODO use dual client locking
     status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_NONE, device->session->handle,
                                       &pSessionClient, &acquiredLocks);
     if (status != NV_OK)
     {
-        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
+        threadStateFree(pThreadState, THREAD_STATE_FLAGS_NONE);
         return status;
     }
@@ -6359,15 +6360,23 @@ static NV_STATUS dupMemory(struct gpuDevice *device,
     }
     // For SYSMEM or indirect peer mappings
-    bIsIndirectPeer = gpumgrCheckIndirectPeer(pMappingGpu, pAdjustedMemDesc->pGpu);
+    // Deviceless memory (NV01_MEMORY_DEVICELESS) can have a NULL pGpu. Perform targeted
+    // null checks before IOMMU operations that require valid GPU contexts.
+    bIsIndirectPeer = (pAdjustedMemDesc->pGpu != NULL) ?
+                      gpumgrCheckIndirectPeer(pMappingGpu, pAdjustedMemDesc->pGpu) : NV_FALSE;
     if (bIsIndirectPeer ||
         memdescIsSysmem(pAdjustedMemDesc))
     {
+        if (NV_UNLIKELY(pAdjustedMemDesc->pGpu == NULL))
+        {
+            status = NV_ERR_INVALID_STATE;
+            goto freeGpaMemdesc;
+        }
         // For sysmem allocations, the dup done below is very shallow and in
         // particular doesn't create IOMMU mappings required for the mapped GPU
         // to access the memory. That's a problem if the mapped GPU is different
         // from the GPU that the allocation was created under. Add them
-        // explicitly here and remove them when the memory is freed in n
+        // explicitly here and remove them when the memory is freed in
         // nvGpuOpsFreeDupedHandle(). Notably memdescMapIommu() refcounts the
         // mappings so it's ok to call it if the mappings are already there.
         //
@@ -6436,7 +6445,7 @@ freeGpaMemdesc:
 done:
     _nvGpuOpsLocksRelease(&acquiredLocks);
-    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
+    threadStateFree(pThreadState, THREAD_STATE_FLAGS_NONE);
     return status;
 }
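
The last set of hunks guards dupMemory() against memory descriptors with no owning GPU: deviceless allocations (NV01_MEMORY_DEVICELESS) carry pGpu == NULL, so both the indirect-peer check and the IOMMU mapping path now verify the pointer first. A reduced sketch of the guard, with stand-in types (Gpu, MemDesc, and mapForGpu are illustrative, not the driver's structures):

#include <stdio.h>

typedef int NvBool;
#define NV_TRUE  1
#define NV_FALSE 0

typedef struct Gpu { int id; } Gpu;

typedef struct MemDesc
{
    Gpu *pGpu;      /* NULL for deviceless allocations, per the hunk's comment */
    int  isSysmem;
} MemDesc;

static NvBool checkIndirectPeer(Gpu *pMappingGpu, Gpu *pOwnerGpu)
{
    return (pMappingGpu != pOwnerGpu) ? NV_TRUE : NV_FALSE;
}

/* Guard every use of pGpu that feeds GPU-context-dependent work (the
 * IOMMU mapping in the real code) instead of assuming an owner exists. */
static int mapForGpu(Gpu *pMappingGpu, MemDesc *pMemDesc)
{
    NvBool bIsIndirectPeer = (pMemDesc->pGpu != NULL)
                           ? checkIndirectPeer(pMappingGpu, pMemDesc->pGpu)
                           : NV_FALSE;

    if (bIsIndirectPeer || pMemDesc->isSysmem)
    {
        if (pMemDesc->pGpu == NULL)
            return -1;  /* NV_ERR_INVALID_STATE in the hunk */
        /* ...memdescMapIommu-style work needing a valid owner GPU... */
    }
    return 0;
}

int main(void)
{
    Gpu gpu0 = { 0 };
    MemDesc deviceless = { NULL, 1 };
    printf("%d\n", mapForGpu(&gpu0, &deviceless));  /* -1: rejected safely */
    return 0;
}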