525.78.01

Author: Andy Ritger
Date:   2023-01-05 10:40:27 -08:00
Parent: 9594cc0169
Commit: dac2350c7f

180 changed files with 9465 additions and 4853 deletions

@@ -958,7 +958,6 @@ NV_STATUS NV_API_CALL rm_log_gpu_crash (nv_stack_t *, nv_state_t *);
 void NV_API_CALL rm_kernel_rmapi_op(nvidia_stack_t *sp, void *ops_cmd);
 NvBool NV_API_CALL rm_get_device_remove_flag(nvidia_stack_t *sp, NvU32 gpu_id);
 NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults(nvidia_stack_t *, nv_state_t *, NvU32 *);
-NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults_unlocked(nvidia_stack_t *, nv_state_t *, NvU32 *);
 NV_STATUS NV_API_CALL rm_gpu_handle_mmu_faults(nvidia_stack_t *, nv_state_t *, NvU32 *);
 NvBool NV_API_CALL rm_gpu_need_4k_page_isolation(nv_state_t *);
 NvBool NV_API_CALL rm_is_chipset_io_coherent(nv_stack_t *);

@@ -1221,12 +1221,11 @@ static void postEvent(
     NvBool dataValid
 )
 {
-    nv_state_t *nv = nv_get_ctl_state();
-    portSyncSpinlockAcquire(nv->event_spinlock);
-    if (event->active)
-        nv_post_event(event, hEvent, notifyIndex,
-                      info32, info16, dataValid);
-    portSyncSpinlockRelease(nv->event_spinlock);
+    if (osReferenceObjectCount(event) != NV_OK)
+        return;
+    nv_post_event(event, hEvent, notifyIndex,
+                  info32, info16, dataValid);
+    osDereferenceObjectCount(event);
 }

 NvU32 osSetEvent
@@ -1445,6 +1444,12 @@ NV_STATUS osReferenceObjectCount(void *pEvent)
     nv_event_t *event = pEvent;

     portSyncSpinlockAcquire(nv->event_spinlock);
+    // If event->active is false, don't allow any more references
+    if (!event->active)
+    {
+        portSyncSpinlockRelease(nv->event_spinlock);
+        return NV_ERR_INVALID_EVENT;
+    }
     ++event->refcount;
     portSyncSpinlockRelease(nv->event_spinlock);
     return NV_OK;
@@ -1457,11 +1462,10 @@ NV_STATUS osDereferenceObjectCount(void *pOSEvent)
     portSyncSpinlockAcquire(nv->event_spinlock);
     NV_ASSERT(event->refcount > 0);
-    --event->refcount;
     // If event->refcount == 0 but event->active is true, the client
     // has not yet freed the OS event. free_os_event will free its
     // memory when they do, or else when the client itself is freed.
-    if (event->refcount == 0 && !event->active)
+    if (--event->refcount == 0 && !event->active)
         portMemFree(event);
     portSyncSpinlockRelease(nv->event_spinlock);

     return NV_OK;

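The three hunks above, together with the free_os_event_under_lock hunk below, replace "post while holding the spinlock" with reference counting: postEvent now pins the event via osReferenceObjectCount, delivers it outside the lock, and unpins it via osDereferenceObjectCount, so whichever of "client frees the OS event" and "last in-flight post completes" happens second releases the memory. Below is a minimal standalone sketch of that lifetime scheme, assuming a single global mutex in place of nv->event_spinlock; nv_event_sketch, event_ref, event_unref, event_client_free, and event_post are illustrative names, not driver symbols.

/*
 * Sketch only: free() stands in for portMemFree, puts() for nv_post_event.
 * Build: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t event_lock = PTHREAD_MUTEX_INITIALIZER;

struct nv_event_sketch {
    bool active;   /* false once the client has freed its OS event */
    int  refcount; /* posts currently in flight                    */
};

/* osReferenceObjectCount analogue: refuse new references once inactive. */
static bool event_ref(struct nv_event_sketch *e)
{
    pthread_mutex_lock(&event_lock);
    bool ok = e->active;
    if (ok)
        e->refcount++;
    pthread_mutex_unlock(&event_lock);
    return ok;
}

/* osDereferenceObjectCount analogue: last reference frees a dead event. */
static void event_unref(struct nv_event_sketch *e)
{
    pthread_mutex_lock(&event_lock);
    if (--e->refcount == 0 && !e->active)
        free(e);
    pthread_mutex_unlock(&event_lock);
}

/* free_os_event_under_lock analogue: the client gives up its handle,
 * but the memory survives while posts are still in flight. */
static void event_client_free(struct nv_event_sketch *e)
{
    pthread_mutex_lock(&event_lock);
    e->active = false;
    if (e->refcount == 0)
        free(e);
    pthread_mutex_unlock(&event_lock);
}

/* postEvent analogue: deliver outside the lock, pinned by a reference. */
static void event_post(struct nv_event_sketch *e)
{
    if (!event_ref(e))
        return;                /* client already freed the event */
    puts("delivering event");  /* delivery happens with no lock held */
    event_unref(e);
}

int main(void)
{
    struct nv_event_sketch *e = calloc(1, sizeof(*e));
    e->active = true;
    event_post(e);        /* delivered; refcount returns to 0 */
    event_client_free(e); /* no posts in flight, freed here   */
    return 0;
}

Folding the decrement into the test, as the osDereferenceObjectCount hunk does, is behaviorally identical under the spinlock; it simply keeps the assert, the update, and the free decision in one place.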
@@ -354,9 +354,7 @@ static void free_os_event_under_lock(nv_event_t *event)
     // If refcount > 0, event will be freed by osDereferenceObjectCount
     // when the last associated RM event is freed.
     if (event->refcount == 0)
-    {
         portMemFree(event);
-    }
 }

 static void free_os_events(
@@ -2910,23 +2908,21 @@ static NV_STATUS RmRunNanoTimerCallback(
     void *pTmrEvent
 )
 {
     OBJSYS *pSys = SYS_GET_INSTANCE();
-    POBJTMR pTmr = GPU_GET_TIMER(pGpu);
     THREAD_STATE_NODE threadState;
     NV_STATUS status = NV_OK;
+    TMR_EVENT *pEvent = (TMR_EVENT *)pTmrEvent;

     // LOCK: try to acquire GPUs lock
     if ((status = rmGpuLocksAcquire(GPU_LOCK_FLAGS_COND_ACQUIRE, RM_LOCK_MODULES_TMR)) != NV_OK)
     {
-        PTMR_EVENT_PVT pEvent = (PTMR_EVENT_PVT) pTmrEvent;
-
-        // We failed to acquire the lock; schedule a timer to try again.
-        return osStartNanoTimer(pGpu->pOsGpuInfo, pEvent->super.pOSTmrCBdata, 1000);
+        //
+        // We failed to acquire the lock - depending on what's holding it,
+        // the lock could be held for a while, so try again soon, but not
+        // so soon that we keep the owner from making forward progress.
+        //
+        return osStartNanoTimer(pGpu->pOsGpuInfo, pEvent->pOSTmrCBdata,
+                                osGetTickResolution());
     }

     if ((status = osCondAcquireRmSema(pSys->pSema)) != NV_OK)
     {
         // UNLOCK: release GPUs lock
         rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);
         return status;
     }

     threadStateInitISRAndDeferredIntHandler(&threadState, pGpu,
@@ -2939,7 +2935,6 @@ static NV_STATUS RmRunNanoTimerCallback(
     threadStateFreeISRAndDeferredIntHandler(&threadState,
         pGpu, THREAD_STATE_FLAGS_IS_DEFERRED_INT_HANDLER);

     osReleaseRmSema(pSys->pSema, NULL);
     // UNLOCK: release GPUs lock
     rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, pGpu);

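The two timer hunks change the retry policy when the conditional GPU-lock acquire fails: rather than re-arming at a fixed 1000 ns, the callback re-arms at the timer's own tick resolution, so retries cannot fire faster than the timer can actually resolve and the lock owner gets a chance to run. A rough sketch of the pattern follows; gpu_lock, tick_resolution_ns, schedule_retry_ns, and nano_timer_callback are placeholders, and where the driver re-arms a one-shot nano-timer and returns, this sketch just retries inline.

/*
 * Sketch only: pthread_mutex_trylock stands in for the conditional
 * rmGpuLocksAcquire, nanosleep for osStartNanoTimer.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t gpu_lock = PTHREAD_MUTEX_INITIALIZER;

/* osGetTickResolution analogue: the timer's real granularity. */
static uint64_t tick_resolution_ns(void)
{
    return 100 * 1000; /* pretend ticks are 100 us apart */
}

/* osStartNanoTimer analogue: wait, then let the caller retry. */
static void schedule_retry_ns(uint64_t delay_ns)
{
    struct timespec ts = { 0, (long)delay_ns };
    nanosleep(&ts, NULL);
}

static void nano_timer_callback(void)
{
    /* Conditional acquire: a timer callback must not block. */
    while (pthread_mutex_trylock(&gpu_lock) != 0)
    {
        /* The lock may be held for a while; retrying at the timer's
         * resolution instead of a fixed 1000 ns avoids hammering the
         * owner while it tries to make forward progress. */
        schedule_retry_ns(tick_resolution_ns());
    }
    puts("servicing timer event under the GPU lock");
    pthread_mutex_unlock(&gpu_lock);
}

int main(void)
{
    nano_timer_callback();
    return 0;
}

Tying the retry interval to the tick resolution also means the backoff scales with the platform's timer granularity instead of being tuned for one machine.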
@@ -608,36 +608,19 @@ done:
 // Use this call when MMU faults need to be copied
 // outside of the RM lock.
 //
-NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults_unlocked(
-    nvidia_stack_t *sp,
-    nv_state_t *nv,
-    NvU32 *faultsCopied
+static NV_STATUS _rm_gpu_copy_mmu_faults_unlocked(
+    OBJGPU *pGpu,
+    NvU32 *pFaultsCopied,
+    THREAD_STATE_NODE *pThreadState
 )
 {
-    OBJGPU *pGpu;
-    void *fp;
-    NV_STATUS status = NV_OK;
-
-    NV_ENTER_RM_RUNTIME(sp,fp);
-
-    pGpu = NV_GET_NV_PRIV_PGPU(nv);
-    if (pGpu == NULL || faultsCopied == NULL)
-    {
-        status = NV_ERR_OBJECT_NOT_FOUND;
-        goto done;
-    }
-
     // Non-replayable faults are copied to the client shadow buffer by GSP-RM.
     if (IS_GSP_CLIENT(pGpu))
     {
-        status = NV_ERR_NOT_SUPPORTED;
-        goto done;
+        return NV_ERR_NOT_SUPPORTED;
     }

-done:
-    NV_EXIT_RM_RUNTIME(sp,fp);
-    return status;
+    return NV_OK;
 }

 //
@@ -650,10 +633,12 @@ NV_STATUS rm_gpu_handle_mmu_faults(
 )
 {
     NvU32 status = NV_OK;
-    OBJGPU *pGpu;
     void *fp;

     NV_ENTER_RM_RUNTIME(sp,fp);

+    *faultsCopied = 0;
+
+    OBJGPU *pGpu;
     pGpu = NV_GET_NV_PRIV_PGPU(nv);
     if (pGpu == NULL)
@@ -661,40 +646,50 @@ NV_STATUS rm_gpu_handle_mmu_faults(
         return NV_ERR_OBJECT_NOT_FOUND;
     }

-    if (IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu) && !IS_VIRTUAL(pGpu))
     {
-        KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
-        Intr *pIntr = GPU_GET_INTR(pGpu);
-        NvU32 hw_put = 0;
-        NvU32 hw_get = 0;
+        KernelGmmu *pKernelGmmu;
         PORT_MEM_ALLOCATOR *pIsrAllocator;
         THREAD_STATE_NODE threadState;
         NvU8 stackAllocator[TLS_ISR_ALLOCATOR_SIZE]; // ISR allocations come from this buffer

         pIsrAllocator = portMemAllocatorCreateOnExistingBlock(stackAllocator, sizeof(stackAllocator));
         tlsIsrInit(pIsrAllocator);
         threadStateInitISRLockless(&threadState, pGpu, THREAD_STATE_FLAGS_IS_ISR_LOCKLESS);

-        kgmmuReadFaultBufferPutPtr_HAL(pGpu, pKernelGmmu, NON_REPLAYABLE_FAULT_BUFFER,
-                                       &hw_put, &threadState);
-        kgmmuReadFaultBufferGetPtr_HAL(pGpu, pKernelGmmu, NON_REPLAYABLE_FAULT_BUFFER,
-                                       &hw_get, &threadState);
-        if(hw_get != hw_put)
+        pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
+
+        if (IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu) && !IS_VIRTUAL(pGpu))
         {
-            // We have to clear the top level interrupt bit here since otherwise
-            // the bottom half will attempt to service the interrupt on the CPU
-            // side before GSP receives the notification and services it
-            kgmmuClearNonReplayableFaultIntr(pGpu, pKernelGmmu, &threadState);
-            status = intrTriggerPrivDoorbell_HAL(pGpu, pIntr, NV_DOORBELL_NOTIFY_LEAF_SERVICE_NON_REPLAYABLE_FAULT_HANDLE);
+            Intr *pIntr = GPU_GET_INTR(pGpu);
+
+            if (kgmmuIsNonReplayableFaultPending_HAL(pGpu, pKernelGmmu, &threadState))
+            {
+                // We have to clear the top level interrupt bit here since otherwise
+                // the bottom half will attempt to service the interrupt on the CPU
+                // side before GSP receives the notification and services it
+                kgmmuClearNonReplayableFaultIntr_HAL(pGpu, pKernelGmmu, &threadState);
+                status = intrTriggerPrivDoorbell_HAL(pGpu, pIntr, NV_DOORBELL_NOTIFY_LEAF_SERVICE_NON_REPLAYABLE_FAULT_HANDLE);
+            }
+        }
+        else if (IS_VIRTUAL_WITH_SRIOV(pGpu))
+        {
+            if (kgmmuIsNonReplayableFaultPending_HAL(pGpu, pKernelGmmu, &threadState))
+            {
+                status = _rm_gpu_copy_mmu_faults_unlocked(pGpu, faultsCopied, &threadState);
+            }
+        }
+        else
+        {
+            status = _rm_gpu_copy_mmu_faults_unlocked(pGpu, faultsCopied, &threadState);
         }
+
         threadStateFreeISRLockless(&threadState, pGpu, THREAD_STATE_FLAGS_IS_ISR_LOCKLESS);
         tlsIsrDestroy(pIsrAllocator);
         portMemAllocatorRelease(pIsrAllocator);
     }
-    else
-    {
-        status = rm_gpu_copy_mmu_faults_unlocked(sp, nv, faultsCopied);
-    }

     NV_EXIT_RM_RUNTIME(sp,fp);
     return status;
 }
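Taken together, the last three hunks turn rm_gpu_handle_mmu_faults into a mode dispatch that runs entirely under the lockless ISR thread state: a host with GSP plugin offload clears the CPU-side top-level interrupt and rings a doorbell so GSP services the fault, an SR-IOV guest copies faults only when one is actually pending, and everything else copies unconditionally via the now-static _rm_gpu_copy_mmu_faults_unlocked. A condensed sketch of that control flow; the enum and helper functions below are illustrative stand-ins for the driver's macro checks (IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED, IS_VIRTUAL_WITH_SRIOV) and HAL calls, not real APIs.

/* Sketch only: puts() stands in for the HAL calls named in the comments. */
#include <stdbool.h>
#include <stdio.h>

enum gpu_mode {
    MODE_GSP_OFFLOAD_HOST, /* vGPU host with GSP plugin offload */
    MODE_SRIOV_GUEST,      /* SR-IOV virtual GPU                */
    MODE_BARE_METAL        /* everything else                   */
};

/* kgmmuIsNonReplayableFaultPending_HAL stand-in. */
static bool fault_pending(void) { return true; }

/* kgmmuClearNonReplayableFaultIntr_HAL stand-in. */
static void clear_top_level_intr(void) { puts("clear CPU-side fault interrupt"); }

/* intrTriggerPrivDoorbell_HAL stand-in. */
static void ring_gsp_doorbell(void) { puts("notify GSP to service the fault"); }

/* _rm_gpu_copy_mmu_faults_unlocked stand-in. */
static int copy_faults_unlocked(void) { puts("copy faults to shadow buffer"); return 0; }

static int handle_mmu_faults(enum gpu_mode mode)
{
    int status = 0;

    switch (mode)
    {
    case MODE_GSP_OFFLOAD_HOST:
        if (fault_pending())
        {
            /* Clear the CPU-side bit first so the bottom half cannot
             * race GSP into servicing the same fault. */
            clear_top_level_intr();
            ring_gsp_doorbell();
        }
        break;
    case MODE_SRIOV_GUEST:
        /* Only touch the buffer when something is actually pending. */
        if (fault_pending())
            status = copy_faults_unlocked();
        break;
    case MODE_BARE_METAL:
        status = copy_faults_unlocked();
        break;
    }
    return status;
}

int main(void)
{
    return handle_mmu_faults(MODE_SRIOV_GUEST);
}

The ordering in the GSP branch mirrors the hunk's comment: the top-level interrupt bit is cleared before the doorbell so the CPU bottom half does not try to service a fault that GSP is about to handle.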