570.123.07

2026-02-09 17:50:00 +00:00 · 2025-03-25 12:40:01 -07:00
parent 5e6ad2b575
commit 4d941c0b6e
146 changed files with 53927 additions and 54744 deletions
--- a/kernel-open/nvidia-uvm/uvm_gpu_isr.h
+++ b/kernel-open/nvidia-uvm/uvm_gpu_isr.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2024 NVIDIA Corporation
+    Copyright (c) 2016-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -70,8 +70,8 @@ typedef struct

    struct
    {
-        // Number of the bottom-half invocations for this interrupt on a GPU over
-        // its lifetime
+        // Number of bottom-half invocations for this interrupt on a GPU
+        // over its lifetime.
        NvU64 bottom_half_count;

        // A bitmask of the CPUs on which the bottom half has executed. The
@@ -110,20 +110,20 @@ typedef struct
    // bottom-half per interrupt type.
    nv_kthread_q_t bottom_half_q;

-    // Protects the state of interrupts (enabled/disabled) and whether the GPU is
-    // currently handling them. Taken in both interrupt and process context.
+    // Protects the state of interrupts (enabled/disabled) and whether the GPU
+    // is currently handling them. Taken in both interrupt and process context.
    uvm_spinlock_irqsave_t interrupts_lock;

    uvm_intr_handler_t replayable_faults;
    uvm_intr_handler_t non_replayable_faults;
-    uvm_intr_handler_t access_counters;
+    uvm_intr_handler_t *access_counters;

    // Kernel thread used to kill channels on fatal non-replayable faults.
    // This is needed because we cannot call into RM from the bottom-half to
    // avoid deadlocks.
    nv_kthread_q_t kill_channel_q;

-    // Number of top-half ISRs called for this GPU over its lifetime
+    // Number of top-half ISRs called for this GPU over its lifetime.
    NvU64 interrupt_count;
 } uvm_isr_info_t;

@@ -133,7 +133,7 @@ NV_STATUS uvm_isr_top_half_entry(const NvProcessorUuid *gpu_uuid);
 // Initialize ISR handling state
 NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu);

-// Flush any currently scheduled bottom halves.  This is called during GPU
+// Flush any currently scheduled bottom halves. This is called during GPU
 // removal.
 void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu);

@@ -146,7 +146,7 @@ void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu);
 void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);

 // Take parent_gpu->isr.replayable_faults.service_lock from a non-top/bottom
-// half thread.  This will also disable replayable page fault interrupts (if
+// half thread. This will also disable replayable page fault interrupts (if
 // supported by the GPU) because the top half attempts to take this lock, and we
 // would cause an interrupt storm if we didn't disable them first.
 //
@@ -154,49 +154,48 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
 void uvm_parent_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);

 // Unlock parent_gpu->isr.replayable_faults.service_lock. This call may
-// re-enable replayable page fault interrupts.  Unlike
-// uvm_parent_gpu_replayable_faults_isr_lock(), which should only called from
+// re-enable replayable page fault interrupts. Unlike
+// uvm_parent_gpu_replayable_faults_isr_lock(), which should only be called from
 // non-top/bottom half threads, this can be called by any thread.
 void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);

 // Lock/unlock routines for non-replayable faults. These do not need to prevent
 // interrupt storms since the GPU fault buffers for non-replayable faults are
-// managed by RM.  Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
+// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
 // under the parent need to have been previously retained.
 void uvm_parent_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
 void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);

 // See uvm_parent_gpu_replayable_faults_isr_lock/unlock
-void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu);
-void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu);
+void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters);
+void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters);

 // Increments the reference count tracking whether access counter interrupts
 // should be disabled. The caller is guaranteed that access counter interrupts
 // are disabled upon return. Interrupts might already be disabled prior to
 // making this call. Each call is ref-counted, so this must be paired with a
-// call to uvm_parent_gpu_access_counters_intr_enable().
+// call to uvm_access_counters_intr_enable().
 //
 // parent_gpu->isr.interrupts_lock must be held to call this function.
-void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
+void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters);

 // Decrements the reference count tracking whether access counter interrupts
 // should be disabled. Only once the count reaches 0 are the HW interrupts
 // actually enabled, so this call does not guarantee that the interrupts have
 // been re-enabled upon return.
 //
-// uvm_parent_gpu_access_counters_intr_disable() must have been called prior to
-// calling this function.
+// uvm_access_counters_intr_disable() must have been called prior to calling
+// this function.
 //
 // NOTE: For pulse-based interrupts, the caller is responsible for re-arming
 // the interrupt.
 //
 // parent_gpu->isr.interrupts_lock must be held to call this function.
-void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);
+void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters);

 // Return the first valid GPU given the parent GPU or NULL if no MIG instances
 // are registered. This should only be called from bottom halves or if the
 // g_uvm_global.global_lock is held so that the returned pointer remains valid.
-//
 uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu);

 #endif // __UVM_GPU_ISR_H__