570.123.07

2026-02-01 22:19:46 +00:00 · 2025-03-25 12:40:01 -07:00
parent 5e6ad2b575
commit 4d941c0b6e
146 changed files with 53927 additions and 54744 deletions
--- a/kernel-open/nvidia-uvm/uvm_gpu.h
+++ b/kernel-open/nvidia-uvm/uvm_gpu.h
@@ -189,6 +189,9 @@ struct uvm_service_block_context_struct

    // Prefetch temporary state.
    uvm_perf_prefetch_bitmap_tree_t prefetch_bitmap_tree;
+
+    // Access counters notification buffer index.
+    NvU32 access_counters_buffer_index;
 };

 typedef struct
@@ -197,8 +200,8 @@ typedef struct
    {
        struct
        {
-            // Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned region
-            // of a SAM VMA. Used for batching ATS faults in a vma.
+            // Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned
+            // region of a SAM VMA. Used for batching ATS faults in a vma.
            uvm_page_mask_t prefetch_only_fault_mask;

            // Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region
@@ -350,7 +353,7 @@ typedef struct
    // entries from the GPU buffer
    NvU32 max_batch_size;

-    struct uvm_replayable_fault_buffer_info_struct
+    struct uvm_replayable_fault_buffer_struct
    {
        // Maximum number of faults entries that can be stored in the buffer
        NvU32 max_faults;
@@ -414,7 +417,7 @@ typedef struct
        uvm_ats_fault_invalidate_t ats_invalidate;
    } replayable;

-    struct uvm_non_replayable_fault_buffer_info_struct
+    struct uvm_non_replayable_fault_buffer_struct
    {
        // Maximum number of faults entries that can be stored in the buffer
        NvU32 max_faults;
@@ -468,7 +471,7 @@ typedef struct

    // Timestamp when prefetch faults where disabled last time
    NvU64 disable_prefetch_faults_timestamp;
-} uvm_fault_buffer_info_t;
+} uvm_fault_buffer_t;

 struct uvm_access_counter_service_batch_context_struct
 {
@@ -476,30 +479,14 @@ struct uvm_access_counter_service_batch_context_struct

    NvU32 num_cached_notifications;

-    struct
-    {
-        uvm_access_counter_buffer_entry_t   **notifications;
+    uvm_access_counter_buffer_entry_t **notifications;

-        NvU32                             num_notifications;
+    NvU32 num_notifications;

-        // Boolean used to avoid sorting the fault batch by instance_ptr if we
-        // determine at fetch time that all the access counter notifications in
-        // the batch report the same instance_ptr
-        bool is_single_instance_ptr;
-    } virt;
-
-    struct
-    {
-        uvm_access_counter_buffer_entry_t    **notifications;
-        uvm_reverse_map_t                      *translations;
-
-        NvU32                              num_notifications;
-
-        // Boolean used to avoid sorting the fault batch by aperture if we
-        // determine at fetch time that all the access counter notifications in
-        // the batch report the same aperture
-        bool                              is_single_aperture;
-    } phys;
+    // Boolean used to avoid sorting the fault batch by instance_ptr if we
+    // determine at fetch time that all the access counter notifications in
+    // the batch report the same instance_ptr
+    bool is_single_instance_ptr;

    // Helper page mask to compute the accessed pages within a VA block
    uvm_page_mask_t accessed_pages;
@@ -514,31 +501,15 @@ struct uvm_access_counter_service_batch_context_struct
    NvU32 batch_id;
 };

-typedef struct
+struct uvm_access_counter_buffer_struct
 {
-    // Values used to configure access counters in RM
-    struct
-    {
-        UVM_ACCESS_COUNTER_GRANULARITY  granularity;
-        UVM_ACCESS_COUNTER_USE_LIMIT    use_limit;
-    } rm;
+    uvm_parent_gpu_t *parent_gpu;

-    // The following values are precomputed by the access counter notification
-    // handling code. See comments for UVM_MAX_TRANSLATION_SIZE in
-    // uvm_gpu_access_counters.c for more details.
-    NvU64 translation_size;
-
-    NvU64 translations_per_counter;
-
-    NvU64 sub_granularity_region_size;
-
-    NvU64 sub_granularity_regions_per_translation;
-} uvm_gpu_access_counter_type_config_t;
-
-typedef struct
-{
    UvmGpuAccessCntrInfo rm_info;

+    // Access counters may have multiple notification buffers.
+    NvU32 index;
+
    NvU32 max_notifications;

    NvU32 max_batch_size;
@@ -560,10 +531,22 @@ typedef struct
    // may override it to try different configuration values.
    struct
    {
-        uvm_gpu_access_counter_type_config_t mimc;
-        uvm_gpu_access_counter_type_config_t momc;
+        // Values used to configure access counters in RM
+        struct
+        {
+            UVM_ACCESS_COUNTER_GRANULARITY granularity;
+        } rm;

-        NvU32                                threshold;
+        // The following values are precomputed by the access counter
+        // notification handling code. See comments for UVM_MAX_TRANSLATION_SIZE
+        // in uvm_gpu_access_counters.c for more details.
+        NvU64 translation_size;
+
+        NvU64 sub_granularity_region_size;
+
+        NvU64 sub_granularity_regions_per_translation;
+
+        NvU32 threshold;
    } current_config;

    // Access counter statistics
@@ -575,7 +558,7 @@ typedef struct
    } stats;

    // Ignoring access counters means that notifications are left in the HW
-    // buffer without being serviced.  Requests to ignore access counters
+    // buffer without being serviced. Requests to ignore access counters
    // are counted since the suspend path inhibits access counter interrupts,
    // and the resume path needs to know whether to reenable them.
    NvU32 notifications_ignored_count;
@@ -583,13 +566,25 @@ typedef struct
    // Context structure used to service a GPU access counter batch
    uvm_access_counter_service_batch_context_t batch_service_context;

-    // VA space that reconfigured the access counters configuration, if any.
-    // Used in builtin tests only, to avoid reconfigurations from different
-    // processes
-    //
-    // Locking: both readers and writers must hold the access counters ISR lock
-    uvm_va_space_t *reconfiguration_owner;
-} uvm_access_counter_buffer_info_t;
+    struct
+    {
+        // VA space that reconfigured the access counters configuration, if any.
+        // Used in builtin tests only, to avoid reconfigurations from different
+        // processes.
+        //
+        // Locking: both readers and writers must hold the access counters ISR
+        // lock.
+        uvm_va_space_t *reconfiguration_owner;
+
+        // The service access counters loop breaks after processing the first
+        // batch. It will be retriggered if there are pending notifications, but
+        // it releases the ISR service lock to check certain races that would be
+        // difficult to hit otherwise.
+        bool one_iteration_per_batch;
+        NvU32 sleep_per_iteration_us;
+    } test;
+
+};

 typedef struct
 {
@@ -745,15 +740,11 @@ struct uvm_gpu_struct

    struct
    {
-        // Mask of peer_gpus set
+        // Mask of peer_gpus set.
        uvm_processor_mask_t peer_gpu_mask;

-        // lazily-populated array of peer GPUs, indexed by the peer's GPU index
-        uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];
-
-        // Leaf spinlock used to synchronize access to the peer_gpus table so
-        // that it can be safely accessed from the access counters bottom half
-        uvm_spinlock_t peer_gpus_lock;
+        // Leaf spinlock used to synchronize access to peer_gpu_mask.
+        uvm_spinlock_t peer_gpu_lock;
    } peer_info;

    // Maximum number of subcontexts supported
@@ -828,14 +819,6 @@ struct uvm_gpu_struct
        uvm_bit_locks_t bitlocks;
    } sysmem_mappings;

-    // Reverse lookup table used to query the user mapping associated with a
-    // sysmem (DMA) physical address.
-    //
-    // The system memory mapping information referred to by this field is
-    // different from that of sysmem_mappings, because it relates to user
-    // mappings (instead of kernel), and it is used in most configurations.
-    uvm_pmm_sysmem_mappings_t pmm_reverse_sysmem_mappings;
-
    struct
    {
        uvm_conf_computing_dma_buffer_pool_t dma_buffer_pool;
@@ -957,6 +940,16 @@ struct uvm_gpu_struct
    uvm_mutex_t device_p2p_lock;
 };

+typedef struct
+{
+    bool access_counters_alloc_buffer;
+    bool access_counters_alloc_block_context;
+    bool isr_access_counters_alloc;
+    bool isr_access_counters_alloc_stats_cpu;
+    bool access_counters_batch_context_notifications;
+    bool access_counters_batch_context_notification_cache;
+} uvm_test_parent_gpu_inject_error_t;
+
 // In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
 // parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
 // (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
@@ -965,8 +958,8 @@ struct uvm_gpu_struct
 struct uvm_parent_gpu_struct
 {
    // Reference count for how many places are holding on to a parent GPU
-    // (internal to the UVM driver).  This includes any GPUs we know about, not
-    // just GPUs that are registered with a VA space.  Most GPUs end up being
+    // (internal to the UVM driver). This includes any GPUs we know about, not
+    // just GPUs that are registered with a VA space. Most GPUs end up being
    // registered, but there are brief periods when they are not registered,
    // such as during interrupt handling, and in add_gpu() or remove_gpu().
    nv_kref_t gpu_kref;
@@ -976,7 +969,7 @@ struct uvm_parent_gpu_struct

    uvm_gpu_t *gpus[UVM_PARENT_ID_MAX_SUB_PROCESSORS];

-    // Bitmap of valid child entries in the gpus[] table.  Used to retrieve a
+    // Bitmap of valid child entries in the gpus[] table. Used to retrieve a
    // usable child GPU in bottom-halves.
    DECLARE_BITMAP(valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS);

@@ -1079,11 +1072,6 @@ struct uvm_parent_gpu_struct

    bool access_counters_supported;

-    // If this is true, physical address based access counter notifications are
-    // potentially generated. If false, only virtual address based notifications
-    // are generated (assuming access_counters_supported is true too).
-    bool access_counters_can_use_physical_addresses;
-
    bool fault_cancel_va_supported;

    // True if the GPU has hardware support for scoped atomics
@@ -1205,17 +1193,17 @@ struct uvm_parent_gpu_struct
    // Interrupt handling state and locks
    uvm_isr_info_t isr;

-    // Fault buffer info. This is only valid if supports_replayable_faults is
-    // set to true.
-    uvm_fault_buffer_info_t fault_buffer_info;
+    // This is only valid if supports_replayable_faults is set to true.
+    uvm_fault_buffer_t fault_buffer;

    // PMM lazy free processing queue.
    // TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
    nv_kthread_q_t lazy_free_q;

-    // Access counter buffer info. This is only valid if
-    // supports_access_counters is set to true.
-    uvm_access_counter_buffer_info_t access_counter_buffer_info;
+    // This is only valid if supports_access_counters is set to true. This array
+    // has rm_info.accessCntrBufferCount entries.
+    uvm_access_counter_buffer_t *access_counter_buffer;
+    uvm_mutex_t access_counters_enablement_lock;

    // Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
    NvU32 utlb_per_gpc_count;
@@ -1348,6 +1336,8 @@ struct uvm_parent_gpu_struct
        // GPUs.
        NvU64 base_address;
    } egm;
+
+    uvm_test_parent_gpu_inject_error_t test;
 };

 static const char *uvm_parent_gpu_name(uvm_parent_gpu_t *parent_gpu)
@@ -1395,10 +1385,10 @@ typedef struct
    //   detected to be PCIe peers and uvm_gpu_retain_pcie_peer_access() was
    //   called.
    //
-    // - The peer_gpus_lock is held on one of the GPUs. In this case, the other
-    //   GPU must be read from the original GPU's peer_gpus table. The fields
-    //   will not change while the lock is held, but they may no longer be valid
-    //   because the other GPU might be in teardown.
+    // - The peer_gpu_lock is held on one of the GPUs. In this case, the other
+    //   GPU must be referred from the original GPU's peer_gpu_mask reference.
+    //   The fields will not change while the lock is held, but they may no
+    //   longer be valid because the other GPU might be in teardown.

    // This field is used to determine when this struct has been initialized
    // (ref_count != 0). NVLink peers are initialized at GPU registration time.
@@ -1510,7 +1500,7 @@ uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
 uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);

 // Like uvm_parent_gpu_get_by_uuid(), but this variant does not assertion-check
-// that the caller is holding the global_lock.  This is a narrower-purpose
+// that the caller is holding the global_lock. This is a narrower-purpose
 // function, and is only intended for use by the top-half ISR, or other very
 // limited cases.
 uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid);
@@ -1521,6 +1511,7 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_u
 // LOCKING: Takes and releases the global lock for the caller.
 NV_STATUS uvm_gpu_retain_by_uuid(const NvProcessorUuid *gpu_uuid,
                                 const uvm_rm_user_object_t *user_rm_device,
+                                 const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
                                 uvm_gpu_t **gpu_out);

 // Retain a gpu which is known to already be retained. Does NOT require the
@@ -1578,10 +1569,6 @@ uvm_gpu_address_t uvm_gpu_peer_copy_address(uvm_gpu_t *owning_gpu, NvU64 address
 // The two GPUs must have different parents.
 NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1);

-// Get the processor id accessible by the given GPU for the given physical
-// address.
-uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);
-
 // Get the EGM aperture for local_gpu to use to map memory resident on the CPU
 // NUMA node that remote_gpu is attached to.
 // Note that local_gpu can be equal to remote_gpu when memory is resident in
@@ -1655,7 +1642,8 @@ static uvm_gpu_identity_mapping_t *uvm_gpu_get_peer_mapping(uvm_gpu_t *gpu, uvm_

 // Check whether the provided address points to peer memory:
 // * Physical address using one of the PEER apertures
-// * Physical address using SYS aperture that belongs to an exposed coherent memory
+// * Physical address using SYS aperture that belongs to an exposed coherent
+//   memory
 // * Virtual address in the region [peer_va_base, peer_va_base + peer_va_size)
 bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address);

@@ -1684,8 +1672,8 @@ NV_STATUS uvm_gpu_check_nvlink_error(uvm_gpu_t *gpu);
 // Check for NVLINK errors without calling into RM
 //
 // Calling into RM is problematic in many places, this check is always safe to
-// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK error
-// and it's required to call uvm_gpu_check_nvlink_error() to be sure.
+// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK
+// error and it's required to call uvm_gpu_check_nvlink_error() to be sure.
 NV_STATUS uvm_gpu_check_nvlink_error_no_rm(uvm_gpu_t *gpu);

 // Map size bytes of contiguous sysmem on the GPU for physical access