565.57.01

2026-04-24 08:19:14 +00:00 · 2024-10-22 17:38:58 +02:00
parent ed4be64962
commit d5a0858f90
1049 changed files with 209491 additions and 167508 deletions
--- a/kernel-open/nvidia-uvm/uvm_gpu.h
+++ b/kernel-open/nvidia-uvm/uvm_gpu.h
@@ -49,9 +49,13 @@
 #include <linux/mmu_notifier.h>
 #include "uvm_conf_computing.h"

-// Buffer length to store uvm gpu id, RM device name and gpu uuid.
-#define UVM_GPU_NICE_NAME_BUFFER_LENGTH (sizeof("ID 999: : ") + \
-            UVM_GPU_NAME_LENGTH + UVM_GPU_UUID_TEXT_BUFFER_LENGTH)
+#define UVM_PARENT_GPU_UUID_PREFIX "GPU-"
+#define UVM_GPU_UUID_PREFIX "GI-"
+
+// UVM_UUID_STRING_LENGTH already includes the NUL terminator, so subtract the
+// one counted by sizeof() on the prefix to avoid double-counting it.
+#define UVM_PARENT_GPU_UUID_STRING_LENGTH (sizeof(UVM_PARENT_GPU_UUID_PREFIX) - 1 + UVM_UUID_STRING_LENGTH)
+#define UVM_GPU_UUID_STRING_LENGTH (sizeof(UVM_GPU_UUID_PREFIX) - 1 + UVM_UUID_STRING_LENGTH)

 #define UVM_GPU_MAGIC_VALUE 0xc001d00d12341993ULL

@@ -184,29 +188,45 @@ struct uvm_service_block_context_struct

 typedef struct
 {
-    // Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
-    // VMA. Used for batching ATS faults in a vma. This is unused for access
-    // counter service requests.
-    uvm_page_mask_t read_fault_mask;
+    union
+    {
+        struct
+        {
+            // Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region
+            // of a SAM VMA. Used for batching ATS faults in a vma.
+            uvm_page_mask_t read_fault_mask;

-    // Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
-    // SAM VMA. Used for batching ATS faults in a vma. This is unused for access
-    // counter service requests.
-    uvm_page_mask_t write_fault_mask;
+            // Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region
+            // of a SAM VMA. Used for batching ATS faults in a vma.
+            uvm_page_mask_t write_fault_mask;

-    // Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE aligned region
-    // of a SAM VMA. Used to return ATS fault status. This is unused for access
-    // counter service requests.
-    uvm_page_mask_t faults_serviced_mask;
+            // Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region
+            // of a SAM VMA. This is a logical OR of read_fault_mask and
+            // write_fault_mask.
+            uvm_page_mask_t accessed_mask;

-    // Mask of successfully serviced read faults on pages in write_fault_mask.
-    // This is unused for access counter service requests.
-    uvm_page_mask_t reads_serviced_mask;
+            // Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE
+            // aligned region of a SAM VMA. Used to return ATS fault status.
+            uvm_page_mask_t faults_serviced_mask;

-    // Mask of all accessed pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
-    // VMA. This is used as input for access counter service requests and output
-    // of fault service requests.
-    uvm_page_mask_t accessed_mask;
+            // Mask of successfully serviced read faults on pages in
+            // write_fault_mask.
+            uvm_page_mask_t reads_serviced_mask;
+
+        } faults;
+
+        struct
+        {
+            // Mask of all accessed pages in a UVM_VA_BLOCK_SIZE aligned region
+            // of a SAM VMA.
+            uvm_page_mask_t accessed_mask;
+
+            // Mask of successfully migrated pages in a UVM_VA_BLOCK_SIZE
+            // aligned region of a SAM VMA.
+            uvm_page_mask_t migrated_mask;
+
+        } access_counters;
+    };

    // Client type of the service requestor.
    uvm_fault_client_type_t client_type;
@@ -633,9 +653,10 @@ struct uvm_gpu_struct
    NvProcessorUuid uuid;

    // Nice printable name in the format:
-    // ID: 999: GPU-<parent_uuid> UVM-GI-<gi_uuid>.
-    // UVM_GPU_UUID_TEXT_BUFFER_LENGTH includes the null character.
-    char name[9 + 2 * UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
+    // ID: 999: GPU-<parent_uuid> GI-<gi_uuid>
+    // UVM_PARENT_GPU_UUID_STRING_LENGTH includes a NUL terminator, but that
+    // byte is used for the separating space instead.
+    char name[sizeof("ID: 999: ") - 1 + UVM_PARENT_GPU_UUID_STRING_LENGTH - 1 + 1 + UVM_GPU_UUID_STRING_LENGTH];

    // Refcount of the gpu, i.e. how many times it has been retained. This is
    // roughly a count of how many times it has been registered with a VA space,
@@ -682,6 +703,12 @@ struct uvm_gpu_struct
            bool enabled;
            unsigned int node_id;
        } numa;
+
+        // Physical address of the start of statically mapped fb memory in BAR1.
+        NvU64 static_bar1_start;
+
+        // Size of statically mapped fb memory in BAR1.
+        NvU64 static_bar1_size;
    } mem_info;

    struct
@@ -706,9 +733,6 @@ struct uvm_gpu_struct
    struct
    {
        // Mask of peer_gpus set
-        //
-        // We can use a regular processor id because P2P is not allowed between
-        // partitioned GPUs when SMC is enabled
        uvm_processor_mask_t peer_gpu_mask;

        // lazily-populated array of peer GPUs, indexed by the peer's GPU index
@@ -859,16 +883,19 @@ struct uvm_gpu_struct

    struct
    {
+        // "gpus/UVM-GPU-${physical-UUID}/${sub_processor_index}/"
        struct proc_dir_entry *dir;

+        // "gpus/${gpu_id}" -> "UVM-GPU-${physical-UUID}/${sub_processor_index}"
        struct proc_dir_entry *dir_symlink;

-        // The GPU instance UUID symlink if SMC is enabled.
+        // The GPU instance UUID symlink.
+        // "gpus/UVM-GI-${GI-UUID}" ->
+        //     "UVM-GPU-${physical-UUID}/${sub_processor_index}"
        struct proc_dir_entry *gpu_instance_uuid_symlink;

+        // "gpus/UVM-GPU-${physical-UUID}/${sub_processor_index}/info"
        struct proc_dir_entry *info_file;
-
-        struct proc_dir_entry *dir_peers;
    } procfs;

    // Placeholder for per-GPU performance heuristics information
@@ -876,6 +903,13 @@ struct uvm_gpu_struct

    // Force pushbuffer's GPU VA to be >= 1TB; used only for testing purposes.
    bool uvm_test_force_upper_pushbuffer_segment;
+
+    // True once the device p2p pages have been initialised.
+    bool device_p2p_initialised;
+
+    // Used to protect allocation of p2p_mem and assignment of the page
+    // zone_device_data fields.
+    uvm_mutex_t device_p2p_lock;
 };

 // In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
@@ -905,7 +939,7 @@ struct uvm_parent_gpu_struct
    NvProcessorUuid uuid;

    // Nice printable name including the uvm gpu id, ascii name from RM and uuid
-    char name[UVM_GPU_NICE_NAME_BUFFER_LENGTH];
+    char name[sizeof("ID 999: : ") - 1 + UVM_GPU_NAME_LENGTH + UVM_PARENT_GPU_UUID_STRING_LENGTH];

    // GPU information and provided by RM (architecture, implementation,
    // hardware classes, etc.).
@@ -1087,11 +1121,17 @@ struct uvm_parent_gpu_struct

    struct
    {
+        // "gpus/UVM-GPU-${physical-UUID}/"
        struct proc_dir_entry *dir;

+        // "gpus/UVM-GPU-${physical-UUID}/fault_stats"
        struct proc_dir_entry *fault_stats_file;

+        // "gpus/UVM-GPU-${physical-UUID}/access_counters"
        struct proc_dir_entry *access_counters_file;
+
+        // "gpus/UVM-GPU-${physical-UUID}/peers/"
+        struct proc_dir_entry *dir_peers;
    } procfs;

    // Interrupt handling state and locks
@@ -1239,42 +1279,59 @@ static uvmGpuDeviceHandle uvm_gpu_device_handle(uvm_gpu_t *gpu)
    return gpu->parent->rm_device;
 }

-struct uvm_gpu_peer_struct
+typedef struct
+{
+    // ref_count also controls state maintained in each GPU instance
+    // (uvm_gpu_t). See init_peer_access().
+    NvU64 ref_count;
+} uvm_gpu_peer_t;
+
+typedef struct
 {
    // The fields in this global structure can only be inspected under one of
    // the following conditions:
    //
-    // - The VA space lock is held for either read or write, both GPUs are
-    //   registered in the VA space, and the corresponding bit in the
+    // - The VA space lock is held for either read or write, both parent GPUs
+    //   are registered in the VA space, and the corresponding bit in the
    //   va_space.enabled_peers bitmap is set.
    //
    // - The global lock is held.
    //
-    // - While the global lock was held in the past, the two GPUs were detected
-    //   to be SMC peers and were both retained.
+    // - While the global lock was held in the past, the two parent GPUs were
+    //   both retained.
    //
-    // - While the global lock was held in the past, the two GPUs were detected
-    //   to be NVLINK peers and were both retained.
+    // - While the global lock was held in the past, the two parent GPUs were
+    //   detected to be NVLINK peers and were both retained.
    //
-    // - While the global lock was held in the past, the two GPUs were detected
-    //   to be PCIe peers and uvm_gpu_retain_pcie_peer_access() was called.
+    // - While the global lock was held in the past, the two parent GPUs were
+    //   detected to be PCIe peers and uvm_gpu_retain_pcie_peer_access() was
+    //   called.
    //
    // - The peer_gpus_lock is held on one of the GPUs. In this case, the other
    //   GPU must be read from the original GPU's peer_gpus table. The fields
    //   will not change while the lock is held, but they may no longer be valid
    //   because the other GPU might be in teardown.

-    // Peer Id associated with this device w.r.t. to a peer GPU.
+    // This field is used to determine when this struct has been initialized
+    // (ref_count != 0). NVLink peers are initialized at GPU registration time.
+    // PCIe peers are initialized when retain_pcie_peers_from_uuids() is called.
+    NvU64 ref_count;
+
+    // Saved values from UvmGpuP2PCapsParams to be used after GPU instance
+    // creation. This should be per GPU instance since LCEs are associated with
+    // GPU instances, not parent GPUs, but for now MIG is not supported for
+    // NVLINK peers so RM associates this state with the parent GPUs. This will
+    // need to be revisited if NVLINK MIG peer support is added.
+    NvU8 optimalNvlinkWriteCEs[2];
+
+    // Peer Id associated with this device with respect to a peer parent GPU.
    // Note: peerId (A -> B) != peerId (B -> A)
    // peer_id[0] from min(gpu_id_1, gpu_id_2) -> max(gpu_id_1, gpu_id_2)
    // peer_id[1] from max(gpu_id_1, gpu_id_2) -> min(gpu_id_1, gpu_id_2)
    NvU8 peer_ids[2];

-    // The link type between the peer GPUs, currently either PCIe or NVLINK.
-    // This field is used to determine the when this peer struct has been
-    // initialized (link_type != UVM_GPU_LINK_INVALID). NVLink peers are
-    // initialized at GPU registration time. PCIe peers are initialized when
-    // the refcount below goes from 0 to 1.
+    // The link type between the peer parent GPUs, currently either PCIe or
+    // NVLINK.
    uvm_gpu_link_type_t link_type;

    // Maximum unidirectional bandwidth between the peers in megabytes per
@@ -1282,10 +1339,6 @@ struct uvm_gpu_peer_struct
    // See UvmGpuP2PCapsParams.
    NvU32 total_link_line_rate_mbyte_per_s;

-    // For PCIe, the number of times that this has been retained by a VA space.
-    // For NVLINK this will always be 1.
-    NvU64 ref_count;
-
    // This handle gets populated when enable_peer_access successfully creates
    // an NV50_P2P object. disable_peer_access resets the same on the object
    // deletion.
@@ -1299,9 +1352,13 @@ struct uvm_gpu_peer_struct
        // GPU-A <-> GPU-B link is bidirectional, pairs[x][0] is always the
        // local GPU, while pairs[x][1] is the remote GPU. The table shall be
        // filled like so: [[GPU-A, GPU-B], [GPU-B, GPU-A]].
-        uvm_gpu_t *pairs[2][2];
+        uvm_parent_gpu_t *pairs[2][2];
    } procfs;
-};
+
+    // Peer-to-peer state for MIG instance pairs between two different parent
+    // GPUs.
+    uvm_gpu_peer_t gpu_peers[UVM_MAX_UNIQUE_SUB_PROCESSOR_PAIRS];
+} uvm_parent_gpu_peer_t;

 // Initialize global gpu state
 NV_STATUS uvm_gpu_init(void);
@@ -1380,12 +1437,12 @@ static NvU64 uvm_gpu_retained_count(uvm_gpu_t *gpu)
    return atomic64_read(&gpu->retained_count);
 }

-// Decrease the refcount on the parent GPU object, and actually delete the object
-// if the refcount hits zero.
+// Decrease the refcount on the parent GPU object, and actually delete the
+// object if the refcount hits zero.
 void uvm_parent_gpu_kref_put(uvm_parent_gpu_t *gpu);

-// Calculates peer table index using GPU ids.
-NvU32 uvm_gpu_peer_table_index(const uvm_gpu_id_t gpu_id0, const uvm_gpu_id_t gpu_id1);
+// Returns a GPU peer pair index in the range [0 .. UVM_MAX_UNIQUE_GPU_PAIRS).
+NvU32 uvm_gpu_pair_index(const uvm_gpu_id_t id0, const uvm_gpu_id_t id1);

 // Either retains an existing PCIe peer entry or creates a new one. In both
 // cases the two GPUs are also each retained.
@@ -1396,35 +1453,26 @@ NV_STATUS uvm_gpu_retain_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
 // LOCKING: requires the global lock to be held
 void uvm_gpu_release_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);

+uvm_gpu_link_type_t uvm_parent_gpu_peer_link_type(uvm_parent_gpu_t *parent_gpu0, uvm_parent_gpu_t *parent_gpu1);
+
 // Get the aperture for local_gpu to use to map memory resident on remote_gpu.
 // They must not be the same gpu.
 uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu);

+// Return the reference count for the P2P state between the given GPUs.
+// The two GPUs must have different parents.
+NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1);
+
 // Get the processor id accessible by the given GPU for the given physical
 // address.
 uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);

-// Get the P2P capabilities between the gpus with the given indexes
-uvm_gpu_peer_t *uvm_gpu_index_peer_caps(const uvm_gpu_id_t gpu_id0, const uvm_gpu_id_t gpu_id1);
+bool uvm_parent_gpus_are_nvswitch_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);

-// Get the P2P capabilities between the given gpus
-static uvm_gpu_peer_t *uvm_gpu_peer_caps(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
+static bool uvm_gpus_are_smc_peers(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
 {
-    return uvm_gpu_index_peer_caps(gpu0->id, gpu1->id);
-}
+    UVM_ASSERT(gpu0 != gpu1);

-static bool uvm_gpus_are_nvswitch_connected(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
-{
-    if (gpu0->parent->nvswitch_info.is_nvswitch_connected && gpu1->parent->nvswitch_info.is_nvswitch_connected) {
-        UVM_ASSERT(uvm_gpu_peer_caps(gpu0, gpu1)->link_type >= UVM_GPU_LINK_NVLINK_2);
-        return true;
-    }
-
-    return false;
-}
-
-static bool uvm_gpus_are_smc_peers(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
-{
    return gpu0->parent == gpu1->parent;
 }

@@ -1595,9 +1643,6 @@ static bool uvm_parent_gpu_needs_proxy_channel_pool(const uvm_parent_gpu_t *pare

 uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu);

-// Debug print of GPU properties
-void uvm_gpu_print(uvm_gpu_t *gpu);
-
 // Add the given instance pointer -> user_channel mapping to this GPU. The
 // bottom half GPU page fault handler uses this to look up the VA space for GPU
 // faults.
@@ -1637,4 +1682,7 @@ typedef enum
    UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
 } uvm_gpu_buffer_flush_mode_t;

+// PCIe BAR containing static framebuffer memory mappings for PCIe P2P
+int uvm_device_p2p_static_bar(uvm_gpu_t *gpu);
+
 #endif // __UVM_GPU_H__