Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git (synced 2026-02-04 07:10:19 +00:00)

Commit: 550.40.07
@@ -57,14 +57,16 @@

 typedef struct
 {
-    // Number of faults from this uTLB that have been fetched but have not been serviced yet
+    // Number of faults from this uTLB that have been fetched but have not been
+    // serviced yet.
     NvU32 num_pending_faults;

     // Whether the uTLB contains fatal faults
     bool has_fatal_faults;

-    // We have issued a replay of type START_ACK_ALL while containing fatal faults. This puts
-    // the uTLB in lockdown mode and no new translations are accepted
+    // We have issued a replay of type START_ACK_ALL while containing fatal
+    // faults. This puts the uTLB in lockdown mode and no new translations are
+    // accepted.
     bool in_lockdown;

     // We have issued a cancel on this uTLB
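To make the bookkeeping above concrete, here is a minimal sketch (not part of the commit) of how a fault-fetch path might update these fields; the uvm_fault_utlb_info_t typedef name and the helper itself are assumptions:

    static void utlb_track_fetched_fault(uvm_fault_utlb_info_t *utlb, bool is_fatal)
    {
        // One more fault fetched from this uTLB but not yet serviced
        utlb->num_pending_faults++;

        // Fatal faults make the uTLB a candidate for cancel/lockdown handling
        if (is_fatal)
            utlb->has_fatal_faults = true;
    }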
@@ -126,8 +128,8 @@ struct uvm_service_block_context_struct
     struct list_head service_context_list;

     // A mask of GPUs that need to be checked for ECC errors before the CPU
-    // fault handler returns, but after the VA space lock has been unlocked to
-    // avoid the RM/UVM VA space lock deadlocks.
+    // fault handler returns, but after the VA space lock has been unlocked
+    // to avoid the RM/UVM VA space lock deadlocks.
     uvm_processor_mask_t gpus_to_check_for_ecc;

     // This is set to throttle page fault thrashing.
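The note above implies a two-phase flow: service faults under the VA space lock, then check ECC only after unlocking. A minimal sketch of a consumer (the helper is hypothetical; uvm_processor_mask_test() is an existing UVM primitive, and uvm_service_block_context_t is the typedef used later in this diff):

    static bool gpu_needs_ecc_check(uvm_service_block_context_t *service_context,
                                    uvm_gpu_t *gpu)
    {
        // Only valid to act on after the VA space lock has been dropped,
        // per the deadlock note above
        return uvm_processor_mask_test(&service_context->gpus_to_check_for_ecc, gpu->id);
    }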
@@ -160,9 +162,9 @@ struct uvm_service_block_context_struct

     struct
     {
-        // Per-processor mask with the pages that will be resident after servicing.
-        // We need one mask per processor because we may coalesce faults that
-        // trigger migrations to different processors.
+        // Per-processor mask with the pages that will be resident after
+        // servicing. We need one mask per processor because we may coalesce
+        // faults that trigger migrations to different processors.
         uvm_page_mask_t new_residency;
     } per_processor_masks[UVM_ID_MAX_PROCESSORS];

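Why one mask per processor: a single servicing pass may coalesce faults whose pages migrate to different destinations, so each destination gets its own new_residency mask. A sketch under that assumption (the helper is hypothetical; uvm_page_mask_set() and uvm_id_value() are existing UVM helpers):

    static void mark_new_residency(uvm_service_block_context_t *service_context,
                                   uvm_processor_id_t new_residency,
                                   uvm_page_index_t page_index)
    {
        uvm_page_mask_t *mask =
            &service_context->per_processor_masks[uvm_id_value(new_residency)].new_residency;

        // Record that this page will be resident on its destination processor
        uvm_page_mask_set(mask, page_index);
    }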
@@ -179,23 +181,28 @@ struct uvm_service_block_context_struct

 typedef struct
 {
     // Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
-    // VMA. Used for batching ATS faults in a vma.
+    // VMA. Used for batching ATS faults in a vma. This is unused for access
+    // counter service requests.
     uvm_page_mask_t read_fault_mask;

     // Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
-    // SAM VMA. Used for batching ATS faults in a vma.
+    // SAM VMA. Used for batching ATS faults in a vma. This is unused for access
+    // counter service requests.
     uvm_page_mask_t write_fault_mask;

     // Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE aligned region
-    // of a SAM VMA. Used to return ATS fault status.
+    // of a SAM VMA. Used to return ATS fault status. This is unused for access
+    // counter service requests.
     uvm_page_mask_t faults_serviced_mask;

     // Mask of successfully serviced read faults on pages in write_fault_mask.
+    // This is unused for access counter service requests.
     uvm_page_mask_t reads_serviced_mask;

-    // Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
-    // SAM VMA. This is used as input to the prefetcher.
-    uvm_page_mask_t faulted_mask;
+    // Mask of all accessed pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
+    // VMA. This is used as input for access counter service requests and output
+    // of fault service requests.
+    uvm_page_mask_t accessed_mask;

     // Client type of the service requestor.
     uvm_fault_client_type_t client_type;
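These masks exist so individual ATS faults can be batched per UVM_VA_BLOCK_SIZE aligned region and serviced in one pass. A minimal sketch of filling them (the helper is hypothetical; uvm_ats_fault_context_t is the typedef this struct receives, per the ats_context field added later in this diff):

    static void ats_batch_fault(uvm_ats_fault_context_t *ats_context,
                                NvU64 fault_addr,
                                NvU64 region_base,
                                bool is_write)
    {
        uvm_page_index_t page_index = (fault_addr - region_base) / PAGE_SIZE;

        // All accessed pages land in accessed_mask; the read/write masks
        // drive the actual ATS fault servicing
        uvm_page_mask_set(&ats_context->accessed_mask, page_index);

        if (is_write)
            uvm_page_mask_set(&ats_context->write_fault_mask, page_index);
        else
            uvm_page_mask_set(&ats_context->read_fault_mask, page_index);
    }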
@@ -294,11 +301,8 @@ struct uvm_fault_service_batch_context_struct

 struct uvm_ats_fault_invalidate_struct
 {
-    // Whether the TLB batch contains any information
-    bool write_faults_in_batch;
-
-    // Batch of TLB entries to be invalidated
-    uvm_tlb_batch_t write_faults_tlb_batch;
+    bool tlb_batch_pending;
+    uvm_tlb_batch_t tlb_batch;
 };

 typedef struct
@@ -443,20 +447,9 @@ struct uvm_access_counter_service_batch_context_struct
         NvU32 num_notifications;

         // Boolean used to avoid sorting the fault batch by instance_ptr if we
-        // determine at fetch time that all the access counter notifications in the
-        // batch report the same instance_ptr
+        // determine at fetch time that all the access counter notifications in
+        // the batch report the same instance_ptr
         bool is_single_instance_ptr;
-
-        // Scratch space, used to generate artificial physically addressed notifications.
-        // Virtual address notifications are always aligned to 64k. This means up to 16
-        // different physical locations could have been accessed to trigger one notification.
-        // The sub-granularity mask can correspond to any of them.
-        struct
-        {
-            uvm_processor_id_t resident_processors[16];
-            uvm_gpu_phys_address_t phys_addresses[16];
-            uvm_access_counter_buffer_entry_t phys_entry;
-        } scratch;
     } virt;

     struct
@@ -467,8 +460,8 @@ struct uvm_access_counter_service_batch_context_struct
         NvU32 num_notifications;

         // Boolean used to avoid sorting the fault batch by aperture if we
-        // determine at fetch time that all the access counter notifications in the
-        // batch report the same aperture
+        // determine at fetch time that all the access counter notifications in
+        // the batch report the same aperture
         bool is_single_aperture;
     } phys;

@@ -478,6 +471,9 @@ struct uvm_access_counter_service_batch_context_struct
     // Structure used to coalesce access counter servicing in a VA block
     uvm_service_block_context_t block_service_context;

+    // Structure used to service access counter migrations in an ATS block.
+    uvm_ats_fault_context_t ats_context;
+
     // Unique id (per-GPU) generated for tools events recording
     NvU32 batch_id;
 };
@@ -610,10 +606,22 @@ typedef enum
     UVM_GPU_PEER_COPY_MODE_COUNT
 } uvm_gpu_peer_copy_mode_t;

+// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
+// parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
+// (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
+// partitions within the parent. The parent GPU and partition GPU have
+// different "id" and "uuid".
 struct uvm_gpu_struct
 {
     uvm_parent_gpu_t *parent;

+    // The gpu's GI uuid if SMC is enabled; otherwise, a copy of parent->uuid.
+    NvProcessorUuid uuid;
+
+    // Nice printable name in the format: ID: 999: UVM-GPU-<parent_uuid>.
+    // UVM_GPU_UUID_TEXT_BUFFER_LENGTH includes the null character.
+    char name[9 + UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
+
     // Refcount of the gpu, i.e. how many times it has been retained. This is
     // roughly a count of how many times it has been registered with a VA space,
     // except that some paths retain the GPU temporarily without a VA space.
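To make the parent/child split concrete, a sketch (not part of the commit) using the name accessors this diff updates further down; the child uvm_gpu_t now carries per-partition identity, while device-wide identity stays in the parent:

    static void log_gpu_identity(uvm_gpu_t *gpu)
    {
        // Partition-level name (built from the GI uuid when SMC is enabled)
        printk(KERN_INFO "uvm: child GPU %s\n", uvm_gpu_name(gpu));

        // Device-level name shared by all partitions of the PCIe device
        printk(KERN_INFO "uvm: parent GPU %s\n", uvm_parent_gpu_name(gpu->parent));
    }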
@@ -632,13 +640,9 @@ struct uvm_gpu_struct
     // user can create a lot of va spaces and register the gpu with them).
     atomic64_t retained_count;

-    // A unique uvm gpu id in range [1, UVM_ID_MAX_PROCESSORS); this is a copy
-    // of the parent's id.
+    // A unique uvm gpu id in range [1, UVM_ID_MAX_PROCESSORS)
     uvm_gpu_id_t id;

-    // A unique uvm global_gpu id in range [1, UVM_GLOBAL_ID_MAX_PROCESSORS)
-    uvm_global_gpu_id_t global_id;
-
     // Should be UVM_GPU_MAGIC_VALUE. Used for memory checking.
     NvU64 magic;

@@ -664,8 +668,8 @@ struct uvm_gpu_struct
     struct
     {
         // Big page size used by the internal UVM VA space
-        // Notably it may be different than the big page size used by a user's VA
-        // space in general.
+        // Notably it may be different than the big page size used by a user's
+        // VA space in general.
         NvU32 internal_size;
     } big_page;

@@ -691,8 +695,8 @@ struct uvm_gpu_struct
         // lazily-populated array of peer GPUs, indexed by the peer's GPU index
         uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];

-        // Leaf spinlock used to synchronize access to the peer_gpus table so that
-        // it can be safely accessed from the access counters bottom half
+        // Leaf spinlock used to synchronize access to the peer_gpus table so
+        // that it can be safely accessed from the access counters bottom half
         uvm_spinlock_t peer_gpus_lock;
     } peer_info;

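The locking pattern the comment describes, sketched (the helper is hypothetical; uvm_spin_lock()/uvm_spin_unlock() and uvm_id_gpu_index() are existing UVM primitives):

    static uvm_gpu_t *peer_gpu_lookup(uvm_gpu_t *gpu, uvm_gpu_id_t peer_id)
    {
        uvm_gpu_t *peer_gpu;

        // Leaf spinlock: safe to take from the access counters bottom half
        uvm_spin_lock(&gpu->peer_info.peer_gpus_lock);
        peer_gpu = gpu->peer_info.peer_gpus[uvm_id_gpu_index(peer_id)];
        uvm_spin_unlock(&gpu->peer_info.peer_gpus_lock);

        return peer_gpu;
    }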
@@ -852,6 +856,11 @@ struct uvm_gpu_struct
     bool uvm_test_force_upper_pushbuffer_segment;
 };

+// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
+// parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
+// (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
+// partitions within the parent. The parent GPU and partition GPU have
+// different "id" and "uuid".
 struct uvm_parent_gpu_struct
 {
     // Reference count for how many places are holding on to a parent GPU
@@ -864,11 +873,11 @@ struct uvm_parent_gpu_struct
     // The number of uvm_gpu_ts referencing this uvm_parent_gpu_t.
     NvU32 num_retained_gpus;

-    uvm_gpu_t *gpus[UVM_ID_MAX_SUB_PROCESSORS];
+    uvm_gpu_t *gpus[UVM_PARENT_ID_MAX_SUB_PROCESSORS];

     // Bitmap of valid child entries in the gpus[] table. Used to retrieve a
     // usable child GPU in bottom-halves.
-    DECLARE_BITMAP(valid_gpus, UVM_ID_MAX_SUB_PROCESSORS);
+    DECLARE_BITMAP(valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS);

     // The gpu's uuid
     NvProcessorUuid uuid;
@@ -880,8 +889,8 @@ struct uvm_parent_gpu_struct
     // hardware classes, etc.).
     UvmGpuInfo rm_info;

-    // A unique uvm gpu id in range [1, UVM_ID_MAX_PROCESSORS)
-    uvm_gpu_id_t id;
+    // A unique uvm gpu id in range [1, UVM_PARENT_ID_MAX_PROCESSORS)
+    uvm_parent_gpu_id_t id;

     // Reference to the Linux PCI device
     //
@@ -916,12 +925,13 @@ struct uvm_parent_gpu_struct
     // dma_addressable_start (in bifSetupDmaWindow_IMPL()) and hence when
     // referencing sysmem from the GPU, dma_addressable_start should be
     // subtracted from the physical address. The DMA mapping helpers like
-    // uvm_gpu_map_cpu_pages() and uvm_gpu_dma_alloc_page() take care of that.
+    // uvm_parent_gpu_map_cpu_pages() and uvm_parent_gpu_dma_alloc_page() take
+    // care of that.
     NvU64 dma_addressable_start;
     NvU64 dma_addressable_limit;

-    // Total size (in bytes) of physically mapped (with uvm_gpu_map_cpu_pages)
-    // sysmem pages, used for leak detection.
+    // Total size (in bytes) of physically mapped (with
+    // uvm_parent_gpu_map_cpu_pages) sysmem pages, used for leak detection.
     atomic64_t mapped_cpu_pages_size;

     // Hardware Abstraction Layer
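The adjustment described above, sketched as a standalone helper (hypothetical; in the real code this happens inside uvm_parent_gpu_map_cpu_pages() and friends):

    static NvU64 dma_addr_to_gpu_sysmem_phys(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr)
    {
        UVM_ASSERT(dma_addr >= parent_gpu->dma_addressable_start);
        UVM_ASSERT(dma_addr <= parent_gpu->dma_addressable_limit);

        // The GPU-visible sysmem physical address is offset by the start of
        // the DMA window
        return dma_addr - parent_gpu->dma_addressable_start;
    }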
@@ -940,7 +950,11 @@ struct uvm_parent_gpu_struct
     // Virtualization mode of the GPU.
     UVM_VIRT_MODE virt_mode;

-    // Whether the GPU can trigger faults on prefetch instructions
+    // Pascal+ GPUs can trigger faults on prefetch instructions. If false, this
+    // feature must be disabled at all times in GPUs of the given architecture.
+    // If true, the feature can be toggled at will by SW.
+    //
+    // The field should not be used unless the GPU supports replayable faults.
     bool prefetch_fault_supported;

     // Number of membars required to flush out HSHUB following a TLB invalidate
@@ -955,6 +969,11 @@ struct uvm_parent_gpu_struct

     bool access_counters_supported;

+    // If this is true, physical address based access counter notifications are
+    // potentially generated. If false, only virtual address based notifications
+    // are generated (assuming access_counters_supported is true too).
+    bool access_counters_can_use_physical_addresses;
+
     bool fault_cancel_va_supported;

     // True if the GPU has hardware support for scoped atomics
@@ -981,6 +1000,10 @@ struct uvm_parent_gpu_struct

     bool plc_supported;

+    // If true, page_tree initialization pre-populates no_ats_ranges. It only
+    // affects ATS systems.
+    bool no_ats_range_required;
+
     // Parameters used by the TLB batching API
     struct
     {
@@ -1052,14 +1075,16 @@ struct uvm_parent_gpu_struct
     // Interrupt handling state and locks
     uvm_isr_info_t isr;

-    // Fault buffer info. This is only valid if supports_replayable_faults is set to true
+    // Fault buffer info. This is only valid if supports_replayable_faults is
+    // set to true.
     uvm_fault_buffer_info_t fault_buffer_info;

     // PMM lazy free processing queue.
     // TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
     nv_kthread_q_t lazy_free_q;

-    // Access counter buffer info. This is only valid if supports_access_counters is set to true
+    // Access counter buffer info. This is only valid if
+    // supports_access_counters is set to true.
     uvm_access_counter_buffer_info_t access_counter_buffer_info;

     // Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
@@ -1109,7 +1134,7 @@ struct uvm_parent_gpu_struct
     uvm_rb_tree_t instance_ptr_table;
     uvm_spinlock_t instance_ptr_table_lock;

-    // This is set to true if the GPU belongs to an SLI group. Else, set to false.
+    // This is set to true if the GPU belongs to an SLI group.
     bool sli_enabled;

     struct
@@ -1136,8 +1161,8 @@ struct uvm_parent_gpu_struct
     // environment, rather than using the peer-id field of the PTE (which can
     // only address 8 gpus), all gpus are assigned a 47-bit physical address
     // space by the fabric manager. Any physical address access to these
-    // physical address spaces are routed through the switch to the corresponding
-    // peer.
+    // physical address spaces are routed through the switch to the
+    // corresponding peer.
     struct
     {
         bool is_nvswitch_connected;
@@ -1175,9 +1200,14 @@ struct uvm_parent_gpu_struct
     } smmu_war;
 };

+static const char *uvm_parent_gpu_name(uvm_parent_gpu_t *parent_gpu)
+{
+    return parent_gpu->name;
+}
+
 static const char *uvm_gpu_name(uvm_gpu_t *gpu)
 {
-    return gpu->parent->name;
+    return gpu->name;
 }

 static const NvProcessorUuid *uvm_gpu_uuid(uvm_gpu_t *gpu)
@@ -1362,7 +1392,8 @@ void uvm_gpu_release_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
 // They must not be the same gpu.
 uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu);

-// Get the processor id accessible by the given GPU for the given physical address
+// Get the processor id accessible by the given GPU for the given physical
+// address.
 uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);

 // Get the P2P capabilities between the gpus with the given indexes
@@ -1407,10 +1438,11 @@ static bool uvm_gpus_are_indirect_peers(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
 // mapping covering the passed address, has been previously created.
 static uvm_gpu_address_t uvm_gpu_address_virtual_from_vidmem_phys(uvm_gpu_t *gpu, NvU64 pa)
 {
-    UVM_ASSERT(uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu));
+    UVM_ASSERT(uvm_mmu_parent_gpu_needs_static_vidmem_mapping(gpu->parent) ||
+               uvm_mmu_parent_gpu_needs_dynamic_vidmem_mapping(gpu->parent));
     UVM_ASSERT(pa <= gpu->mem_info.max_allocatable_address);

-    if (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu))
+    if (uvm_mmu_parent_gpu_needs_static_vidmem_mapping(gpu->parent))
         UVM_ASSERT(gpu->static_flat_mapping.ready);

     return uvm_gpu_address_virtual(gpu->parent->flat_vidmem_va_base + pa);
@@ -1422,12 +1454,12 @@ static uvm_gpu_address_t uvm_gpu_address_virtual_from_vidmem_phys(uvm_gpu_t *gpu, NvU64 pa)
 //
 // The actual GPU mapping only exists if a linear mapping covering the passed
 // address has been previously created.
-static uvm_gpu_address_t uvm_gpu_address_virtual_from_sysmem_phys(uvm_gpu_t *gpu, NvU64 pa)
+static uvm_gpu_address_t uvm_parent_gpu_address_virtual_from_sysmem_phys(uvm_parent_gpu_t *parent_gpu, NvU64 pa)
 {
-    UVM_ASSERT(uvm_mmu_gpu_needs_dynamic_sysmem_mapping(gpu));
-    UVM_ASSERT(pa <= (gpu->parent->dma_addressable_limit - gpu->parent->dma_addressable_start));
+    UVM_ASSERT(uvm_mmu_parent_gpu_needs_dynamic_sysmem_mapping(parent_gpu));
+    UVM_ASSERT(pa <= (parent_gpu->dma_addressable_limit - parent_gpu->dma_addressable_start));

-    return uvm_gpu_address_virtual(gpu->parent->flat_sysmem_va_base + pa);
+    return uvm_gpu_address_virtual(parent_gpu->flat_sysmem_va_base + pa);
 }

 // Given a GPU or CPU physical address (not peer), retrieve an address suitable
@@ -1437,11 +1469,12 @@ static uvm_gpu_address_t uvm_gpu_address_copy(uvm_gpu_t *gpu, uvm_gpu_phys_address_t phys_addr)
     UVM_ASSERT(phys_addr.aperture == UVM_APERTURE_VID || phys_addr.aperture == UVM_APERTURE_SYS);

     if (phys_addr.aperture == UVM_APERTURE_VID) {
-        if (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu))
+        if (uvm_mmu_parent_gpu_needs_static_vidmem_mapping(gpu->parent) ||
+            uvm_mmu_parent_gpu_needs_dynamic_vidmem_mapping(gpu->parent))
             return uvm_gpu_address_virtual_from_vidmem_phys(gpu, phys_addr.address);
     }
-    else if (uvm_mmu_gpu_needs_dynamic_sysmem_mapping(gpu)) {
-        return uvm_gpu_address_virtual_from_sysmem_phys(gpu, phys_addr.address);
+    else if (uvm_mmu_parent_gpu_needs_dynamic_sysmem_mapping(gpu->parent)) {
+        return uvm_parent_gpu_address_virtual_from_sysmem_phys(gpu->parent, phys_addr.address);
     }

     return uvm_gpu_address_from_phys(phys_addr);
@@ -1459,9 +1492,9 @@ NV_STATUS uvm_gpu_check_ecc_error(uvm_gpu_t *gpu);

 // Check for ECC errors without calling into RM
 //
-// Calling into RM is problematic in many places, this check is always safe to do.
-// Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an ECC error and
-// it's required to call uvm_gpu_check_ecc_error() to be sure.
+// Calling into RM is problematic in many places, this check is always safe to
+// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an ECC error
+// and it's required to call uvm_gpu_check_ecc_error() to be sure.
 NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);

 // Map size bytes of contiguous sysmem on the GPU for physical access
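The call pattern that comment implies, sketched (hypothetical wrapper): try the cheap no-RM check first and only fall back to the full RM call on an inconclusive answer:

    static NV_STATUS gpu_check_ecc(uvm_gpu_t *gpu)
    {
        NV_STATUS status = uvm_gpu_check_ecc_error_no_rm(gpu);

        // Only a "maybe" answer requires calling into RM to be sure
        if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
            status = uvm_gpu_check_ecc_error(gpu);

        return status;
    }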
@@ -1470,19 +1503,19 @@ NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
 //
 // Returns the physical address of the pages that can be used to access them on
 // the GPU.
-NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);
+NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);

-// Unmap num_pages pages previously mapped with uvm_gpu_map_cpu_pages().
-void uvm_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);
+// Unmap num_pages pages previously mapped with uvm_parent_gpu_map_cpu_pages().
+void uvm_parent_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);

-static NV_STATUS uvm_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
+static NV_STATUS uvm_parent_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
 {
-    return uvm_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
+    return uvm_parent_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
 }

-static void uvm_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
+static void uvm_parent_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
 {
-    uvm_gpu_unmap_cpu_pages(parent_gpu, dma_address, PAGE_SIZE);
+    uvm_parent_gpu_unmap_cpu_pages(parent_gpu, dma_address, PAGE_SIZE);
 }

 // Allocate and map a page of system DMA memory on the GPU for physical access
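Typical usage of the renamed single-page wrappers, sketched (hypothetical caller; error handling kept minimal):

    static NV_STATUS with_page_mapped_for_gpu(uvm_parent_gpu_t *parent_gpu, struct page *page)
    {
        NvU64 dma_address;
        NV_STATUS status;

        status = uvm_parent_gpu_map_cpu_page(parent_gpu, page, &dma_address);
        if (status != NV_OK)
            return status;

        // ... program GPU accesses to the page using dma_address ...

        uvm_parent_gpu_unmap_cpu_page(parent_gpu, dma_address);

        return NV_OK;
    }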
@@ -1491,13 +1524,13 @@ static void uvm_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
 // - the address of the page that can be used to access them on
 // the GPU in the dma_address_out parameter.
 // - the address of allocated memory in CPU virtual address space.
-void *uvm_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu,
-                             gfp_t gfp_flags,
-                             NvU64 *dma_address_out);
+void *uvm_parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu,
+                                    gfp_t gfp_flags,
+                                    NvU64 *dma_address_out);

 // Unmap and free size bytes of contiguous sysmem DMA previously allocated
-// with uvm_gpu_map_cpu_pages().
-void uvm_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address);
+// with uvm_parent_gpu_map_cpu_pages().
+void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address);

 // Returns whether the given range is within the GPU's addressable VA ranges.
 // It requires the input 'addr' to be in canonical form for platforms compliant
@@ -1518,6 +1551,8 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
 // The GPU must be initialized before calling this function.
 bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);

+bool uvm_platform_uses_canonical_form_address(void);
+
 // Returns addr's canonical form for host systems that use canonical form
 // addresses.
 NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);
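How the new declaration pairs with the canonicalization helper below it, sketched (hypothetical wrapper):

    static NvU64 canonicalize_addr(uvm_parent_gpu_t *parent_gpu, NvU64 addr)
    {
        // Sign extension only applies on platforms that use canonical-form
        // addresses
        if (uvm_platform_uses_canonical_form_address())
            return uvm_parent_gpu_canonical_address(parent_gpu, addr);

        return addr;
    }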
@@ -1527,47 +1562,49 @@ static bool uvm_parent_gpu_is_coherent(const uvm_parent_gpu_t *parent_gpu)
     return parent_gpu->system_bus.memory_window_end > parent_gpu->system_bus.memory_window_start;
 }

-static bool uvm_gpu_has_pushbuffer_segments(uvm_gpu_t *gpu)
+static bool uvm_parent_gpu_needs_pushbuffer_segments(uvm_parent_gpu_t *parent_gpu)
 {
-    return gpu->parent->max_host_va > (1ull << 40);
+    return parent_gpu->max_host_va > (1ull << 40);
 }

-static bool uvm_gpu_supports_eviction(uvm_gpu_t *gpu)
+static bool uvm_parent_gpu_supports_eviction(uvm_parent_gpu_t *parent_gpu)
 {
     // Eviction is supported only if the GPU supports replayable faults
-    return gpu->parent->replayable_faults_supported;
+    return parent_gpu->replayable_faults_supported;
 }

-static bool uvm_gpu_is_virt_mode_sriov_heavy(const uvm_gpu_t *gpu)
+static bool uvm_parent_gpu_is_virt_mode_sriov_heavy(const uvm_parent_gpu_t *parent_gpu)
 {
-    return gpu->parent->virt_mode == UVM_VIRT_MODE_SRIOV_HEAVY;
+    return parent_gpu->virt_mode == UVM_VIRT_MODE_SRIOV_HEAVY;
 }

-static bool uvm_gpu_is_virt_mode_sriov_standard(const uvm_gpu_t *gpu)
+static bool uvm_parent_gpu_is_virt_mode_sriov_standard(const uvm_parent_gpu_t *parent_gpu)
 {
-    return gpu->parent->virt_mode == UVM_VIRT_MODE_SRIOV_STANDARD;
+    return parent_gpu->virt_mode == UVM_VIRT_MODE_SRIOV_STANDARD;
 }

 // Returns true if the virtualization mode is SR-IOV heavy or SR-IOV standard.
-static bool uvm_gpu_is_virt_mode_sriov(const uvm_gpu_t *gpu)
+static bool uvm_parent_gpu_is_virt_mode_sriov(const uvm_parent_gpu_t *parent_gpu)
 {
-    return uvm_gpu_is_virt_mode_sriov_heavy(gpu) || uvm_gpu_is_virt_mode_sriov_standard(gpu);
+    return uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu) ||
+           uvm_parent_gpu_is_virt_mode_sriov_standard(parent_gpu);
 }

-static bool uvm_gpu_uses_proxy_channel_pool(const uvm_gpu_t *gpu)
+static bool uvm_parent_gpu_needs_proxy_channel_pool(const uvm_parent_gpu_t *parent_gpu)
 {
-    return uvm_gpu_is_virt_mode_sriov_heavy(gpu);
+    return uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu);
 }

-uvm_aperture_t uvm_gpu_page_tree_init_location(const uvm_gpu_t *gpu);
+uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu);

 // Debug print of GPU properties
 void uvm_gpu_print(uvm_gpu_t *gpu);

-// Add the given instance pointer -> user_channel mapping to this GPU. The bottom
-// half GPU page fault handler uses this to look up the VA space for GPU faults.
-NV_STATUS uvm_gpu_add_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);
-void uvm_gpu_remove_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);
+// Add the given instance pointer -> user_channel mapping to this GPU. The
+// bottom half GPU page fault handler uses this to look up the VA space for GPU
+// faults.
+NV_STATUS uvm_parent_gpu_add_user_channel(uvm_parent_gpu_t *parent_gpu, uvm_user_channel_t *user_channel);
+void uvm_parent_gpu_remove_user_channel(uvm_parent_gpu_t *parent_gpu, uvm_user_channel_t *user_channel);

 // Looks up an entry added by uvm_gpu_add_user_channel. Return codes:
 // NV_OK            Translation successful
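The caller-side migration pattern for the renames above, sketched (hypothetical compatibility wrapper): where a uvm_gpu_t used to be passed, its parent is now passed explicitly:

    static bool gpu_supports_eviction(uvm_gpu_t *gpu)
    {
        // Before: uvm_gpu_supports_eviction(gpu)
        // After:  the predicate operates on the parent GPU directly
        return uvm_parent_gpu_supports_eviction(gpu->parent);
    }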
@@ -1578,13 +1615,13 @@ void uvm_gpu_remove_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);
 // out_va_space is valid if NV_OK is returned, otherwise it's NULL. The caller
 // is responsibile for ensuring that the returned va_space can't be destroyed,
 // so these functions should only be called from the bottom half.
-NV_STATUS uvm_gpu_fault_entry_to_va_space(uvm_gpu_t *gpu,
-                                          uvm_fault_buffer_entry_t *fault,
-                                          uvm_va_space_t **out_va_space);
+NV_STATUS uvm_parent_gpu_fault_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
+                                                 uvm_fault_buffer_entry_t *fault,
+                                                 uvm_va_space_t **out_va_space);

-NV_STATUS uvm_gpu_access_counter_entry_to_va_space(uvm_gpu_t *gpu,
-                                                   uvm_access_counter_buffer_entry_t *entry,
-                                                   uvm_va_space_t **out_va_space);
+NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
+                                                          uvm_access_counter_buffer_entry_t *entry,
+                                                          uvm_va_space_t **out_va_space);

 typedef enum
 {