560.28.03

Author: Gaurav Juvekar
Date: 2024-07-19 15:45:15 -07:00
Parent: 5fdf5032fb
Commit: 448d5cc656
859 changed files with 165424 additions and 91129 deletions


@@ -81,6 +81,8 @@ static uvm_gpu_link_type_t get_gpu_link_type(UVM_LINK_TYPE link_type)
return UVM_GPU_LINK_NVLINK_3;
case UVM_LINK_TYPE_NVLINK_4:
return UVM_GPU_LINK_NVLINK_4;
case UVM_LINK_TYPE_NVLINK_5:
return UVM_GPU_LINK_NVLINK_5;
case UVM_LINK_TYPE_C2C:
return UVM_GPU_LINK_C2C;
default:
@@ -460,7 +462,7 @@ static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)
static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
{
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 7);
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 8);
switch (link_type) {
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_INVALID);
@@ -469,6 +471,7 @@ static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_2);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_3);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_4);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_5);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_C2C);
UVM_ENUM_STRING_DEFAULT();
}
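The BUILD_BUG_ON bump from 7 to 8 keeps this stringifier in sync with the enum: adding UVM_GPU_LINK_NVLINK_5 without a matching case would now fail at compile time. A minimal sketch of the usual stringification-macro pattern behind UVM_ENUM_STRING_CASE (an assumption; the driver's actual definitions live elsewhere and may differ in detail):

// Sketch only: the common pattern such macros follow.
#define UVM_ENUM_STRING_CASE(value)   case value: return #value
#define UVM_ENUM_STRING_DEFAULT()     default: return "UNKNOWN"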
@@ -1679,12 +1682,9 @@ static void remove_gpu(uvm_gpu_t *gpu)
// TODO: Bug 2008200: Add and remove the GPU in a more reasonable spot.
uvm_conf_computing_gpu_deinit(gpu);
// TODO: Bug 2844714: If the parent is not being freed, the following
// gpu_table_lock is only needed to protect concurrent
// find_first_valid_gpu() in BH from the __clear_bit here. After
// find_first_valid_gpu() is removed, gpu_table_lock should only be acquired
// and released in the free_parent case.
//
// If the parent is not being freed, the following gpu_table_lock is only
// needed to protect concurrent uvm_parent_gpu_find_first_valid_gpu() in BH
// from the __clear_bit here.
// In the free_parent case, gpu_table_lock protects the top half from
// uvm_global_remove_parent_gpu().
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
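A generic sketch of the locking pattern the reworded comment describes (illustrative only, using standard kernel primitives rather than the driver's uvm_spin_lock_irqsave wrapper): the bit is cleared under an IRQ-safe spinlock so a bottom-half reader scanning the same bitmap under that lock never races the update.

#include <linux/bitops.h>
#include <linux/spinlock.h>

// Illustrative only: clear a "valid GPU" bit while holding the same IRQ-safe
// lock the bottom-half reader takes, so the reader sees the bitmap update
// and the per-slot state change consistently.
static void example_clear_valid_slot(unsigned long *valid_mask, int slot, spinlock_t *lock)
{
    unsigned long flags;

    spin_lock_irqsave(lock, flags);
    __clear_bit(slot, valid_mask);      // non-atomic variant is safe under the lock
    spin_unlock_irqrestore(lock, flags);
}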
@@ -2262,18 +2262,6 @@ static void set_optimal_p2p_write_ces(const UvmGpuP2PCapsParams *p2p_caps_params
ce0 = p2p_caps_params->optimalNvlinkWriteCEs[sorted ? 0 : 1];
ce1 = p2p_caps_params->optimalNvlinkWriteCEs[sorted ? 1 : 0];
// Indirect peers communicate through the CPU, so the optimal CE
// should match the one selected for writing to system memory
if (peer_caps->is_indirect_peer) {
uvm_channel_pool_t *pool;
pool = gpu0->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
UVM_ASSERT(ce0 == pool->engine_index);
pool = gpu1->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
UVM_ASSERT(ce1 == pool->engine_index);
}
uvm_channel_manager_set_p2p_ce(gpu0->channel_manager, gpu1, ce0);
uvm_channel_manager_set_p2p_ce(gpu1->channel_manager, gpu0, ce1);
}
@@ -2369,66 +2357,45 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
peer_caps->total_link_line_rate_mbyte_per_s = p2p_caps_params->totalLinkLineRateMBps;
// Initialize peer ids and establish peer mappings
peer_caps->is_indirect_peer = (p2p_caps_params->indirectAccess == NV_TRUE);
// Peer id from min(gpu_id0, gpu_id1) -> max(gpu_id0, gpu_id1)
peer_caps->peer_ids[0] = p2p_caps_params->peerIds[0];
if (peer_caps->is_indirect_peer) {
UVM_ASSERT(gpu0->mem_info.numa.enabled);
UVM_ASSERT(gpu1->mem_info.numa.enabled);
// Peer id from max(gpu_id0, gpu_id1) -> min(gpu_id0, gpu_id1)
peer_caps->peer_ids[1] = p2p_caps_params->peerIds[1];
status = uvm_pmm_gpu_indirect_peer_init(&gpu0->pmm, gpu1);
if (status != NV_OK)
return status;
// Establish peer mappings from each GPU to the other.
status = uvm_mmu_create_peer_identity_mappings(gpu0, gpu1);
if (status != NV_OK)
return status;
status = uvm_pmm_gpu_indirect_peer_init(&gpu1->pmm, gpu0);
if (status != NV_OK)
return status;
status = uvm_mmu_create_peer_identity_mappings(gpu1, gpu0);
if (status != NV_OK)
return status;
set_optimal_p2p_write_ces(p2p_caps_params, peer_caps, gpu0, gpu1);
UVM_ASSERT(peer_caps->total_link_line_rate_mbyte_per_s == 0);
}
else {
// Peer id from min(gpu_id0, gpu_id1) -> max(gpu_id0, gpu_id1)
peer_caps->peer_ids[0] = p2p_caps_params->peerIds[0];
set_optimal_p2p_write_ces(p2p_caps_params, peer_caps, gpu0, gpu1);
// Peer id from max(gpu_id0, gpu_id1) -> min(gpu_id0, gpu_id1)
peer_caps->peer_ids[1] = p2p_caps_params->peerIds[1];
UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);
// Establish peer mappings from each GPU to the other. Indirect peers
// do not require identity mappings since they use sysmem aperture to
// communicate.
status = uvm_mmu_create_peer_identity_mappings(gpu0, gpu1);
if (status != NV_OK)
return status;
// In the case of NVLINK peers, this initialization will happen during
// add_gpu. As soon as the peer info table is assigned below, the access
// counter bottom half could start operating on the GPU being newly
// added and inspecting the peer caps, so all of the appropriate
// initialization must happen before this point.
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
status = uvm_mmu_create_peer_identity_mappings(gpu1, gpu0);
if (status != NV_OK)
return status;
uvm_processor_mask_set(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
UVM_ASSERT(gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] == NULL);
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = gpu1;
set_optimal_p2p_write_ces(p2p_caps_params, peer_caps, gpu0, gpu1);
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);
uvm_processor_mask_set(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
UVM_ASSERT(gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] == NULL);
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = gpu0;
// In the case of NVLINK peers, this initialization will happen during
// add_gpu. As soon as the peer info table is assigned below, the access
// counter bottom half could start operating on the GPU being newly
// added and inspecting the peer caps, so all of the appropriate
// initialization must happen before this point.
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
uvm_processor_mask_set(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
UVM_ASSERT(gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] == NULL);
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = gpu1;
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
uvm_processor_mask_set(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
UVM_ASSERT(gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] == NULL);
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = gpu0;
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
}
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
return init_procfs_peer_files(gpu0, gpu1);
}
@@ -2496,7 +2463,6 @@ static NV_STATUS enable_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
goto cleanup;
// Sanity checks
UVM_ASSERT(p2p_caps_params.indirectAccess == NV_FALSE);
UVM_ASSERT(p2p_caps_params.p2pLink == UVM_LINK_TYPE_PCIE);
status = init_peer_access(gpu0, gpu1, &p2p_caps_params, peer_caps);
@@ -2526,29 +2492,26 @@ static NV_STATUS enable_nvlink_peer_access(uvm_gpu_t *gpu0,
UVM_ASSERT(peer_caps->ref_count == 0);
peer_caps->ref_count = 1;
if (!p2p_caps_params->indirectAccess) {
// Create P2P object for direct NVLink peers
status = create_p2p_object(gpu0, gpu1, &p2p_handle);
if (status != NV_OK) {
UVM_ERR_PRINT("failed to create a P2P object with error: %s, for GPU1:%s and GPU2:%s \n",
nvstatusToString(status),
uvm_gpu_name(gpu0),
uvm_gpu_name(gpu1));
return status;
}
UVM_ASSERT(p2p_handle != 0);
// Store the handle in the global table.
peer_caps->p2p_handle = p2p_handle;
// Update p2p caps after p2p object creation as it generates the peer
// ids
status = get_p2p_caps(gpu0, gpu1, p2p_caps_params);
if (status != NV_OK)
goto cleanup;
// Create P2P object for direct NVLink peers
status = create_p2p_object(gpu0, gpu1, &p2p_handle);
if (status != NV_OK) {
UVM_ERR_PRINT("failed to create a P2P object with error: %s, for GPU1:%s and GPU2:%s \n",
nvstatusToString(status),
uvm_gpu_name(gpu0),
uvm_gpu_name(gpu1));
return status;
}
UVM_ASSERT(p2p_handle != 0);
// Store the handle in the global table.
peer_caps->p2p_handle = p2p_handle;
// Update p2p caps after p2p object creation as it generates the peer ids.
status = get_p2p_caps(gpu0, gpu1, p2p_caps_params);
if (status != NV_OK)
goto cleanup;
status = init_peer_access(gpu0, gpu1, p2p_caps_params, peer_caps);
if (status != NV_OK)
goto cleanup;
@@ -2583,11 +2546,6 @@ static NV_STATUS discover_nvlink_peers(uvm_gpu_t *gpu)
if (p2p_caps_params.p2pLink == UVM_LINK_TYPE_NONE || p2p_caps_params.p2pLink == UVM_LINK_TYPE_PCIE)
continue;
// Indirect peers are only supported when onlined as NUMA nodes, because
// we want to use vm_insert_page and dma_map_page.
if (p2p_caps_params.indirectAccess && (!gpu->mem_info.numa.enabled || !other_gpu->mem_info.numa.enabled))
continue;
status = enable_nvlink_peer_access(gpu, other_gpu, &p2p_caps_params);
if (status != NV_OK)
goto cleanup;
@@ -2676,32 +2634,25 @@ static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
deinit_procfs_peer_cap_files(peer_caps);
p2p_handle = peer_caps->p2p_handle;
UVM_ASSERT(p2p_handle);
if (peer_caps->is_indirect_peer) {
uvm_pmm_gpu_indirect_peer_destroy(&gpu0->pmm, gpu1);
uvm_pmm_gpu_indirect_peer_destroy(&gpu1->pmm, gpu0);
}
else {
UVM_ASSERT(p2p_handle);
uvm_mmu_destroy_peer_identity_mappings(gpu0, gpu1);
uvm_mmu_destroy_peer_identity_mappings(gpu1, gpu0);
uvm_mmu_destroy_peer_identity_mappings(gpu0, gpu1);
uvm_mmu_destroy_peer_identity_mappings(gpu1, gpu0);
uvm_rm_locked_call_void(nvUvmInterfaceP2pObjectDestroy(uvm_global_session_handle(), p2p_handle));
uvm_rm_locked_call_void(nvUvmInterfaceP2pObjectDestroy(uvm_global_session_handle(), p2p_handle));
UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);
UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
uvm_processor_mask_clear(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = NULL;
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
uvm_processor_mask_clear(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = NULL;
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
uvm_processor_mask_clear(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = NULL;
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
}
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
uvm_processor_mask_clear(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = NULL;
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
// Flush the access counter buffer to avoid getting stale notifications for
// accesses to GPUs to which peer access is being disabled. This is also
@@ -2741,10 +2692,6 @@ static uvm_aperture_t uvm_gpu_peer_caps_aperture(uvm_gpu_peer_t *peer_caps, uvm_
{
size_t peer_index;
// Indirect peers are accessed as sysmem addresses
if (peer_caps->is_indirect_peer)
return UVM_APERTURE_SYS;
// MIG instances in the same physical GPU have vidmem addresses
if (local_gpu->parent == remote_gpu->parent)
return UVM_APERTURE_VID;
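For reference, a hedged sketch of the direction-dependent lookup that follows in this function, using the min/max peer-id convention noted in init_peer_access above (the example_ helper name is hypothetical and the exact tail of the real function may differ):

// Hedged sketch: peer_ids[0] holds the peer id for the min(gpu_id) ->
// max(gpu_id) direction and peer_ids[1] the reverse, so the index is chosen
// by comparing the two GPU ids.
static uvm_aperture_t example_peer_aperture(uvm_gpu_peer_t *peer_caps,
                                            uvm_gpu_t *local_gpu,
                                            uvm_gpu_t *remote_gpu)
{
    size_t peer_index = uvm_id_value(local_gpu->id) < uvm_id_value(remote_gpu->id) ? 0 : 1;

    return UVM_APERTURE_PEER(peer_caps->peer_ids[peer_index]);
}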
@@ -2795,6 +2742,7 @@ uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_p
for_each_gpu_id_in_mask(id, &gpu->peer_info.peer_gpu_mask) {
uvm_gpu_t *other_gpu = gpu->peer_info.peer_gpus[uvm_id_gpu_index(id)];
UVM_ASSERT(other_gpu);
UVM_ASSERT(!uvm_gpus_are_smc_peers(gpu, other_gpu));
if (uvm_gpus_are_nvswitch_connected(gpu, other_gpu)) {
// NVSWITCH connected systems use an extended physical address to
@@ -2831,7 +2779,7 @@ static NvU64 instance_ptr_to_key(uvm_gpu_phys_address_t instance_ptr)
// Instance pointers must be 4k aligned and they must have either VID or SYS
// apertures. Compress them as much as we can both to guarantee that the key
// fits within 64 bits, and to make the table as shallow as possible.
// fits within 64 bits, and to make the key space as small as possible.
UVM_ASSERT(IS_ALIGNED(instance_ptr.address, UVM_PAGE_SIZE_4K));
UVM_ASSERT(instance_ptr.aperture == UVM_APERTURE_VID || instance_ptr.aperture == UVM_APERTURE_SYS);
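A minimal sketch of the compression the comment describes (the exact bit layout is an assumption; only the stated constraints, 4K alignment and a VID/SYS aperture, come from the source): dropping the twelve alignment zero bits and encoding the aperture in a single bit keeps the key well within 64 bits.

// Sketch only: one plausible packing that satisfies the stated constraints.
// The real instance_ptr_to_key() may use a different layout.
static NvU64 example_instance_ptr_key(uvm_gpu_phys_address_t instance_ptr)
{
    NvU64 key = instance_ptr.address >> 12;          // drop the 4K-alignment zeros

    return (key << 1) | (instance_ptr.aperture == UVM_APERTURE_SYS ? 1 : 0);
}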
@@ -2848,7 +2796,7 @@ static NV_STATUS parent_gpu_add_user_channel_subctx_info(uvm_parent_gpu_t *paren
uvm_rb_tree_node_t *channel_tree_node;
uvm_user_channel_subctx_info_t *channel_subctx_info;
uvm_user_channel_subctx_info_t *new_channel_subctx_info = NULL;
uvm_va_space_t *va_space = user_channel->gpu_va_space->va_space;
uvm_gpu_va_space_t *gpu_va_space = user_channel->gpu_va_space;
if (!user_channel->in_subctx)
return NV_OK;
@@ -2892,21 +2840,21 @@ static NV_STATUS parent_gpu_add_user_channel_subctx_info(uvm_parent_gpu_t *paren
user_channel->subctx_info = channel_subctx_info;
// Register the VA space of the channel subcontext info descriptor, or
// Register the GPU VA space of the channel subcontext info descriptor, or
// check that the existing one matches the channel's
if (channel_subctx_info->subctxs[user_channel->subctx_id].refcount++ > 0) {
UVM_ASSERT_MSG(channel_subctx_info->subctxs[user_channel->subctx_id].va_space == va_space,
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: expected VA space 0x%llx but got 0x%llx instead\n",
UVM_ASSERT_MSG(channel_subctx_info->subctxs[user_channel->subctx_id].gpu_va_space == gpu_va_space,
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: expected GPU VA space 0x%llx but got 0x%llx instead\n",
user_channel->hw_runlist_id,
user_channel->hw_channel_id,
instance_ptr.address,
uvm_aperture_string(instance_ptr.aperture),
user_channel->subctx_id,
user_channel->tsg.id,
(NvU64)va_space,
(NvU64)channel_subctx_info->subctxs[user_channel->subctx_id].va_space);
UVM_ASSERT_MSG(channel_subctx_info->subctxs[user_channel->subctx_id].va_space != NULL,
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: VA space is NULL\n",
(NvU64)gpu_va_space,
(NvU64)channel_subctx_info->subctxs[user_channel->subctx_id].gpu_va_space);
UVM_ASSERT_MSG(channel_subctx_info->subctxs[user_channel->subctx_id].gpu_va_space != NULL,
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: GPU VA space is NULL\n",
user_channel->hw_runlist_id,
user_channel->hw_channel_id,
instance_ptr.address,
@@ -2923,17 +2871,17 @@ static NV_STATUS parent_gpu_add_user_channel_subctx_info(uvm_parent_gpu_t *paren
user_channel->tsg.id);
}
else {
UVM_ASSERT_MSG(channel_subctx_info->subctxs[user_channel->subctx_id].va_space == NULL,
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: expected VA space NULL but got 0x%llx instead\n",
UVM_ASSERT_MSG(channel_subctx_info->subctxs[user_channel->subctx_id].gpu_va_space == NULL,
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: expected GPU VA space NULL but got 0x%llx instead\n",
user_channel->hw_runlist_id,
user_channel->hw_channel_id,
instance_ptr.address,
uvm_aperture_string(instance_ptr.aperture),
user_channel->subctx_id,
user_channel->tsg.id,
(NvU64)channel_subctx_info->subctxs[user_channel->subctx_id].va_space);
(NvU64)channel_subctx_info->subctxs[user_channel->subctx_id].gpu_va_space);
channel_subctx_info->subctxs[user_channel->subctx_id].va_space = va_space;
channel_subctx_info->subctxs[user_channel->subctx_id].gpu_va_space = gpu_va_space;
}
++channel_subctx_info->total_refcount;
@@ -2957,7 +2905,7 @@ static void parent_gpu_remove_user_channel_subctx_info_locked(uvm_parent_gpu_t *
uvm_user_channel_t *user_channel)
{
uvm_gpu_phys_address_t instance_ptr = user_channel->instance_ptr.addr;
uvm_va_space_t *va_space = user_channel->gpu_va_space->va_space;
uvm_gpu_va_space_t *gpu_va_space = user_channel->gpu_va_space;
uvm_assert_spinlock_locked(&parent_gpu->instance_ptr_table_lock);
@@ -2986,16 +2934,17 @@ static void parent_gpu_remove_user_channel_subctx_info_locked(uvm_parent_gpu_t *
user_channel->subctx_id,
user_channel->tsg.id);
UVM_ASSERT_MSG(user_channel->subctx_info->subctxs[user_channel->subctx_id].va_space == va_space,
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: expected VA space 0x%llx but got 0x%llx instead\n",
UVM_ASSERT_MSG(user_channel->subctx_info->subctxs[user_channel->subctx_id].gpu_va_space == gpu_va_space,
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: "
"expected GPU VA space 0x%llx but got 0x%llx instead\n",
user_channel->hw_runlist_id,
user_channel->hw_channel_id,
instance_ptr.address,
uvm_aperture_string(instance_ptr.aperture),
user_channel->subctx_id,
user_channel->tsg.id,
(NvU64)va_space,
(NvU64)user_channel->subctx_info->subctxs[user_channel->subctx_id].va_space);
(NvU64)gpu_va_space,
(NvU64)user_channel->subctx_info->subctxs[user_channel->subctx_id].gpu_va_space);
UVM_ASSERT_MSG(user_channel->subctx_info->total_refcount > 0,
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: TSG refcount is 0\n",
@@ -3008,7 +2957,7 @@ static void parent_gpu_remove_user_channel_subctx_info_locked(uvm_parent_gpu_t *
// Decrement VA space refcount. If it gets to zero, unregister the pointer
if (--user_channel->subctx_info->subctxs[user_channel->subctx_id].refcount == 0)
user_channel->subctx_info->subctxs[user_channel->subctx_id].va_space = NULL;
user_channel->subctx_info->subctxs[user_channel->subctx_id].gpu_va_space = NULL;
if (--user_channel->subctx_info->total_refcount == 0) {
uvm_rb_tree_remove(&parent_gpu->tsg_table, &user_channel->subctx_info->node);
@@ -3091,7 +3040,7 @@ static uvm_user_channel_t *instance_ptr_to_user_channel(uvm_parent_gpu_t *parent
return get_user_channel(instance_node);
}
static uvm_va_space_t *user_channel_and_subctx_to_va_space(uvm_user_channel_t *user_channel, NvU32 subctx_id)
static uvm_gpu_va_space_t *user_channel_and_subctx_to_gpu_va_space(uvm_user_channel_t *user_channel, NvU32 subctx_id)
{
uvm_user_channel_subctx_info_t *channel_subctx_info;
@@ -3119,28 +3068,31 @@ static uvm_va_space_t *user_channel_and_subctx_to_va_space(uvm_user_channel_t *u
// uncleanly and work from that subcontext continues running with work from
// other subcontexts.
if (channel_subctx_info->subctxs[subctx_id].refcount == 0) {
UVM_ASSERT(channel_subctx_info->subctxs[subctx_id].va_space == NULL);
UVM_ASSERT(channel_subctx_info->subctxs[subctx_id].gpu_va_space == NULL);
}
else {
UVM_ASSERT_MSG(channel_subctx_info->subctxs[subctx_id].va_space,
"instance_ptr {0x%llx:%s} in TSG %u: no VA space for SubCTX %u\n",
UVM_ASSERT_MSG(channel_subctx_info->subctxs[subctx_id].gpu_va_space,
"instance_ptr {0x%llx:%s} in TSG %u: no GPU VA space for SubCTX %u\n",
user_channel->instance_ptr.addr.address,
uvm_aperture_string(user_channel->instance_ptr.addr.aperture),
user_channel->tsg.id,
subctx_id);
}
return channel_subctx_info->subctxs[subctx_id].va_space;
return channel_subctx_info->subctxs[subctx_id].gpu_va_space;
}
NV_STATUS uvm_parent_gpu_fault_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
uvm_fault_buffer_entry_t *fault,
uvm_va_space_t **out_va_space)
const uvm_fault_buffer_entry_t *fault,
uvm_va_space_t **out_va_space,
uvm_gpu_t **out_gpu)
{
uvm_user_channel_t *user_channel;
uvm_gpu_va_space_t *gpu_va_space;
NV_STATUS status = NV_OK;
*out_va_space = NULL;
*out_gpu = NULL;
uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);
@@ -3161,8 +3113,10 @@ NV_STATUS uvm_parent_gpu_fault_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
// We can safely access user_channel->gpu_va_space under the
// instance_ptr_table_lock since gpu_va_space is set to NULL after this
// function is called in uvm_user_channel_detach
UVM_ASSERT(uvm_gpu_va_space_state(user_channel->gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
*out_va_space = user_channel->gpu_va_space->va_space;
gpu_va_space = user_channel->gpu_va_space;
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
*out_va_space = gpu_va_space->va_space;
*out_gpu = gpu_va_space->gpu;
}
else {
NvU32 ve_id = fault->fault_source.ve_id;
@@ -3172,12 +3126,17 @@ NV_STATUS uvm_parent_gpu_fault_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
ve_id -= user_channel->smc_engine_ve_id_offset;
*out_va_space = user_channel_and_subctx_to_va_space(user_channel, ve_id);
gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, ve_id);
// Instance pointer is valid but the fault targets a non-existent
// subcontext.
if (!*out_va_space)
if (gpu_va_space) {
*out_va_space = gpu_va_space->va_space;
*out_gpu = gpu_va_space->gpu;
}
else {
status = NV_ERR_PAGE_TABLE_NOT_AVAIL;
}
}
exit_unlock:
@@ -3187,13 +3146,16 @@ exit_unlock:
}
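A hedged usage sketch of the new signature (the caller shown is hypothetical; only the prototype and the NV_ERR_PAGE_TABLE_NOT_AVAIL error are from this change): the added out_gpu parameter lets the fault path resolve both the VA space and the specific uvm_gpu_t in a single instance-pointer lookup.

// Hypothetical caller, for illustration only.
static NV_STATUS example_resolve_fault(uvm_parent_gpu_t *parent_gpu,
                                       const uvm_fault_buffer_entry_t *fault)
{
    uvm_va_space_t *va_space;
    uvm_gpu_t *gpu;
    NV_STATUS status;

    status = uvm_parent_gpu_fault_entry_to_va_space(parent_gpu, fault, &va_space, &gpu);
    if (status != NV_OK)
        return status;   // e.g. NV_ERR_PAGE_TABLE_NOT_AVAIL for a stale subcontext

    // ...service the fault against va_space on gpu...
    return NV_OK;
}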
NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_buffer_entry_t *entry,
uvm_va_space_t **out_va_space)
const uvm_access_counter_buffer_entry_t *entry,
uvm_va_space_t **out_va_space,
uvm_gpu_t **out_gpu)
{
uvm_user_channel_t *user_channel;
uvm_gpu_va_space_t *gpu_va_space;
NV_STATUS status = NV_OK;
*out_va_space = NULL;
*out_gpu = NULL;
UVM_ASSERT(entry->address.is_virtual);
uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);
@@ -3209,13 +3171,20 @@ NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *pare
"Access counter packet contains SubCTX %u for channel not in subctx\n",
entry->virtual_info.ve_id);
UVM_ASSERT(uvm_gpu_va_space_state(user_channel->gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
*out_va_space = user_channel->gpu_va_space->va_space;
gpu_va_space = user_channel->gpu_va_space;
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
*out_va_space = gpu_va_space->va_space;
*out_gpu = gpu_va_space->gpu;
}
else {
*out_va_space = user_channel_and_subctx_to_va_space(user_channel, entry->virtual_info.ve_id);
if (!*out_va_space)
gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, entry->virtual_info.ve_id);
if (gpu_va_space) {
*out_va_space = gpu_va_space->va_space;
*out_gpu = gpu_va_space->gpu;
}
else {
status = NV_ERR_PAGE_TABLE_NOT_AVAIL;
}
}
exit_unlock: