545.23.06

Andy Ritger
2023-10-17 09:25:29 -07:00
parent f59818b751
commit b5bf85a8e3
917 changed files with 132480 additions and 110015 deletions

kernel-open/nvidia-uvm/uvm_va_space.c

@@ -222,6 +222,12 @@ NV_STATUS uvm_va_space_create(struct address_space *mapping, uvm_va_space_t **va
     uvm_down_write_mmap_lock(current->mm);
     uvm_va_space_down_write(va_space);
 
+    va_space->va_block_context = uvm_va_block_context_alloc(NULL);
+    if (!va_space->va_block_context) {
+        status = NV_ERR_NO_MEMORY;
+        goto fail;
+    }
+
     status = uvm_perf_init_va_space_events(va_space, &va_space->perf_events);
     if (status != NV_OK)
         goto fail;
@@ -258,6 +264,7 @@ NV_STATUS uvm_va_space_create(struct address_space *mapping, uvm_va_space_t **va
 fail:
     uvm_perf_heuristics_unload(va_space);
     uvm_perf_destroy_va_space_events(&va_space->perf_events);
+    uvm_va_block_context_free(va_space->va_block_context);
     uvm_va_space_up_write(va_space);
     uvm_up_write_mmap_lock(current->mm);
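
The two hunks above follow the kernel's single-label cleanup idiom: the new allocation takes one `goto fail`, and the `fail:` path frees it unconditionally, which only works if every teardown call tolerates a field that was never allocated. A minimal userspace sketch of that shape, with hypothetical `vas_*` names (not UVM APIs) and NULL-safe frees assumed:

    #include <stdlib.h>

    struct vas {
        int *block_context;   /* stands in for va_space->va_block_context */
        int *perf_events;     /* stands in for va_space->perf_events */
    };

    static void vas_free_field(int **field)
    {
        free(*field);         /* free(NULL) is a no-op, like a NULL-safe *_free() */
        *field = NULL;
    }

    int vas_create(struct vas *vas)
    {
        vas->block_context = NULL;
        vas->perf_events = NULL;

        vas->block_context = malloc(sizeof(int));
        if (!vas->block_context)
            goto fail;

        vas->perf_events = malloc(sizeof(int));
        if (!vas->perf_events)
            goto fail;

        return 0;

    fail:
        /* Unwind everything; each call is safe even if the field is NULL. */
        vas_free_field(&vas->perf_events);
        vas_free_field(&vas->block_context);
        return -1;
    }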
@@ -457,8 +464,6 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
         uvm_va_range_destroy(va_range, &deferred_free_list);
     }
 
-    uvm_hmm_va_space_destroy(va_space);
-
     uvm_range_group_radix_tree_destroy(va_space);
 
     // Unregister all GPUs in the VA space. Note that this does not release the
@@ -466,11 +471,17 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
     for_each_va_space_gpu(gpu, va_space)
         unregister_gpu(va_space, gpu, NULL, &deferred_free_list, NULL);
 
+    uvm_hmm_va_space_destroy(va_space);
+
     uvm_perf_heuristics_unload(va_space);
     uvm_perf_destroy_va_space_events(&va_space->perf_events);
 
     va_space_remove_dummy_thread_contexts(va_space);
 
+    // Destroy the VA space's block context node tracking after all ranges have
+    // been destroyed as the VA blocks may reference it.
+    uvm_va_block_context_free(va_space->va_block_context);
+
     uvm_va_space_up_write(va_space);
 
     UVM_ASSERT(uvm_processor_mask_empty(&va_space->registered_gpus));
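
The comment in this hunk encodes a lifetime rule: the VA space's block context must outlive every VA range and block that may reference it, so it is freed only after all of them are torn down. A small sketch of that ownership shape, using hypothetical names rather than UVM types:

    #include <stdlib.h>

    struct block {
        int *shared_ctx;          /* borrowed pointer into the owning space */
    };

    struct space {
        int *block_context;       /* owned; every block borrows it */
        struct block *blocks;
        unsigned num_blocks;
    };

    void space_destroy(struct space *s)
    {
        /* Destroy the children first: they may still read the shared context. */
        for (unsigned i = 0; i < s->num_blocks; i++)
            s->blocks[i].shared_ctx = NULL;   /* stand-in for per-block teardown */
        free(s->blocks);

        /* Only now is it safe to free the context the blocks were borrowing. */
        free(s->block_context);
    }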
@@ -688,7 +699,7 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
     // Mixing coherent and non-coherent GPUs is not supported
     for_each_va_space_gpu(other_gpu, va_space) {
-        if (uvm_gpu_is_coherent(gpu->parent) != uvm_gpu_is_coherent(other_gpu->parent)) {
+        if (uvm_parent_gpu_is_coherent(gpu->parent) != uvm_parent_gpu_is_coherent(other_gpu->parent)) {
             status = NV_ERR_INVALID_DEVICE;
             goto done;
         }
@@ -729,7 +740,7 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
         processor_mask_array_set(va_space->has_nvlink, UVM_ID_CPU, gpu->id);
     }
 
-    if (uvm_gpu_is_coherent(gpu->parent)) {
+    if (uvm_parent_gpu_is_coherent(gpu->parent)) {
         processor_mask_array_set(va_space->has_native_atomics, gpu->id, UVM_ID_CPU);
 
         if (gpu->mem_info.numa.enabled) {
@@ -1540,7 +1551,6 @@ static void remove_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space,
     atomic_inc(&va_space->gpu_va_space_deferred_free.num_pending);
 
     uvm_processor_mask_clear(&va_space->registered_gpu_va_spaces, gpu_va_space->gpu->id);
-    uvm_processor_mask_clear_atomic(&va_space->needs_fault_buffer_flush, gpu_va_space->gpu->id);
     va_space->gpu_va_spaces[uvm_id_gpu_index(gpu_va_space->gpu->id)] = NULL;
     gpu_va_space->state = UVM_GPU_VA_SPACE_STATE_DEAD;
 }
@@ -1610,14 +1620,14 @@ NV_STATUS uvm_va_space_unregister_gpu_va_space(uvm_va_space_t *va_space, const N
     return status;
 }
 
-bool uvm_va_space_peer_enabled(uvm_va_space_t *va_space, uvm_gpu_t *gpu1, uvm_gpu_t *gpu2)
+bool uvm_va_space_peer_enabled(uvm_va_space_t *va_space, const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
 {
     size_t table_index;
 
+    UVM_ASSERT(uvm_processor_mask_test(&va_space->registered_gpus, gpu0->id));
     UVM_ASSERT(uvm_processor_mask_test(&va_space->registered_gpus, gpu1->id));
-    UVM_ASSERT(uvm_processor_mask_test(&va_space->registered_gpus, gpu2->id));
 
-    table_index = uvm_gpu_peer_table_index(gpu1->id, gpu2->id);
+    table_index = uvm_gpu_peer_table_index(gpu0->id, gpu1->id);
     return !!test_bit(table_index, va_space->enabled_peers);
 }
@@ -2073,9 +2083,16 @@ NV_STATUS uvm_service_block_context_init(void)
     // Pre-allocate some fault service contexts for the CPU and add them to the global list
     while (num_preallocated_contexts-- > 0) {
         uvm_service_block_context_t *service_context = uvm_kvmalloc(sizeof(*service_context));
+
         if (!service_context)
             return NV_ERR_NO_MEMORY;
 
+        service_context->block_context = uvm_va_block_context_alloc(NULL);
+        if (!service_context->block_context) {
+            uvm_kvfree(service_context);
+            return NV_ERR_NO_MEMORY;
+        }
+
         list_add(&service_context->cpu_fault.service_context_list, &g_cpu_service_block_context_list);
     }
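
The hunk above now builds each preallocated context in two steps, so a failure of the second allocation must free only the first before bailing out. A userspace sketch of the same loop, with hypothetical names; entries already pushed onto the list are assumed to be reclaimed by the matching exit path, as uvm_service_block_context_exit does for the real list:

    #include <stdlib.h>

    struct service_context {
        int *block_context;             /* secondary allocation, as in the diff */
        struct service_context *next;   /* stands in for the kernel list node */
    };

    static struct service_context *g_free_list;

    int preallocate_contexts(unsigned count)
    {
        while (count-- > 0) {
            struct service_context *sc = malloc(sizeof(*sc));
            if (!sc)
                return -1;

            sc->block_context = malloc(sizeof(int));
            if (!sc->block_context) {
                free(sc);               /* undo the first allocation only */
                return -1;
            }

            sc->next = g_free_list;     /* push onto the global free list */
            g_free_list = sc;
        }
        return 0;
    }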
@@ -2089,6 +2106,7 @@ void uvm_service_block_context_exit(void)
     // Free fault service contexts for the CPU and clear the global list
     list_for_each_entry_safe(service_context, service_context_tmp, &g_cpu_service_block_context_list,
                              cpu_fault.service_context_list) {
+        uvm_va_block_context_free(service_context->block_context);
         uvm_kvfree(service_context);
     }
     INIT_LIST_HEAD(&g_cpu_service_block_context_list);
@@ -2110,8 +2128,17 @@ static uvm_service_block_context_t *service_block_context_cpu_alloc(void)
     uvm_spin_unlock(&g_cpu_service_block_context_list_lock);
 
-    if (!service_context)
+    if (!service_context) {
         service_context = uvm_kvmalloc(sizeof(*service_context));
+        service_context->block_context = uvm_va_block_context_alloc(NULL);
+        if (!service_context->block_context) {
+            uvm_kvfree(service_context);
+            service_context = NULL;
+        }
+    }
+    else {
+        uvm_va_block_context_init(service_context->block_context, NULL);
+    }
 
     return service_context;
 }
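
This hunk turns the allocator into a pop-from-cache-or-allocate pattern: a cached context is popped under the lock and re-initialized, and only a cache miss pays for fresh allocations. A sketch of that pattern with hypothetical names follows; note the sketch additionally guards the first allocation before dereferencing it, a defensive choice of this sketch rather than a line from the diff:

    #include <pthread.h>
    #include <stdlib.h>

    struct service_context {
        int *block_context;
        struct service_context *next;
    };

    static struct service_context *g_free_list;
    static pthread_mutex_t g_free_list_lock = PTHREAD_MUTEX_INITIALIZER;

    struct service_context *service_context_alloc(void)
    {
        struct service_context *sc;

        pthread_mutex_lock(&g_free_list_lock);
        sc = g_free_list;
        if (sc)
            g_free_list = sc->next;     /* reuse a preallocated entry */
        pthread_mutex_unlock(&g_free_list_lock);

        if (!sc) {
            sc = malloc(sizeof(*sc));
            if (!sc)
                return NULL;            /* guard before touching fields */
            sc->block_context = malloc(sizeof(int));
            if (!sc->block_context) {
                free(sc);
                return NULL;
            }
        }
        else {
            *sc->block_context = 0;     /* re-init the reused sub-context */
        }
        return sc;
    }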
@@ -2137,6 +2164,7 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,
     NV_STATUS status = uvm_global_get_status();
     bool tools_enabled;
     bool major_fault = false;
+    bool is_remote_mm = false;
     uvm_service_block_context_t *service_context;
     uvm_global_processor_mask_t gpus_to_check_for_ecc;
@@ -2177,7 +2205,7 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,
     // mmap_lock held on the CPU fault path, so tell the fault handler to use
     // that one. current->mm might differ if we're on the access_process_vm
     // (ptrace) path or if another driver is calling get_user_pages.
-    service_context->block_context.mm = vma->vm_mm;
+    service_context->block_context->mm = vma->vm_mm;
 
     // The mmap_lock might be held in write mode, but the mode doesn't matter
     // for the purpose of lock ordering and we don't rely on it being in write
@@ -2216,25 +2244,32 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,
             uvm_tools_record_throttling_end(va_space, fault_addr, UVM_ID_CPU);
 
         if (is_hmm) {
-            // Note that normally we should find a va_block for the faulting
-            // address because the block had to be created when migrating a
-            // page to the GPU and a device private PTE inserted into the CPU
-            // page tables in order for migrate_to_ram() to be called. Not
-            // finding it means the PTE was remapped to a different virtual
-            // address with mremap() so create a new va_block if needed.
-            status = uvm_hmm_va_block_find_create(va_space,
-                                                  fault_addr,
-                                                  &service_context->block_context.hmm.vma,
-                                                  &va_block);
-            if (status != NV_OK)
-                break;
+            if (va_space->va_space_mm.mm == vma->vm_mm) {
+                // Note that normally we should find a va_block for the faulting
+                // address because the block had to be created when migrating a
+                // page to the GPU and a device private PTE inserted into the CPU
+                // page tables in order for migrate_to_ram() to be called. Not
+                // finding it means the PTE was remapped to a different virtual
+                // address with mremap() so create a new va_block if needed.
+                status = uvm_hmm_va_block_find_create(va_space,
+                                                      fault_addr,
+                                                      &service_context->block_context->hmm.vma,
+                                                      &va_block);
+                if (status != NV_OK)
+                    break;
 
-            UVM_ASSERT(service_context->block_context.hmm.vma == vma);
-            status = uvm_hmm_migrate_begin(va_block);
-            if (status != NV_OK)
-                break;
+                UVM_ASSERT(service_context->block_context->hmm.vma == vma);
+                status = uvm_hmm_migrate_begin(va_block);
+                if (status != NV_OK)
+                    break;
 
-            service_context->cpu_fault.vmf = vmf;
+                service_context->cpu_fault.vmf = vmf;
+            }
+            else {
+                is_remote_mm = true;
+                status = uvm_hmm_remote_cpu_fault(vmf);
+                break;
+            }
         }
         else {
             status = uvm_va_block_find_create_managed(va_space, fault_addr, &va_block);
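
The hunk above splits HMM fault servicing by address space: the fault is serviced locally only when it arrives on the mm the VA space is bound to, otherwise a remote path takes it and `is_remote_mm` makes the local epilogue (the ECC checks in the final two hunks) skip itself. A compact sketch of that control flow, with hypothetical names standing in for the UVM types:

    #include <stdbool.h>

    struct fault { int mm; };
    struct space { int bound_mm; };

    static int service_local(struct fault *f)  { (void)f; return 0; }
    static int service_remote(struct fault *f) { (void)f; return 0; }

    int handle_fault(struct space *s, struct fault *f)
    {
        bool is_remote_mm = false;
        int status;

        if (f->mm == s->bound_mm) {
            status = service_local(f);      /* full local servicing */
        }
        else {
            is_remote_mm = true;            /* remember to skip the local epilogue */
            status = service_remote(f);
        }

        if (status == 0 && !is_remote_mm) {
            /* local-only epilogue, e.g. the ECC checks below */
        }
        return status;
    }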
@@ -2265,7 +2300,7 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,
 
     tools_enabled = va_space->tools.enabled;
 
-    if (status == NV_OK) {
+    if (status == NV_OK && !is_remote_mm) {
         uvm_va_space_global_gpus_in_mask(va_space,
                                          &gpus_to_check_for_ecc,
                                          &service_context->cpu_fault.gpus_to_check_for_ecc);
@@ -2275,7 +2310,7 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,
     uvm_va_space_up_read(va_space);
     uvm_record_unlock_mmap_lock_read(vma->vm_mm);
 
-    if (status == NV_OK) {
+    if (status == NV_OK && !is_remote_mm) {
         status = uvm_global_mask_check_ecc_error(&gpus_to_check_for_ecc);
         uvm_global_mask_release(&gpus_to_check_for_ecc);
     }