535.43.02

Author: Andy Ritger
Date:   2023-05-30 10:11:36 -07:00
Commit: eb5c7665a1
Parent: 6dd092ddb7

1403 changed files with 295367 additions and 86235 deletions


@@ -171,8 +171,8 @@ NV_STATUS uvm_va_space_create(struct address_space *mapping, uvm_va_space_t **va
         return NV_ERR_NO_MEMORY;
 
     if (flags & ~UVM_INIT_FLAGS_MASK) {
-        uvm_kvfree(va_space);
-        return NV_ERR_INVALID_ARGUMENT;
+        uvm_kvfree(va_space);
+        return NV_ERR_INVALID_ARGUMENT;
     }
 
     uvm_init_rwsem(&va_space->lock, UVM_LOCK_ORDER_VA_SPACE);
@@ -262,6 +262,7 @@ fail:
     uvm_perf_destroy_va_space_events(&va_space->perf_events);
     uvm_va_space_up_write(va_space);
     uvm_up_write_mmap_lock(current->mm);
 
+    // See the comment in uvm_va_space_mm_unregister() for why this has to be
+    // called after releasing the locks.
     uvm_va_space_mm_unregister(va_space);
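
The new comment points at a lock-ordering constraint: uvm_va_space_mm_unregister() can wait for work that itself needs the locks dropped just above it. Below is a standalone sketch of that constraint using plain pthreads and made-up names, not the driver's locking primitives or its actual teardown code.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t va_space_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_t retainer;

static void *retainer_fn(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&va_space_lock);   /* stalls while teardown holds the lock */
    /* ... retained work ... */
    pthread_mutex_unlock(&va_space_lock);
    return NULL;
}

/* Stand-in for uvm_va_space_mm_unregister(): waits for the retainer to finish. */
static void mm_unregister_and_wait(void)
{
    pthread_join(retainer, NULL);
}

int main(void)
{
    pthread_create(&retainer, NULL, retainer_fn, NULL);

    pthread_mutex_lock(&va_space_lock);
    /* ... failure-path cleanup that needs the lock ... */
    pthread_mutex_unlock(&va_space_lock);

    /* Safe only here: calling mm_unregister_and_wait() before the unlock
     * above could wait forever on a thread that needs va_space_lock. */
    mm_unregister_and_wait();

    puts("teardown complete");
    return 0;
}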
@@ -366,6 +367,11 @@ static void unregister_gpu(uvm_va_space_t *va_space,
         }
     }
 
+    if (va_space->gpu_unregister_dma_buffer[uvm_id_gpu_index(gpu->id)]) {
+        uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool,
+                                           va_space->gpu_unregister_dma_buffer[uvm_id_gpu_index(gpu->id)],
+                                           &va_space->gpu_unregister_dma_buffer[uvm_id_gpu_index(gpu->id)]->tracker);
+    }
+
     va_space_check_processors_masks(va_space);
 }
@@ -412,6 +418,15 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
     uvm_global_processor_mask_t retained_gpus;
     LIST_HEAD(deferred_free_list);
 
+    // Normally we'd expect this to happen as part of uvm_mm_release()
+    // but if userspace never initialized uvm_mm_fd that won't happen.
+    // We don't have to take the va_space_mm spinlock and update state
+    // here because we know no other thread can be in or subsequently
+    // call uvm_api_mm_initialize successfully because the UVM
+    // file-descriptor has been released.
+    if (va_space->va_space_mm.state == UVM_VA_SPACE_MM_STATE_UNINITIALIZED)
+        uvm_va_space_mm_unregister(va_space);
+
     // Remove the VA space from the global list before we start tearing things
     // down so other threads can't see the VA space in a partially-valid state.
     uvm_mutex_lock(&g_uvm_global.va_spaces.lock);
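
This hunk makes uvm_va_space_destroy() cover the case where userspace never initialized the mm file descriptor, so uvm_mm_release() never ran; a later hunk in the same function replaces the old unconditional unregister with an assertion. A minimal sketch of that state handling follows, with invented names and none of the driver's locking.

#include <assert.h>
#include <stdio.h>

enum mm_state { MM_UNINITIALIZED, MM_ALIVE, MM_RELEASED };

struct va_space_mm { enum mm_state state; };

static void mm_unregister(struct va_space_mm *mm)
{
    mm->state = MM_RELEASED;
}

/* Stand-in for the uvm_mm_fd release path: only runs the unregister if
 * userspace initialized the mm in the first place. */
static void mm_fd_release(struct va_space_mm *mm)
{
    if (mm->state == MM_ALIVE)
        mm_unregister(mm);
}

static void va_space_destroy(struct va_space_mm *mm)
{
    /* Mirrors the new check: pick up the case mm_fd_release() never saw.
     * No lock is needed because the fd is already gone, so no racing
     * initialize can happen. */
    if (mm->state == MM_UNINITIALIZED)
        mm_unregister(mm);

    /* Mirrors the assertion added later in uvm_va_space_destroy(). */
    assert(mm->state == MM_RELEASED);
}

int main(void)
{
    struct va_space_mm never_initialized = { MM_UNINITIALIZED };
    struct va_space_mm initialized = { MM_ALIVE };

    va_space_destroy(&never_initialized);   /* destroy does the unregister */

    mm_fd_release(&initialized);            /* normal path: fd teardown did it */
    va_space_destroy(&initialized);

    puts("both paths end with the mm unregistered");
    return 0;
}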
@@ -517,17 +532,8 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
     uvm_deferred_free_object_list(&deferred_free_list);
 
-    // Remove the mm_struct association on this VA space, if any. This may
-    // invoke uvm_va_space_mm_shutdown(), which in turn will disable all
-    // channels and wait for any retainers to finish, so it has to be done
-    // outside of the VA space lock.
-    //
-    // Since we must already handle mm shutdown being called at any point prior
-    // to this call, this call can be made at any point in
-    // uvm_va_space_destroy(). It's beneficial to do it late after doing all
-    // deferred frees for GPU VA spaces and channels, because then
-    // uvm_va_space_mm_shutdown() will have minimal work to do.
-    uvm_va_space_mm_unregister(va_space);
+    // MM FD teardown should already have destroyed va_space_mm
+    UVM_ASSERT(!uvm_va_space_mm_alive(&va_space->va_space_mm));
 
     uvm_mutex_lock(&g_uvm_global.global_lock);
@@ -684,12 +690,9 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
         goto done;
     }
 
-    // Mixing Volta and Pascal GPUs is not supported on P9 systems.
+    // Mixing coherent and non-coherent GPUs is not supported
     for_each_va_space_gpu(other_gpu, va_space) {
-        if ((gpu->parent->sysmem_link >= UVM_GPU_LINK_NVLINK_2 &&
-             other_gpu->parent->sysmem_link < UVM_GPU_LINK_NVLINK_2) ||
-            (gpu->parent->sysmem_link < UVM_GPU_LINK_NVLINK_2 &&
-             other_gpu->parent->sysmem_link >= UVM_GPU_LINK_NVLINK_2)) {
+        if (uvm_gpu_is_coherent(gpu->parent) != uvm_gpu_is_coherent(other_gpu->parent)) {
             status = NV_ERR_INVALID_DEVICE;
             goto done;
         }
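
The rewritten check collapses the old pairwise link comparison into a single inequality. Assuming, as the removed condition suggests, that "coherent" means a system-memory link of NVLink-2 or better, the two forms are equivalent; the standalone program below (toy enum, not the driver's link types) verifies that for every pair of link values.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

enum link { PCIE = 1, NVLINK_1, NVLINK_2, NVLINK_3 };

static bool is_coherent(enum link l)
{
    return l >= NVLINK_2;
}

int main(void)
{
    const enum link links[] = { PCIE, NVLINK_1, NVLINK_2, NVLINK_3 };

    for (int i = 0; i < 4; i++) {
        for (int j = 0; j < 4; j++) {
            enum link a = links[i];
            enum link b = links[j];

            /* Removed form: exactly one of the two GPUs reaches NVLink-2. */
            bool old_check = (a >= NVLINK_2 && b < NVLINK_2) ||
                             (a < NVLINK_2 && b >= NVLINK_2);

            /* New form: the two GPUs disagree on coherence. */
            bool new_check = is_coherent(a) != is_coherent(b);

            assert(old_check == new_check);
        }
    }

    puts("old and new mixed-GPU checks agree for every link pair");
    return 0;
}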
@@ -701,6 +704,17 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
         goto done;
     }
 
+    if (uvm_conf_computing_mode_enabled(gpu)) {
+        NvU32 gpu_index = uvm_id_gpu_index(gpu->id);
+        status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool,
+                                                     &va_space->gpu_unregister_dma_buffer[gpu_index],
+                                                     NULL);
+        if (status != NV_OK)
+            goto done;
+
+        gpu_can_access_sysmem = false;
+    }
+
     uvm_processor_mask_set(&va_space->registered_gpus, gpu->id);
     va_space->registered_gpus_table[uvm_id_gpu_index(gpu->id)] = gpu;
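
The buffer allocated here is the same per-GPU slot that the unregister_gpu() hunk earlier in this diff frees, presumably so that unregistration in the Confidential Computing case never has to allocate; the hunk also clears gpu_can_access_sysmem for such GPUs. A reduced sketch of the allocate-at-register / free-at-unregister pairing, using malloc and invented names instead of the UVM buffer pool API:

#include <stdlib.h>

#define MAX_GPUS 8

struct dma_buffer { int gpu_index; };

static struct dma_buffer *unregister_buffer[MAX_GPUS];

static int register_gpu(int gpu_index)
{
    /* Reserve the buffer up front; failing here fails the whole registration. */
    unregister_buffer[gpu_index] = malloc(sizeof(struct dma_buffer));
    if (!unregister_buffer[gpu_index])
        return -1;

    unregister_buffer[gpu_index]->gpu_index = gpu_index;
    return 0;
}

static void unregister_gpu(int gpu_index)
{
    /* Mirror of the free added to unregister_gpu(): only if registration
     * actually reserved a buffer for this GPU. */
    if (unregister_buffer[gpu_index]) {
        free(unregister_buffer[gpu_index]);
        unregister_buffer[gpu_index] = NULL;
    }
}

int main(void)
{
    if (register_gpu(0) == 0)
        unregister_gpu(0);

    unregister_gpu(1);   /* never registered: the guard makes this a no-op */
    return 0;
}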
@@ -713,15 +727,16 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
     // All GPUs have native atomics on their own memory
     processor_mask_array_set(va_space->has_native_atomics, gpu->id, gpu->id);
 
-    if (gpu->parent->sysmem_link >= UVM_GPU_LINK_NVLINK_1) {
+    // TODO: Bug 3252572: Support the new link type UVM_GPU_LINK_C2C
+    if (gpu->parent->system_bus.link >= UVM_GPU_LINK_NVLINK_1) {
         processor_mask_array_set(va_space->has_nvlink, gpu->id, UVM_ID_CPU);
         processor_mask_array_set(va_space->has_nvlink, UVM_ID_CPU, gpu->id);
     }
 
-    if (gpu->parent->sysmem_link >= UVM_GPU_LINK_NVLINK_2) {
+    if (uvm_gpu_is_coherent(gpu->parent)) {
         processor_mask_array_set(va_space->has_native_atomics, gpu->id, UVM_ID_CPU);
 
-        if (gpu->parent->numa_info.enabled) {
+        if (gpu->mem_info.numa.enabled) {
             processor_mask_array_set(va_space->can_access, UVM_ID_CPU, gpu->id);
             processor_mask_array_set(va_space->accessible_from, gpu->id, UVM_ID_CPU);
             processor_mask_array_set(va_space->has_native_atomics, UVM_ID_CPU, gpu->id);
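
The mask updates in this hunk follow one pattern: each property (can_access, accessible_from, has_nvlink, has_native_atomics) is an array of processor masks indexed by processor ID, and a mutual capability is recorded once per direction. A self-contained illustration with a toy 64-bit bitmask standing in for the driver's processor mask type and helpers:

#include <stdint.h>
#include <stdio.h>

#define CPU_ID 0
#define GPU0_ID 1
#define MAX_PROCESSORS 33   /* CPU plus up to 32 GPUs, as an example */

typedef uint64_t proc_mask_t;

static proc_mask_t can_access[MAX_PROCESSORS];
static proc_mask_t has_native_atomics[MAX_PROCESSORS];

static void mask_array_set(proc_mask_t *masks, int from, int to)
{
    masks[from] |= (proc_mask_t)1 << to;
}

static int mask_array_test(const proc_mask_t *masks, int from, int to)
{
    return (int)((masks[from] >> to) & 1);
}

int main(void)
{
    /* Every processor can access, and do atomics on, its own memory. */
    mask_array_set(can_access, GPU0_ID, GPU0_ID);
    mask_array_set(has_native_atomics, GPU0_ID, GPU0_ID);

    /* A coherent link records the CPU<->GPU pair in both directions. */
    mask_array_set(has_native_atomics, GPU0_ID, CPU_ID);
    mask_array_set(has_native_atomics, CPU_ID, GPU0_ID);

    printf("GPU0 atomic on CPU memory: %d\n",
           mask_array_test(has_native_atomics, GPU0_ID, CPU_ID));
    return 0;
}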
@@ -777,9 +792,9 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
         goto cleanup;
     }
 
-    if (gpu->parent->numa_info.enabled) {
+    if (gpu->mem_info.numa.enabled) {
         *numa_enabled = NV_TRUE;
-        *numa_node_id = (NvS32)uvm_gpu_numa_info(gpu)->node_id;
+        *numa_node_id = (NvS32)uvm_gpu_numa_node(gpu);
     }
     else {
         *numa_enabled = NV_FALSE;
@@ -987,6 +1002,7 @@ static NV_STATUS enable_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu0, uvm_gpu
         return NV_ERR_NOT_COMPATIBLE;
     }
 
+    // TODO: Bug 3848497: Disable GPU Peer Mapping when HCC is enabled
     processor_mask_array_set(va_space->can_access, gpu0->id, gpu1->id);
     processor_mask_array_set(va_space->can_access, gpu1->id, gpu0->id);
     processor_mask_array_set(va_space->accessible_from, gpu0->id, gpu1->id);
@@ -1012,8 +1028,8 @@ static NV_STATUS enable_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu0, uvm_gpu
     if (peer_caps->is_indirect_peer) {
         UVM_ASSERT(peer_caps->link_type >= UVM_GPU_LINK_NVLINK_2);
-        UVM_ASSERT(gpu0->parent->numa_info.enabled);
-        UVM_ASSERT(gpu1->parent->numa_info.enabled);
+        UVM_ASSERT(gpu0->mem_info.numa.enabled);
+        UVM_ASSERT(gpu1->mem_info.numa.enabled);
 
         processor_mask_array_set(va_space->indirect_peers, gpu0->id, gpu1->id);
         processor_mask_array_set(va_space->indirect_peers, gpu1->id, gpu0->id);
@@ -1411,6 +1427,10 @@ NV_STATUS uvm_va_space_register_gpu_va_space(uvm_va_space_t *va_space,
         return NV_ERR_INVALID_DEVICE;
 
     mm = uvm_va_space_mm_or_current_retain(va_space);
+    if (!mm) {
+        status = NV_ERR_PAGE_TABLE_NOT_AVAIL;
+        goto error_gpu_release;
+    }
 
     status = create_gpu_va_space(gpu, va_space, user_rm_va_space, &gpu_va_space);
     if (status != NV_OK)
@@ -2200,9 +2220,24 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,
             uvm_tools_record_throttling_end(va_space, fault_addr, UVM_ID_CPU);
 
         if (is_hmm) {
-            status = uvm_hmm_va_block_cpu_find(va_space, service_context, vmf, &va_block);
+            // Note that normally we should find a va_block for the faulting
+            // address because the block had to be created when migrating a
+            // page to the GPU and a device private PTE inserted into the CPU
+            // page tables in order for migrate_to_ram() to be called. Not
+            // finding it means the PTE was remapped to a different virtual
+            // address with mremap() so create a new va_block if needed.
+            status = uvm_hmm_va_block_find_create(va_space,
+                                                  fault_addr,
+                                                  &service_context->block_context,
+                                                  &va_block);
+            if (status != NV_OK)
+                break;
+
+            status = uvm_hmm_migrate_begin(va_block);
+            if (status != NV_OK)
+                break;
+
+            service_context->cpu_fault.vmf = vmf;
         }
         else {
             status = uvm_va_block_find_create_managed(va_space, fault_addr, &va_block);
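
The new comment explains why the HMM path switched from a plain lookup to find-or-create: after mremap() the device-private PTE can fault at a virtual address no existing va_block covers. The sketch below (invented structures, a fixed-size table instead of the driver's range tracking) shows just that lookup-or-create step; the migrate_begin()/migrate_finish() bracketing added around it is not modeled.

#include <stdio.h>
#include <stdlib.h>

#define BLOCK_SIZE 0x200000UL   /* 2MB-aligned blocks, as an example */
#define MAX_BLOCKS 16

struct va_block { unsigned long start; };

static struct va_block *blocks[MAX_BLOCKS];
static int num_blocks;

static struct va_block *va_block_find_create(unsigned long fault_addr)
{
    unsigned long start = fault_addr & ~(BLOCK_SIZE - 1);

    for (int i = 0; i < num_blocks; i++) {
        if (blocks[i]->start == start)
            return blocks[i];          /* the common case: the block exists */
    }

    if (num_blocks == MAX_BLOCKS)
        return NULL;

    /* mremap()-style case: no block covers this address yet, so create one. */
    struct va_block *block = malloc(sizeof(*block));
    if (!block)
        return NULL;

    block->start = start;
    blocks[num_blocks++] = block;
    return block;
}

int main(void)
{
    struct va_block *a = va_block_find_create(0x7f0000201000UL);
    struct va_block *b = va_block_find_create(0x7f0000202000UL); /* same block */
    struct va_block *c = va_block_find_create(0x7f0000601000UL); /* new block */

    printf("a==b: %d, a==c: %d\n", a == b, a == c);
    return 0;
}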
@@ -2219,10 +2254,10 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,
         status = uvm_va_block_cpu_fault(va_block, fault_addr, is_write, service_context);
 
         if (is_hmm)
-            uvm_hmm_cpu_fault_finish(service_context);
+            uvm_hmm_migrate_finish(va_block);
     } while (status == NV_WARN_MORE_PROCESSING_REQUIRED);
 
-    if (status != NV_OK) {
+    if (status != NV_OK && !(is_hmm && status == NV_ERR_BUSY_RETRY)) {
         UvmEventFatalReason reason;
 
         reason = uvm_tools_status_to_fatal_fault_reason(status);
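
With the change to the final status check, an HMM fault that comes back NV_ERR_BUSY_RETRY is no longer recorded as a fatal fault, while NV_WARN_MORE_PROCESSING_REQUIRED still drives the retry loop above. A small standalone triage sketch with stand-in status codes (what the driver ultimately returns to the kernel for the busy case is not shown in this hunk):

#include <stdbool.h>
#include <stdio.h>

enum status { OK, MORE_PROCESSING_REQUIRED, BUSY_RETRY, INVALID_ADDRESS };

static const char *triage(enum status status, bool is_hmm)
{
    if (status == OK)
        return "done";
    if (status == MORE_PROCESSING_REQUIRED)
        return "service the fault again (the do/while loop)";
    if (is_hmm && status == BUSY_RETRY)
        return "not fatal: allow the access to be retried";
    return "record a fatal fault";
}

int main(void)
{
    printf("HMM busy:      %s\n", triage(BUSY_RETRY, true));
    printf("managed busy:  %s\n", triage(BUSY_RETRY, false));
    printf("bad address:   %s\n", triage(INVALID_ADDRESS, true));
    return 0;
}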