550.40.59

2026-03-06 21:59:55 +00:00 · 2024-04-01 11:46:00 -07:00
parent 66b6384d48
commit acebc4bb78
55 changed files with 1142 additions and 514 deletions
--- a/kernel-open/nvidia-uvm/uvm_va_space_mm.c
+++ b/kernel-open/nvidia-uvm/uvm_va_space_mm.c
@@ -417,9 +417,7 @@ static void uvm_va_space_mm_shutdown(uvm_va_space_t *va_space)
    uvm_va_space_mm_t *va_space_mm = &va_space->va_space_mm;
    uvm_gpu_va_space_t *gpu_va_space;
    uvm_gpu_t *gpu;
-    // TODO: Bug 4351121: retained_gpus should be pre-allocated, not on the
-    // stack.
-    uvm_processor_mask_t retained_gpus;
+    uvm_processor_mask_t *retained_gpus = &va_space_mm->scratch_processor_mask;
    uvm_parent_processor_mask_t flushed_parent_gpus;
    LIST_HEAD(deferred_free_list);

@@ -443,32 +441,34 @@ static void uvm_va_space_mm_shutdown(uvm_va_space_t *va_space)

    // Detach all channels to prevent pending untranslated faults from getting
    // to this VA space. This also removes those channels from the VA space and
-    // puts them on the deferred free list, so only one thread will do this.
+    // puts them on the deferred free list.
    uvm_va_space_down_write(va_space);
    uvm_va_space_detach_all_user_channels(va_space, &deferred_free_list);
-    uvm_processor_mask_and(&retained_gpus, &va_space->registered_gpus, &va_space->faultable_processors);
-    uvm_global_gpu_retain(&retained_gpus);
+    uvm_processor_mask_and(retained_gpus, &va_space->registered_gpus, &va_space->faultable_processors);
+    uvm_global_gpu_retain(retained_gpus);
    uvm_va_space_up_write(va_space);

+    // It's ok to use retained_gpus outside the lock since there can only be one
+    // thread executing in uvm_va_space_mm_shutdown at a time.
+
    // Flush the fault buffer on all registered faultable GPUs.
    // This will avoid spurious cancels of stale pending translated
    // faults after we set UVM_VA_SPACE_MM_STATE_RELEASED later.
    uvm_parent_processor_mask_zero(&flushed_parent_gpus);
-    for_each_gpu_in_mask(gpu, &retained_gpus) {
+    for_each_gpu_in_mask(gpu, retained_gpus) {
        if (!uvm_parent_processor_mask_test_and_set(&flushed_parent_gpus, gpu->parent->id))
            uvm_gpu_fault_buffer_flush(gpu);
    }

-    uvm_global_gpu_release(&retained_gpus);
+    uvm_global_gpu_release(retained_gpus);

    // Call nvUvmInterfaceUnsetPageDirectory. This has no effect on non-MPS.
    // Under MPS this guarantees that no new GPU accesses will be made using
    // this mm.
    //
-    // We need only one thread to make this call, but two threads in here could
-    // race for it, or we could have one thread in here and one in
-    // destroy_gpu_va_space. Serialize these by starting in write mode then
-    // downgrading to read.
+    // We need only one thread to make this call, but we could have one thread
+    // in here and one in destroy_gpu_va_space. Serialize these by starting in
+    // write mode then downgrading to read.
    uvm_va_space_down_write(va_space);
    uvm_va_space_downgrade_write_rm(va_space);
    for_each_gpu_va_space(gpu_va_space, va_space)