Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git
595.71.05
@@ -1230,3 +1230,4 @@ module_exit(uvm_exit_entry);
 MODULE_LICENSE("Dual MIT/GPL");
 MODULE_INFO(supported, "external");
 MODULE_VERSION(NV_VERSION_STRING);
+MODULE_DESCRIPTION("NVIDIA Unified Virtual Memory kernel module");
@@ -1800,6 +1800,7 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
         // ATS faults can't be unserviceable, since unserviceable faults require
         // GMMU PTEs.
         UVM_ASSERT(!current_entry->is_fatal);
+        UVM_ASSERT(current_entry->gpu == gpu);

         i++;
@@ -1849,6 +1850,7 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,

     } while (current_entry &&
              (current_entry->fault_address < outer) &&
+             (previous_entry->gpu == current_entry->gpu) &&
              (previous_entry->va_space == current_entry->va_space));

     // Service the last sub-batch.
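For context: the two additions above tighten the invariant that an ATS fault sub-batch never crosses a GPU boundary, just as it already could not cross a VA space boundary. A minimal standalone sketch of that grouping rule, using simplified stand-in types (fault_entry_t and its fields are illustrative, not the driver's structs):

    #include <stddef.h>
    #include <stdint.h>

    /* Simplified stand-in for the driver's fault buffer entry. */
    typedef struct {
        int gpu_id;
        int va_space_id;
        uint64_t fault_address;
    } fault_entry_t;

    /* Length of the sub-batch starting at 'first': consecutive entries that
     * stay below 'outer' and share one GPU and one VA space, mirroring the
     * do/while condition and the new previous_entry->gpu check above. */
    static size_t sub_batch_len(const fault_entry_t *e, size_t count, size_t first, uint64_t outer)
    {
        size_t i = first;

        while (i + 1 < count &&
               e[i + 1].fault_address < outer &&
               e[i].gpu_id == e[i + 1].gpu_id &&
               e[i].va_space_id == e[i + 1].va_space_id)
            i++;

        return i - first + 1;
    }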
@@ -1582,6 +1582,31 @@ uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
     return uvm_va_block_region_from_start_end(va_block, start, end);
 }

+uvm_prot_t uvm_hmm_compute_mapping_prot(uvm_va_block_t *va_block,
+                                        uvm_processor_id_t processor_id,
+                                        uvm_page_index_t page_index)
+{
+    if (!uvm_processor_mask_test(&va_block->mapped, UVM_ID_CPU))
+        return UVM_PROT_NONE;
+
+    if (uvm_page_mask_test(&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_WRITE], page_index)) {
+        if (uvm_processor_mask_test(&va_block->hmm.va_space->has_native_atomics[uvm_id_value(UVM_ID_CPU)],
+                                    processor_id))
+            // If the CPU has write access it also has atomic access, so it's
+            // fine for any GPU with HW support to do atomic accesses.
+            return UVM_PROT_READ_WRITE_ATOMIC;
+        else
+            // Otherwise the GPU needs to fault on atomic access to ensure the
+            // CPU is unmapped.
+            return UVM_PROT_READ_WRITE;
+    }
+
+    if (uvm_page_mask_test(&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_READ], page_index))
+        return UVM_PROT_READ_ONLY;
+
+    return UVM_PROT_NONE;
+}
+
 uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
                                         struct vm_area_struct *vma,
                                         NvU64 addr)
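The new function walks the CPU PTE bits from strongest to weakest: writable with native CPU atomics on the target processor yields read/write/atomic, writable without them yields read/write, readable yields read-only, else none. A compact userspace model of the same decision tree, with hypothetical simplified types (prot_t and the boolean flags stand in for uvm_prot_t and the UVM processor/page masks):

    #include <stdbool.h>

    typedef enum {
        PROT_NONE,
        PROT_READ_ONLY,
        PROT_READ_WRITE,
        PROT_READ_WRITE_ATOMIC
    } prot_t;

    /* Same decision tree as uvm_hmm_compute_mapping_prot(): the CPU's PTE
     * bits bound what a GPU mapping may be granted. */
    static prot_t mapping_prot(bool cpu_mapped, bool cpu_write, bool cpu_read, bool gpu_native_atomics)
    {
        if (!cpu_mapped)
            return PROT_NONE;

        if (cpu_write)
            return gpu_native_atomics ? PROT_READ_WRITE_ATOMIC : PROT_READ_WRITE;

        if (cpu_read)
            return PROT_READ_ONLY;

        return PROT_NONE;
    }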
@@ -258,6 +258,14 @@ typedef struct
                                       const uvm_va_policy_t *policy,
                                       NvU64 address);

+// Return the actual permissions allowed when mapping a page within a
+// va_block on the given processor_id. This may differ from the logical
+// permission if for example the kernel has the CPU pages mapped read-only
+// to do copy-on-write.
+uvm_prot_t uvm_hmm_compute_mapping_prot(uvm_va_block_t *va_block,
+                                        uvm_processor_id_t processor_id,
+                                        uvm_page_index_t page_index);
+
 // Return the logical protection allowed of a HMM va_block for the page at
 // the given address within the vma which must be valid. This is usually
 // obtained from uvm_hmm_va_block_find_create().
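The logical/actual distinction in the new comment can be reproduced from plain userspace: a private writable file mapping is logically read/write, but the kernel initially installs read-only PTEs so the first store takes a copy-on-write fault. A small illustration (ordinary POSIX code, not UVM; the file path is arbitrary):

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("/etc/hostname", O_RDONLY);  /* any readable file works */
        if (fd < 0)
            return 1;

        /* Logical protection: read/write. Actual PTEs: read-only at first,
         * because MAP_PRIVATE pages are shared with the page cache until the
         * first store triggers a copy-on-write fault. */
        char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
        if (p == MAP_FAILED) {
            close(fd);
            return 1;
        }

        p[0] = 'x';  /* COW fault; the kernel now installs a writable PTE */

        munmap(p, 4096);
        close(fd);
        return 0;
    }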
@@ -561,6 +569,13 @@ typedef struct
     return (uvm_va_block_region_t){};
 }

+static uvm_prot_t uvm_hmm_compute_mapping_prot(uvm_va_block_t *va_block,
+                                               uvm_processor_id_t processor_id,
+                                               uvm_page_index_t page_index)
+{
+    return UVM_PROT_NONE;
+}
+
 static uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
                                                struct vm_area_struct *vma,
                                                NvU64 addr)
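This hunk is the build-time stub: when UVM is compiled without HMM support, the header supplies a static version that returns the most restrictive answer, so call sites need no #ifdefs of their own. The general shape of the pattern, as a self-contained sketch (CONFIG_FEATURE, prot_t, and feature_compute_prot() are illustrative names, not the UVM macros):

    typedef enum { PROT_NONE, PROT_READ_ONLY } prot_t;

    #define CONFIG_FEATURE 0  /* 1 when the real implementation is built in */

    #if CONFIG_FEATURE
    /* Real version lives in a .c file. */
    prot_t feature_compute_prot(int page_index);
    #else
    /* Stub: most restrictive answer, zero cost, no #ifdef at call sites. */
    static inline prot_t feature_compute_prot(int page_index)
    {
        (void)page_index;
        return PROT_NONE;
    }
    #endif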
@@ -10991,6 +10991,13 @@ uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block,
     uvm_processor_mask_t *resident_processors = &va_block_context->scratch_processor_mask;
     NvU32 resident_processors_count;

+    // TODO: Bug 5841902
+    // There are several calls to uvm_va_block_is_hmm() which need to be removed
+    if (uvm_va_block_is_hmm(va_block))
+        return uvm_hmm_compute_mapping_prot(va_block,
+                                            processor_id,
+                                            page_index);
+
     uvm_va_block_page_resident_processors(va_block, page_index, resident_processors);
     resident_processors_count = uvm_processor_mask_get_count(resident_processors);
@@ -929,8 +929,12 @@ void uvm_va_range_remove_gpu_va_space(uvm_va_range_t *va_range,
                                               gpu_va_space->gpu);
         break;
     case UVM_VA_RANGE_TYPE_DEVICE_P2P:
-        unmap_mapping_range(va_range->va_space->mapping, va_range->node.start, uvm_va_range_size(va_range), 1);
-        uvm_va_range_deinit_device_p2p(uvm_va_range_to_device_p2p(va_range), deferred_free_list);
+        // Device P2P ranges are associated with a specific GPU so destroy
+        // the range entirely if unregistering the associated GPU.
+        if (uvm_va_range_to_device_p2p(va_range)->gpu == gpu_va_space->gpu) {
+            unmap_mapping_range(va_range->va_space->mapping, va_range->node.start, uvm_va_range_size(va_range), 1);
+            uvm_va_range_deinit_device_p2p(uvm_va_range_to_device_p2p(va_range), deferred_free_list);
+        }
         break;
     default:
         UVM_ASSERT_MSG(0, "[0x%llx, 0x%llx] has type %d\n",
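The fix above changes an unconditional teardown into an ownership-filtered one: only P2P ranges belonging to the GPU whose VA space is being removed are destroyed, while ranges owned by other GPUs survive. A minimal sketch of that filter, using illustrative stand-in types (p2p_range_t and its fields are not the UVM structs):

    #include <stddef.h>

    typedef struct {
        int owner_gpu;
        int live;
    } p2p_range_t;

    /* Tear down only the ranges owned by 'gpu', as the new ownership check
     * does; ranges belonging to other GPUs must survive the unregister. */
    static void remove_gpu_ranges(p2p_range_t *ranges, size_t count, int gpu)
    {
        for (size_t i = 0; i < count; i++) {
            if (ranges[i].live && ranges[i].owner_gpu == gpu)
                ranges[i].live = 0;  /* unmap + deinit in the real code */
        }
    }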
@@ -1172,9 +1176,10 @@ void uvm_va_range_unregister_gpu(uvm_va_range_t *va_range,
         va_range_unregister_gpu_semaphore_pool(uvm_va_range_to_semaphore_pool(va_range), gpu);
         break;
     case UVM_VA_RANGE_TYPE_DEVICE_P2P:
-        // All ranges should have been deinited by GPU VA space unregister,
-        // which should have already happened.
-        UVM_ASSERT(!uvm_va_range_to_device_p2p(va_range)->p2p_mem);
+        // All ranges for this GPU should have been deinited by GPU VA space
+        // unregister, which should have already happened.
+        if (uvm_va_range_to_device_p2p(va_range)->p2p_mem != NULL)
+            UVM_ASSERT(uvm_va_range_to_device_p2p(va_range)->gpu != gpu);
+        break;
     default:
         UVM_ASSERT_MSG(0, "[0x%llx, 0x%llx] has type %d\n",
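This hunk relaxes the matching assertion: since P2P ranges for other GPUs now legitimately outlive this unregister, the check becomes "any surviving p2p_mem must belong to a different GPU" rather than "no p2p_mem survives". The relaxed invariant, as a small sketch with illustrative types (not the UVM structs):

    #include <assert.h>
    #include <stddef.h>

    typedef struct {
        int owner_gpu;
        void *p2p_mem;
    } p2p_range_t;

    /* After unregistering 'gpu', a range may still hold p2p_mem, but only
     * if it is owned by some other GPU. */
    static void assert_unregistered(const p2p_range_t *r, int gpu)
    {
        if (r->p2p_mem != NULL)
            assert(r->owner_gpu != gpu);
    }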