580.65.06

Maneet Singh
2025-08-04 11:15:02 -07:00
parent d890313300
commit 307159f262
1315 changed files with 477791 additions and 279973 deletions


@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2017-2024 NVIDIA Corporation
+    Copyright (c) 2017-2025 NVIDIA Corporation
 
     Permission is hereby granted, free of charge, to any person obtaining a copy
     of this software and associated documentation files (the "Software"), to
@@ -30,96 +30,12 @@
#include "uvm_lock.h"
#include "uvm_pmm_gpu.h"
// Module to handle per-GPU user mappings to sysmem physical memory. Notably,
// this implements a reverse map of the DMA address to {va_block, virt_addr}.
// This is required by the GPU access counters feature since they may provide a
// physical address in the notification packet (GPA notifications). We use the
// table to obtain the VAs of the memory regions being accessed remotely. The
// reverse map is implemented by a radix tree, which is indexed using the
// DMA address. For now, only PAGE_SIZE translations are supported (i.e. no
// big/huge pages).
//
// TODO: Bug 1995015: add support for physically-contiguous mappings.
struct uvm_pmm_sysmem_mappings_struct
{
uvm_gpu_t *gpu;
struct radix_tree_root reverse_map_tree;
uvm_mutex_t reverse_map_lock;
};
// Global initialization/exit functions, that need to be called during driver
// initialization/tear-down. These are needed to allocate/free global internal
// data structures.
NV_STATUS uvm_pmm_sysmem_init(void);
void uvm_pmm_sysmem_exit(void);
// Initialize per-GPU sysmem mapping tracking
NV_STATUS uvm_pmm_sysmem_mappings_init(uvm_gpu_t *gpu, uvm_pmm_sysmem_mappings_t *sysmem_mappings);
// Destroy per-GPU sysmem mapping tracking. The caller must ensure that all the
// mappings have been removed before calling this function.
void uvm_pmm_sysmem_mappings_deinit(uvm_pmm_sysmem_mappings_t *sysmem_mappings);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// dma_addr -> {va_block, virt_addr} mapping is inserted in the reverse map.
NV_STATUS uvm_pmm_sysmem_mappings_add_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 virt_addr,
NvU64 region_size,
uvm_va_block_t *va_block,
uvm_processor_id_t owner);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// entries for the physical region starting at dma_addr are removed from the
// reverse map.
void uvm_pmm_sysmem_mappings_remove_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr);
// Like uvm_pmm_sysmem_mappings_remove_gpu_mapping but it doesn't assert if the
// mapping doesn't exist. See uvm_va_block_evict_chunks for more information.
void uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// mapping for the region starting at dma_addr is updated with va_block.
// This is required on VA block split.
void uvm_pmm_sysmem_mappings_reparent_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
uvm_va_block_t *va_block);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// mapping for the region starting at dma_addr is split into regions of
// new_region_size. new_region_size must be a power of two and smaller than the
// previously-registered size.
NV_STATUS uvm_pmm_sysmem_mappings_split_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 new_region_size);
// If the GPU used to initialize sysmem_mappings supports access counters, all
// the mappings within the region [dma_addr, dma_addr + new_region_size) are
// merged into a single mapping. new_region_size must be a power of two. The
// whole region must be previously populated with mappings and all of them must
// have the same VA block and processor owner.
void uvm_pmm_sysmem_mappings_merge_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 new_region_size);
// Obtain the {va_block, virt_addr} information for the mappings in the given
// [dma_addr:dma_addr + region_size) range. dma_addr and region_size must be
// page-aligned.
//
// Valid translations are written to out_mappings sequentially (there are no
// gaps). max_out_mappings are written, at most. The caller is required to
// provide enough entries in out_mappings.
//
// The VA Block in each returned translation entry is retained, and it's up to
// the caller to release them
size_t uvm_pmm_sysmem_mappings_dma_to_virt(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 region_size,
uvm_reverse_map_t *out_mappings,
size_t max_out_mappings);
#define UVM_CPU_CHUNK_SIZES (UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | PAGE_SIZE)
typedef enum
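For reference, the removed declarations above form a complete lookup workflow: a GPA notification's DMA address goes through uvm_pmm_sysmem_mappings_dma_to_virt(), and each returned VA block must then be released by the caller. The following is a hedged sketch of that workflow, not code from this commit: the va_block field of uvm_reverse_map_t, the uvm_va_block_release() helper, and the kernel ARRAY_SIZE() macro are assumptions based on the removed comments, not verified against this tree.

// Hedged sketch: translating one GPA notification address using the
// reverse-map API removed above.
static void example_translate_gpa(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 notif_addr)
{
    uvm_reverse_map_t out_mappings[16];
    size_t i, num;

    // dma_addr and region_size must be page-aligned; valid translations are
    // written sequentially, at most ARRAY_SIZE(out_mappings) of them.
    num = uvm_pmm_sysmem_mappings_dma_to_virt(sysmem_mappings,
                                              notif_addr & ~((NvU64)PAGE_SIZE - 1),
                                              PAGE_SIZE,
                                              out_mappings,
                                              ARRAY_SIZE(out_mappings));

    for (i = 0; i < num; i++) {
        // ... consume {va_block, virt_addr} for entry i ...

        // Each returned VA block is retained; the caller releases it
        // (uvm_va_block_release() is an assumed helper).
        uvm_va_block_release(out_mappings[i].va_block);
    }
}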
@@ -371,6 +287,12 @@ void uvm_cpu_chunk_free(uvm_cpu_chunk_t *chunk);
 // a kernel virtual mapping for the chunk. The virtual mapping persists until
 // GPU deinitialization, such that no unmap functionality is exposed.
 // For more details see uvm_mmu_sysmem_map().
+//
+// Before accessing this mapping, the caller is responsible for issuing any
+// physical GPU TLB invalidations required by the architecture, such as with
+// uvm_mmu_tlb_invalidate_phys() or uvm_hal_tlb_invalidate_phys(). This is an
+// optimization that allows the caller to batch, defer, and otherwise control
+// when the operation is issued.
 NV_STATUS uvm_cpu_chunk_map_gpu(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu);
 
 // Destroy a CPU chunk's DMA mapping for the given GPU.
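The comment added in the hunk above establishes a map-then-invalidate contract. A hedged sketch of the simplest correct caller follows; only the names uvm_mmu_tlb_invalidate_phys() and uvm_cpu_chunk_map_gpu() come from the diff, and the single-argument signature used here is an assumption for illustration.

// Hedged sketch: honor the new contract by invalidating before first access.
static NV_STATUS example_map_and_access(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
{
    NV_STATUS status = uvm_cpu_chunk_map_gpu(chunk, gpu);
    if (status != NV_OK)
        return status;

    // Per the comment above, callers may batch or defer this; issuing it
    // immediately per mapping is the simplest correct choice.
    uvm_mmu_tlb_invalidate_phys(gpu); // assumed signature

    // ... the GPU may now access the chunk through its kernel virtual mapping ...
    return NV_OK;
}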
@@ -425,9 +347,9 @@ void uvm_cpu_chunk_mark_clean(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
 bool uvm_cpu_chunk_is_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
 
 static NV_STATUS uvm_test_get_cpu_chunk_allocation_sizes(UVM_TEST_GET_CPU_CHUNK_ALLOC_SIZES_PARAMS *params,
-                                                        struct file *filp)
+                                                         struct file *filp)
 {
-   params->alloc_size_mask = (NvU32)uvm_cpu_chunk_get_allocation_sizes();
-   return NV_OK;
+    params->alloc_size_mask = (NvU32)uvm_cpu_chunk_get_allocation_sizes();
+    return NV_OK;
 }
 
 #endif
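The test ioctl above reports the supported CPU chunk sizes as a bitmask, where each set bit is one size from UVM_CPU_CHUNK_SIZES (2M, 64K, and the kernel PAGE_SIZE). A hedged sketch of decoding that mask follows; it is illustration only, and the printk() reporting is an assumption rather than anything this driver does.

// Hedged sketch: walk the set bits of alloc_size_mask, lowest first.
static void example_print_chunk_sizes(NvU32 alloc_size_mask)
{
    while (alloc_size_mask != 0) {
        NvU32 size = alloc_size_mask & ~(alloc_size_mask - 1); // isolate lowest set bit
        printk(KERN_INFO "supported CPU chunk size: %u bytes\n", size);
        alloc_size_mask &= alloc_size_mask - 1;                // clear that bit
    }
}

On a 4K-page kernel this would print 4096, 65536, and 2097152, matching the three sizes OR'd together in UVM_CPU_CHUNK_SIZES.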