580.65.06

Maneet Singh
2025-08-04 11:15:02 -07:00
parent d890313300
commit 307159f262
1315 changed files with 477791 additions and 279973 deletions


@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2017-2024 NVIDIA Corporation
+    Copyright (c) 2017-2025 NVIDIA Corporation
 
     Permission is hereby granted, free of charge, to any person obtaining a copy
     of this software and associated documentation files (the "Software"), to
@@ -30,96 +30,12 @@
#include "uvm_lock.h"
#include "uvm_pmm_gpu.h"
// Module to handle per-GPU user mappings to sysmem physical memory. Notably,
// this implements a reverse map of the DMA address to {va_block, virt_addr}.
// This is required by the GPU access counters feature since they may provide a
// physical address in the notification packet (GPA notifications). We use the
// table to obtain the VAs of the memory regions being accessed remotely. The
// reverse map is implemented by a radix tree, which is indexed using the
// DMA address. For now, only PAGE_SIZE translations are supported (i.e. no
// big/huge pages).
//
// TODO: Bug 1995015: add support for physically-contiguous mappings.
struct uvm_pmm_sysmem_mappings_struct
{
uvm_gpu_t *gpu;
struct radix_tree_root reverse_map_tree;
uvm_mutex_t reverse_map_lock;
};
// Global initialization/exit functions, that need to be called during driver
// initialization/tear-down. These are needed to allocate/free global internal
// data structures.
NV_STATUS uvm_pmm_sysmem_init(void);
void uvm_pmm_sysmem_exit(void);
// Initialize per-GPU sysmem mapping tracking
NV_STATUS uvm_pmm_sysmem_mappings_init(uvm_gpu_t *gpu, uvm_pmm_sysmem_mappings_t *sysmem_mappings);
// Destroy per-GPU sysmem mapping tracking. The caller must ensure that all the
// mappings have been removed before calling this function.
void uvm_pmm_sysmem_mappings_deinit(uvm_pmm_sysmem_mappings_t *sysmem_mappings);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// dma_addr -> {va_block, virt_addr} mapping is inserted in the reverse map.
NV_STATUS uvm_pmm_sysmem_mappings_add_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 virt_addr,
NvU64 region_size,
uvm_va_block_t *va_block,
uvm_processor_id_t owner);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// entries for the physical region starting at dma_addr are removed from the
// reverse map.
void uvm_pmm_sysmem_mappings_remove_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr);
// Like uvm_pmm_sysmem_mappings_remove_gpu_mapping but it doesn't assert if the
// mapping doesn't exist. See uvm_va_block_evict_chunks for more information.
void uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// mapping for the region starting at dma_addr is updated with va_block.
// This is required on VA block split.
void uvm_pmm_sysmem_mappings_reparent_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
uvm_va_block_t *va_block);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// mapping for the region starting at dma_addr is split into regions of
// new_region_size. new_region_size must be a power of two and smaller than the
// previously-registered size.
NV_STATUS uvm_pmm_sysmem_mappings_split_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 new_region_size);
// If the GPU used to initialize sysmem_mappings supports access counters, all
// the mappings within the region [dma_addr, dma_addr + new_region_size) are
// merged into a single mapping. new_region_size must be a power of two. The
// whole region must be previously populated with mappings and all of them must
// have the same VA block and processor owner.
void uvm_pmm_sysmem_mappings_merge_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 new_region_size);
// Obtain the {va_block, virt_addr} information for the mappings in the given
// [dma_addr:dma_addr + region_size) range. dma_addr and region_size must be
// page-aligned.
//
// Valid translations are written to out_mappings sequentially (there are no
// gaps). max_out_mappings are written, at most. The caller is required to
// provide enough entries in out_mappings.
//
// The VA Block in each returned translation entry is retained, and it's up to
// the caller to release them
size_t uvm_pmm_sysmem_mappings_dma_to_virt(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 region_size,
uvm_reverse_map_t *out_mappings,
size_t max_out_mappings);
#define UVM_CPU_CHUNK_SIZES (UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | PAGE_SIZE)
typedef enum
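For reference, the removed declarations above form a complete lookup workflow: a GPA notification's DMA address goes through uvm_pmm_sysmem_mappings_dma_to_virt(), and each returned VA block must then be released by the caller. The following is a hedged sketch of that workflow, not code from this commit: the va_block field of uvm_reverse_map_t, the uvm_va_block_release() helper, and the kernel ARRAY_SIZE() macro are assumptions based on the removed comments, not verified against this tree.

// Hedged sketch: translating one GPA notification address using the
// reverse-map API removed above.
static void example_translate_gpa(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 notif_addr)
{
    uvm_reverse_map_t out_mappings[16];
    size_t i, num;

    // dma_addr and region_size must be page-aligned; valid translations are
    // written sequentially, at most ARRAY_SIZE(out_mappings) of them.
    num = uvm_pmm_sysmem_mappings_dma_to_virt(sysmem_mappings,
                                              notif_addr & ~((NvU64)PAGE_SIZE - 1),
                                              PAGE_SIZE,
                                              out_mappings,
                                              ARRAY_SIZE(out_mappings));

    for (i = 0; i < num; i++) {
        // ... consume {va_block, virt_addr} for entry i ...

        // Each returned VA block is retained; the caller releases it
        // (uvm_va_block_release() is an assumed helper).
        uvm_va_block_release(out_mappings[i].va_block);
    }
}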
@@ -371,6 +287,12 @@ void uvm_cpu_chunk_free(uvm_cpu_chunk_t *chunk);
 // a kernel virtual mapping for the chunk. The virtual mapping persists until
 // GPU deinitialization, such that no unmap functionality is exposed.
 // For more details see uvm_mmu_sysmem_map().
+//
+// Before accessing this mapping, the caller is responsible for issuing any
+// physical GPU TLB invalidations required by the architecture, such as with
+// uvm_mmu_tlb_invalidate_phys() or uvm_hal_tlb_invalidate_phys(). This is an
+// optimization that allows the caller to batch, defer, and otherwise control
+// when the operation is issued.
 NV_STATUS uvm_cpu_chunk_map_gpu(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu);
 
 // Destroy a CPU chunk's DMA mapping for the given GPU.
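The comment added in the hunk above establishes a map-then-invalidate contract. A hedged sketch of the simplest correct caller follows; only the names uvm_mmu_tlb_invalidate_phys() and uvm_cpu_chunk_map_gpu() come from the diff, and the single-argument signature used here is an assumption for illustration.

// Hedged sketch: honor the new contract by invalidating before first access.
static NV_STATUS example_map_and_access(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
{
    NV_STATUS status = uvm_cpu_chunk_map_gpu(chunk, gpu);
    if (status != NV_OK)
        return status;

    // Per the comment above, callers may batch or defer this; issuing it
    // immediately per mapping is the simplest correct choice.
    uvm_mmu_tlb_invalidate_phys(gpu); // assumed signature

    // ... the GPU may now access the chunk through its kernel virtual mapping ...
    return NV_OK;
}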
@@ -425,9 +347,9 @@ void uvm_cpu_chunk_mark_clean(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
 bool uvm_cpu_chunk_is_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
 
 static NV_STATUS uvm_test_get_cpu_chunk_allocation_sizes(UVM_TEST_GET_CPU_CHUNK_ALLOC_SIZES_PARAMS *params,
-                                                        struct file *filp)
+                                                         struct file *filp)
 {
-   params->alloc_size_mask = (NvU32)uvm_cpu_chunk_get_allocation_sizes();
-   return NV_OK;
+    params->alloc_size_mask = (NvU32)uvm_cpu_chunk_get_allocation_sizes();
+    return NV_OK;
 }
 
 #endif
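The test ioctl above reports the supported CPU chunk sizes as a bitmask, where each set bit is one size from UVM_CPU_CHUNK_SIZES (2M, 64K, and the kernel PAGE_SIZE). A hedged sketch of decoding that mask follows; it is illustration only, and the printk() reporting is an assumption rather than anything this driver does.

// Hedged sketch: walk the set bits of alloc_size_mask, lowest first.
static void example_print_chunk_sizes(NvU32 alloc_size_mask)
{
    while (alloc_size_mask != 0) {
        NvU32 size = alloc_size_mask & ~(alloc_size_mask - 1); // isolate lowest set bit
        printk(KERN_INFO "supported CPU chunk size: %u bytes\n", size);
        alloc_size_mask &= alloc_size_mask - 1;                // clear that bit
    }
}

On a 4K-page kernel this would print 4096, 65536, and 2097152, matching the three sizes OR'd together in UVM_CPU_CHUNK_SIZES.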