Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git (synced 2026-03-03 04:09:50 +00:00)

Commit 3084c04453, parent caa2dd11a0, committed by Gaurav Juvekar
@@ -290,15 +290,15 @@ uvm_chunk_sizes_mask_t uvm_mem_kernel_chunk_sizes(uvm_gpu_t *gpu)
     // Get the mmu mode hal directly as the internal address space tree has not
     // been created yet.
     uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(gpu->big_page.internal_size);
-    NvU32 page_sizes = hal->page_sizes();
+    NvU64 page_sizes = hal->page_sizes();
 
     return (uvm_chunk_sizes_mask_t)(page_sizes & UVM_CHUNK_SIZES_MASK);
 }
 
-static NvU32 mem_pick_chunk_size(uvm_mem_t *mem)
+static NvU64 mem_pick_chunk_size(uvm_mem_t *mem)
 {
-    NvU32 biggest_page_size;
-    NvU32 chunk_size;
+    NvU64 biggest_page_size;
+    NvU64 chunk_size;
 
     if (uvm_mem_is_sysmem(mem))
         return PAGE_SIZE;
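The theme of the type changes in this commit: hal->page_sizes() returns a bitmask of supported page sizes, and a 32-bit mask cannot represent a page size of 4 GiB or larger. A minimal standalone sketch of the truncation hazard (the typedefs and the example mask are illustrative stand-ins, not the driver's definitions):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t NvU32;  /* stand-in for the driver typedef */
    typedef uint64_t NvU64;  /* stand-in for the driver typedef */

    int main(void)
    {
        /* Hypothetical mask of supported page sizes: 4K, 64K, 2M, 512M,
         * plus a 4G size whose bit (bit 32) only exists in a 64-bit mask. */
        NvU64 page_sizes = (1ULL << 12) | (1ULL << 16) | (1ULL << 21) |
                           (1ULL << 29) | (1ULL << 32);
        NvU32 truncated = (NvU32)page_sizes;  /* the old NvU32 behavior */

        printf("64-bit mask: 0x%llx\n", (unsigned long long)page_sizes);
        printf("32-bit mask: 0x%llx (4G bit silently dropped)\n",
               (unsigned long long)truncated);
        return 0;
    }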
@@ -315,12 +315,12 @@ static NvU32 mem_pick_chunk_size(uvm_mem_t *mem)
     // When UVM_PAGE_SIZE_DEFAULT is used on NUMA-enabled GPUs, we force
     // chunk_size to be PAGE_SIZE at least, to allow CPU mappings.
     if (mem->backing_gpu->mem_info.numa.enabled)
-        chunk_size = max(chunk_size, (NvU32)PAGE_SIZE);
+        chunk_size = max(chunk_size, (NvU64)PAGE_SIZE);
 
     return chunk_size;
 }
 
-static NvU32 mem_pick_gpu_page_size(uvm_mem_t *mem, uvm_gpu_t *gpu, uvm_page_tree_t *gpu_page_tree)
+static NvU64 mem_pick_gpu_page_size(uvm_mem_t *mem, uvm_gpu_t *gpu, uvm_page_tree_t *gpu_page_tree)
 {
     if (uvm_mem_is_vidmem(mem)) {
         // For vidmem allocations the chunk size is picked out of the supported
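Two details worth noting in this hunk. First, the cast has to change along with the variable: the Linux kernel's max() macro rejects mismatched argument types at compile time, so once chunk_size is NvU64, PAGE_SIZE must be cast to NvU64 as well. Second, picking a chunk size out of a size mask is a bit scan over power-of-two sizes; a hedged sketch of that selection pattern (a generic illustration, not mem_pick_chunk_size itself; the helper and its name are assumptions):

    #include <stdint.h>

    /* Illustrative helper: the largest power-of-two size in `mask` that
     * does not exceed `limit`, or 0 if none qualifies. */
    static uint64_t biggest_size_up_to(uint64_t mask, uint64_t limit)
    {
        int bit;

        for (bit = 63; bit >= 0; bit--) {
            uint64_t size = 1ULL << bit;

            if ((mask & size) && size <= limit)
                return size;
        }
        return 0;
    }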
@@ -467,7 +467,7 @@ static NV_STATUS mem_alloc_sysmem_dma_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
     NvU64 *dma_addrs;
 
     UVM_ASSERT_MSG(mem->chunk_size == PAGE_SIZE,
-                   "mem->chunk_size is 0x%x. PAGE_SIZE is only supported.",
+                   "mem->chunk_size is 0x%llx. PAGE_SIZE is only supported.",
                    mem->chunk_size);
     UVM_ASSERT(uvm_mem_is_sysmem_dma(mem));
 
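The format string has to track the type change too: passing a 64-bit value where %x expects an unsigned int is undefined behavior in C variadic calls. A tiny standalone illustration (ASSERT_MSG here is a stand-in, not the driver's UVM_ASSERT_MSG):

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for an assert-with-message macro; not the driver's macro. */
    #define ASSERT_MSG(cond, fmt, ...) \
        do { if (!(cond)) fprintf(stderr, fmt "\n", ##__VA_ARGS__); } while (0)

    int main(void)
    {
        uint64_t chunk_size = 1ULL << 32;  /* 4 GiB: not representable in 32 bits */

        /* %llx matches the 64-bit argument; the old %x would read only an
         * unsigned int's worth of the vararg, which is undefined behavior. */
        ASSERT_MSG(chunk_size == 4096, "chunk_size is 0x%llx",
                   (unsigned long long)chunk_size);
        return 0;
    }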
@@ -528,10 +528,9 @@ static NV_STATUS mem_alloc_sysmem_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
 
 // In case of failure, the caller is required to handle cleanup by calling
 // uvm_mem_free
-static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_unprotected)
+static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero)
 {
     NV_STATUS status;
-    uvm_pmm_gpu_memory_type_t mem_type;
 
     UVM_ASSERT(uvm_mem_is_vidmem(mem));
 
@@ -548,23 +547,15 @@ static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_unpr
     if (!mem->vidmem.chunks)
         return NV_ERR_NO_MEMORY;
 
-    // When CC is disabled the behavior is identical to that of PMM, and the
-    // protection flag is ignored (squashed by PMM internally).
-    if (is_unprotected)
-        mem_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL_UNPROTECTED;
-    else
-        mem_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL_PROTECTED;
-
-    status = uvm_pmm_gpu_alloc(&mem->backing_gpu->pmm,
-                               mem->chunks_count,
-                               mem->chunk_size,
-                               mem_type,
-                               UVM_PMM_ALLOC_FLAGS_NONE,
-                               mem->vidmem.chunks,
-                               NULL);
+    status = uvm_pmm_gpu_alloc_kernel(&mem->backing_gpu->pmm,
+                                      mem->chunks_count,
+                                      mem->chunk_size,
+                                      UVM_PMM_ALLOC_FLAGS_NONE,
+                                      mem->vidmem.chunks,
+                                      NULL);
 
     if (status != NV_OK) {
-        UVM_ERR_PRINT("uvm_pmm_gpu_alloc (count=%zd, size=0x%x) failed: %s\n",
+        UVM_ERR_PRINT("uvm_pmm_gpu_alloc_kernel (count=%zd, size=0x%llx) failed: %s\n",
                       mem->chunks_count,
                       mem->chunk_size,
                       nvstatusToString(status));
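The protection-type selection removed here does not vanish; it moves behind the new uvm_pmm_gpu_alloc_kernel entry point, so callers stop carrying an is_unprotected flag. A plausible shape for such a wrapper, inferred from the call site above (the parameter types and the internal type choice are assumptions, not the driver's verbatim code):

    /* Sketch only: a thin wrapper that picks the kernel memory type
     * itself and forwards to the general allocator. Parameter types are
     * inferred from the call site in this diff. */
    static NV_STATUS example_pmm_alloc_kernel(uvm_pmm_gpu_t *pmm,
                                              size_t num_chunks,
                                              NvU64 chunk_size,
                                              uvm_pmm_alloc_flags_t flags,
                                              uvm_gpu_chunk_t **chunks,
                                              uvm_tracker_t *out_tracker)
    {
        /* Per the removed comment, PMM squashes the protection flag when
         * Confidential Computing is disabled, so defaulting to the
         * protected kernel type is safe in either mode. */
        return uvm_pmm_gpu_alloc(pmm,
                                 num_chunks,
                                 chunk_size,
                                 UVM_PMM_GPU_MEMORY_TYPE_KERNEL_PROTECTED,
                                 flags,
                                 chunks,
                                 out_tracker);
    }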
@@ -574,7 +565,7 @@ static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_unpr
     return NV_OK;
 }
 
-static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero, bool is_unprotected)
+static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero)
 {
     if (uvm_mem_is_sysmem(mem)) {
         gfp_t gfp_flags;
@@ -596,7 +587,7 @@ static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zer
         return status;
     }
 
-    return mem_alloc_vidmem_chunks(mem, zero, is_unprotected);
+    return mem_alloc_vidmem_chunks(mem, zero);
 }
 
 NV_STATUS uvm_mem_map_kernel(uvm_mem_t *mem, const uvm_processor_mask_t *mask)
@@ -626,7 +617,6 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
     NV_STATUS status;
     NvU64 physical_size;
     uvm_mem_t *mem = NULL;
-    bool is_unprotected = false;
 
     UVM_ASSERT(params->size > 0);
 
@@ -648,12 +638,7 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
     physical_size = UVM_ALIGN_UP(mem->size, mem->chunk_size);
     mem->chunks_count = physical_size / mem->chunk_size;
 
-    if (params->is_unprotected)
-        UVM_ASSERT(uvm_mem_is_vidmem(mem));
-
-    is_unprotected = params->is_unprotected;
-
-    status = mem_alloc_chunks(mem, params->mm, params->zero, is_unprotected);
+    status = mem_alloc_chunks(mem, params->mm, params->zero);
     if (status != NV_OK)
         goto error;
 
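From the caller's side, the simplification means one less field to populate: uvm_mem_alloc now decides the protection type internally. A hedged sketch of a post-change caller (params->size, params->zero, and params->mm appear in this diff; everything else here, including the function name, is illustrative):

    /* Hypothetical caller after the refactor; no is_unprotected field. */
    static NV_STATUS example_alloc_zeroed(uvm_mem_t **mem_out)
    {
        uvm_mem_alloc_params_t params = { 0 };

        params.size = 2 * 1024 * 1024;  /* 2 MiB, for example */
        params.zero = true;             /* matches params->zero in the diff */
        params.mm   = NULL;             /* no CPU address space involved */

        return uvm_mem_alloc(&params, mem_out);
    }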
@@ -1050,7 +1035,7 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
                              uvm_page_table_range_vec_t **range_vec)
 {
     NV_STATUS status;
-    NvU32 page_size;
+    NvU64 page_size;
     uvm_pmm_alloc_flags_t pmm_flags = UVM_PMM_ALLOC_FLAGS_EVICT;
 
     uvm_mem_pte_maker_data_t pte_maker_data = {
@@ -1059,7 +1044,7 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
     };
 
     page_size = mem_pick_gpu_page_size(mem, gpu, tree);
-    UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%x\n", page_size);
+    UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%llx\n", page_size);
 
     // When the Confidential Computing feature is enabled, DMA allocations are
     // majoritarily allocated and managed by a per-GPU DMA buffer pool