545.23.06

Andy Ritger
2023-10-17 09:25:29 -07:00
parent f59818b751
commit b5bf85a8e3
917 changed files with 132480 additions and 110015 deletions


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -146,15 +146,9 @@ static void fake_tlb_invals_disable(void)
g_fake_tlb_invals_tracking_enabled = false;
}
// Fake TLB invalidate VA that just saves off the parameters so that they can be
// verified later.
static void fake_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar)
// Fake TLB invalidate VA that just saves off the parameters so that they can be verified later
static void fake_tlb_invalidate_va(uvm_push_t *push, uvm_gpu_phys_address_t pdb,
NvU32 depth, NvU64 base, NvU64 size, NvU32 page_size, uvm_membar_t membar)
{
if (!g_fake_tlb_invals_tracking_enabled)
return;
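
The function above is a test double: it never touches the GPU, it only records the parameters of the requested invalidate so a later assertion can compare them with what the caller expected. Below is a minimal, self-contained sketch of that record-and-verify pattern, using simplified types and hypothetical names; the real test additionally tracks the push, PDB, page size, membar, and a running count of invalidates.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the state the test keeps in g_last_fake_inval. */
typedef struct {
    uint64_t base;
    uint64_t size;
    uint32_t depth;
} fake_inval_t;

static bool         g_tracking_enabled;
static fake_inval_t g_last_inval;

/* Record the parameters instead of pushing a real TLB invalidate. */
static void fake_invalidate_va(uint64_t base, uint64_t size, uint32_t depth)
{
    if (!g_tracking_enabled)
        return;
    g_last_inval.base  = base;
    g_last_inval.size  = size;
    g_last_inval.depth = depth;
}

/* Verify the recorded invalidate against the caller's expectation. */
static bool assert_last_invalidate(uint64_t base, uint64_t size, uint32_t depth)
{
    if (g_last_inval.base != base || g_last_inval.size != size) {
        printf("Expected range [0x%llx, 0x%llx), got [0x%llx, 0x%llx)\n",
               (unsigned long long)base,
               (unsigned long long)(base + size),
               (unsigned long long)g_last_inval.base,
               (unsigned long long)(g_last_inval.base + g_last_inval.size));
        return false;
    }
    return g_last_inval.depth == depth;
}

int main(void)
{
    g_tracking_enabled = true;
    fake_invalidate_va(0x200000, 0x200000, 4);
    return assert_last_invalidate(0x200000, 0x200000, 4) ? 0 : 1;
}
```
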
@@ -216,8 +210,8 @@ static bool assert_and_reset_last_invalidate(NvU32 expected_depth, bool expected
}
if ((g_last_fake_inval->membar == UVM_MEMBAR_NONE) == expected_membar) {
UVM_TEST_PRINT("Expected %s membar, got %s instead\n",
expected_membar ? "a" : "no",
uvm_membar_string(g_last_fake_inval->membar));
expected_membar ? "a" : "no",
uvm_membar_string(g_last_fake_inval->membar));
result = false;
}
@@ -236,8 +230,7 @@ static bool assert_last_invalidate_all(NvU32 expected_depth, bool expected_memba
}
if (g_last_fake_inval->base != 0 || g_last_fake_inval->size != -1) {
UVM_TEST_PRINT("Expected invalidate all but got range [0x%llx, 0x%llx) instead\n",
g_last_fake_inval->base,
g_last_fake_inval->base + g_last_fake_inval->size);
g_last_fake_inval->base, g_last_fake_inval->base + g_last_fake_inval->size);
return false;
}
if (g_last_fake_inval->depth != expected_depth) {
@@ -254,16 +247,15 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
UVM_ASSERT(g_fake_tlb_invals_tracking_enabled);
if (g_fake_invals_count == 0) {
UVM_TEST_PRINT("Expected an invalidate for range [0x%llx, 0x%llx), but got none\n", base, base + size);
UVM_TEST_PRINT("Expected an invalidate for range [0x%llx, 0x%llx), but got none\n",
base, base + size);
return false;
}
if ((inval->base != base || inval->size != size) && inval->base != 0 && inval->size != -1) {
UVM_TEST_PRINT("Expected invalidate range [0x%llx, 0x%llx), but got range [0x%llx, 0x%llx) instead\n",
base,
base + size,
inval->base,
inval->base + inval->size);
base, base + size,
inval->base, inval->base + inval->size);
return false;
}
if (inval->depth != expected_depth) {
@@ -278,13 +270,7 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
return true;
}
static bool assert_invalidate_range(NvU64 base,
NvU64 size,
NvU32 page_size,
bool allow_inval_all,
NvU32 range_depth,
NvU32 all_depth,
bool expected_membar)
static bool assert_invalidate_range(NvU64 base, NvU64 size, NvU32 page_size, bool allow_inval_all, NvU32 range_depth, NvU32 all_depth, bool expected_membar)
{
NvU32 i;
@@ -502,6 +488,7 @@ static NV_STATUS alloc_adjacent_pde_64k_memory(uvm_gpu_t *gpu)
return NV_OK;
}
static NV_STATUS alloc_nearby_pde_64k_memory(uvm_gpu_t *gpu)
{
uvm_page_tree_t tree;
@@ -855,7 +842,6 @@ static NV_STATUS get_two_free_apart(uvm_gpu_t *gpu)
TEST_CHECK_RET(range2.entry_count == 256);
TEST_CHECK_RET(range2.table->ref_count == 512);
TEST_CHECK_RET(range1.table == range2.table);
// 4k page is second entry in a dual PDE
TEST_CHECK_RET(range1.table == tree.root->entries[0]->entries[0]->entries[0]->entries[1]);
TEST_CHECK_RET(range1.start_index == 256);
@@ -885,7 +871,6 @@ static NV_STATUS get_overlapping_dual_pdes(uvm_gpu_t *gpu)
MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_64K, size, size, &range64k), NV_OK);
TEST_CHECK_RET(range64k.entry_count == 16);
TEST_CHECK_RET(range64k.table->ref_count == 16);
// 4k page is second entry in a dual PDE
TEST_CHECK_RET(range64k.table == tree.root->entries[0]->entries[0]->entries[0]->entries[0]);
TEST_CHECK_RET(range64k.start_index == 16);
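
The two hunks above both index into the same layout detail: at the 64K/4K level the directory entry is a dual PDE, so a single parent slot references two child tables, the big-page (64K) table at index 0 and the 4K table at index 1, which is what the entries[...]->entries[0] and entries[...]->entries[1] walks are checking. A purely conceptual sketch of that indexing, with hypothetical types that are not the driver's:

```c
/* Conceptual model only: one dual PDE slot referencing two child tables. */
struct fake_table { int ref_count; };

struct fake_dual_pde {
    struct fake_table *entries[2]; /* [0] = 64K (big) table, [1] = 4K table */
};

/* The 4K table is the second entry, matching the entries[...][1] walks above. */
static struct fake_table *dual_pde_4k_table(struct fake_dual_pde *pde)
{
    return pde->entries[1];
}
```
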
@@ -1045,13 +1030,10 @@ static NV_STATUS test_tlb_invalidates(uvm_gpu_t *gpu)
// Depth 4
NvU64 extent_pte = UVM_PAGE_SIZE_2M;
// Depth 3
NvU64 extent_pde0 = extent_pte * (1ull << 8);
// Depth 2
NvU64 extent_pde1 = extent_pde0 * (1ull << 9);
// Depth 1
NvU64 extent_pde2 = extent_pde1 * (1ull << 9);
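
The extents above encode the VA reach of each page-table level, so the test can choose invalidate targets that straddle level boundaries. Working the arithmetic out in a standalone sketch (only the 2MB starting extent and the shift counts are taken from the code above): depth 4 covers 2MB, depth 3 covers 2MB * 256 = 512MB, depth 2 covers 512MB * 512 = 256GB, and depth 1 covers 256GB * 512 = 128TB.

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Same arithmetic as the extents above, with UVM_PAGE_SIZE_2M written out. */
    uint64_t extent_pte  = 2ull << 20;                 /* depth 4:   2 MB */
    uint64_t extent_pde0 = extent_pte  * (1ull << 8);  /* depth 3: 512 MB */
    uint64_t extent_pde1 = extent_pde0 * (1ull << 9);  /* depth 2: 256 GB */
    uint64_t extent_pde2 = extent_pde1 * (1ull << 9);  /* depth 1: 128 TB */

    printf("pte : %llu MB\n", (unsigned long long)(extent_pte  >> 20));
    printf("pde0: %llu MB\n", (unsigned long long)(extent_pde0 >> 20));
    printf("pde1: %llu GB\n", (unsigned long long)(extent_pde1 >> 30));
    printf("pde2: %llu TB\n", (unsigned long long)(extent_pde2 >> 40));
    return 0;
}
```
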
@@ -1099,11 +1081,7 @@ static NV_STATUS test_tlb_invalidates(uvm_gpu_t *gpu)
return status;
}
static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree,
NvU64 base,
NvU64 size,
NvU32 min_page_size,
NvU32 max_page_size)
static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree, NvU64 base, NvU64 size, NvU32 min_page_size, NvU32 max_page_size)
{
NV_STATUS status = NV_OK;
uvm_push_t push;
@@ -1227,11 +1205,7 @@ static bool assert_range_vec_ptes(uvm_page_table_range_vec_t *range_vec, bool ex
NvU64 expected_pte = expecting_cleared ? 0 : range_vec->size + offset;
if (*pte != expected_pte) {
UVM_TEST_PRINT("PTE is 0x%llx instead of 0x%llx for offset 0x%llx within range [0x%llx, 0x%llx)\n",
*pte,
expected_pte,
offset,
range_vec->start,
range_vec->size);
*pte, expected_pte, offset, range_vec->start, range_vec->size);
return false;
}
offset += range_vec->page_size;
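
The check above works because the write path fills every PTE with a value derived from its offset, so the same walk can validate both the written state (size + offset) and the cleared state (0). A self-contained sketch of that verification loop, with simplified types standing in for uvm_page_table_range_vec_t:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Validate that each PTE holds either 0 (cleared) or size + offset (written). */
static bool check_ptes(const uint64_t *ptes, uint64_t size, uint64_t page_size,
                       bool expecting_cleared)
{
    uint64_t offset = 0;

    for (size_t i = 0; offset < size; i++, offset += page_size) {
        uint64_t expected = expecting_cleared ? 0 : size + offset;

        if (ptes[i] != expected) {
            printf("PTE is 0x%llx instead of 0x%llx at offset 0x%llx\n",
                   (unsigned long long)ptes[i],
                   (unsigned long long)expected,
                   (unsigned long long)offset);
            return false;
        }
    }
    return true;
}

int main(void)
{
    /* Two 4K pages "written" with the offset-derived pattern. */
    uint64_t ptes[2] = { 0x2000 + 0x0, 0x2000 + 0x1000 };

    return check_ptes(ptes, 0x2000, 0x1000, false) ? 0 : 1;
}
```
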
@@ -1252,11 +1226,7 @@ static NV_STATUS test_range_vec_write_ptes(uvm_page_table_range_vec_t *range_vec
TEST_CHECK_RET(data.status == NV_OK);
TEST_CHECK_RET(data.count == range_vec->size / range_vec->page_size);
TEST_CHECK_RET(assert_invalidate_range_specific(g_last_fake_inval,
range_vec->start,
range_vec->size,
range_vec->page_size,
page_table_depth,
membar != UVM_MEMBAR_NONE));
range_vec->start, range_vec->size, range_vec->page_size, page_table_depth, membar != UVM_MEMBAR_NONE));
TEST_CHECK_RET(assert_range_vec_ptes(range_vec, false));
fake_tlb_invals_disable();
@@ -1279,11 +1249,7 @@ static NV_STATUS test_range_vec_clear_ptes(uvm_page_table_range_vec_t *range_vec
return NV_OK;
}
static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree,
NvU64 start,
NvU64 size,
NvU32 page_size,
uvm_page_table_range_vec_t **range_vec_out)
static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree, NvU64 start, NvU64 size, NvU32 page_size, uvm_page_table_range_vec_t **range_vec_out)
{
uvm_page_table_range_vec_t *range_vec;
uvm_pmm_alloc_flags_t pmm_flags = UVM_PMM_ALLOC_FLAGS_EVICT;
@@ -1586,17 +1552,17 @@ static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
memset(phys_allocs, 0, sizeof(phys_allocs));
hal->make_pde(&pde_bits, phys_allocs, 0, NULL);
hal->make_pde(&pde_bits, phys_allocs, 0);
TEST_CHECK_RET(pde_bits == 0x0L);
phys_allocs[0] = &alloc_sys;
phys_allocs[1] = &alloc_vid;
hal->make_pde(&pde_bits, phys_allocs, 0, NULL);
hal->make_pde(&pde_bits, phys_allocs, 0);
TEST_CHECK_RET(pde_bits == 0x1BBBBBBD99999992LL);
phys_allocs[0] = &alloc_vid;
phys_allocs[1] = &alloc_sys;
hal->make_pde(&pde_bits, phys_allocs, 0, NULL);
hal->make_pde(&pde_bits, phys_allocs, 0);
TEST_CHECK_RET(pde_bits == 0x9999999E1BBBBBB1LL);
for (j = 0; j <= 2; j++) {
@@ -1666,7 +1632,6 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999000LL);
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
// big versions have [11:8] set as well to test the page table merging
uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999900LL);
uvm_mmu_page_table_alloc_t alloc_big_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBBB00LL);
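
These fixtures give each aperture a physical address whose hex digits are easy to recognize (0x39999..., 0x1BBBB...), so a mis-encoded PDE stands out in the failure output, and the "big" variants additionally set address bits [11:8] to test page table merging. Below is a hedged sketch of the check pattern the following hunks apply per architecture; the encoder is a stand-in whose bit placement is only inferred from this test's own expected values (e.g. sysmem 0x399999999999000 encodes to 0x3999999999990C at depth 0), not taken from the PDE format documentation, and the types are simplified.

```c
#include <stdbool.h>
#include <stdint.h>

typedef enum { APERTURE_SYS, APERTURE_VID } aperture_t;

/* Stand-in for uvm_mmu_page_table_alloc_t: just an aperture and an address. */
typedef struct {
    aperture_t aperture;
    uint64_t   phys_addr;
} fake_alloc_t;

static fake_alloc_t fake_table_alloc(aperture_t aperture, uint64_t phys_addr)
{
    fake_alloc_t alloc = { aperture, phys_addr };
    return alloc;
}

/* Hypothetical depth-0 encoder: the 4K-aligned address lands at bit 8 and the
 * low nibble marks the aperture (values read off the expectations below). */
static uint64_t encode_pde(const fake_alloc_t *alloc)
{
    uint64_t aperture_bits = (alloc->aperture == APERTURE_SYS) ? 0xCull : 0xAull;

    return ((alloc->phys_addr >> 12) << 8) | aperture_bits;
}

int main(void)
{
    fake_alloc_t sys = fake_table_alloc(APERTURE_SYS, 0x399999999999000ull);
    fake_alloc_t vid = fake_table_alloc(APERTURE_VID, 0x1BBBBBB000ull);

    /* Mirrors the TEST_CHECK_RET(pde_bits[0] == ...) checks in the hunks below. */
    return (encode_pde(&sys) == 0x3999999999990Cull &&
            encode_pde(&vid) == 0x1BBBBBB0Aull) ? 0 : 1;
}
```
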
@@ -1674,31 +1639,31 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
// Make sure cleared PDEs work as expected
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
hal->make_pde(pde_bits, phys_allocs, 0);
TEST_CHECK_RET(pde_bits[0] == 0);
memset(pde_bits, 0xFF, sizeof(pde_bits));
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
hal->make_pde(pde_bits, phys_allocs, 3);
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
// Sys and vidmem PDEs
phys_allocs[0] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
hal->make_pde(pde_bits, phys_allocs, 0);
TEST_CHECK_RET(pde_bits[0] == 0x3999999999990C);
phys_allocs[0] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
hal->make_pde(pde_bits, phys_allocs, 0);
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB0A);
// Dual PDEs
phys_allocs[0] = &alloc_big_sys;
phys_allocs[1] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
hal->make_pde(pde_bits, phys_allocs, 3);
TEST_CHECK_RET(pde_bits[0] == 0x3999999999999C && pde_bits[1] == 0x1BBBBBB0A);
phys_allocs[0] = &alloc_big_vid;
phys_allocs[1] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
hal->make_pde(pde_bits, phys_allocs, 3);
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBBBA && pde_bits[1] == 0x3999999999990C);
// uncached, i.e., the sysmem data is not cached in GPU's L2 cache. Clear
@@ -1762,36 +1727,36 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
// Make sure cleared PDEs work as expected
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
hal->make_pde(pde_bits, phys_allocs, 0);
TEST_CHECK_RET(pde_bits[0] == 0);
memset(pde_bits, 0xFF, sizeof(pde_bits));
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
hal->make_pde(pde_bits, phys_allocs, 3);
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
// Sys and vidmem PDEs
phys_allocs[0] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
hal->make_pde(pde_bits, phys_allocs, 0);
TEST_CHECK_RET(pde_bits[0] == 0x3999999999990C);
phys_allocs[0] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
hal->make_pde(pde_bits, phys_allocs, 0);
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB0A);
// Dual PDEs
phys_allocs[0] = &alloc_big_sys;
phys_allocs[1] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
hal->make_pde(pde_bits, phys_allocs, 3);
TEST_CHECK_RET(pde_bits[0] == 0x3999999999999C && pde_bits[1] == 0x1BBBBBB0A);
phys_allocs[0] = &alloc_big_vid;
phys_allocs[1] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
hal->make_pde(pde_bits, phys_allocs, 3);
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBBBA && pde_bits[1] == 0x3999999999990C);
// NO_ATS PDE1 (depth 2)
phys_allocs[0] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 2, NULL);
hal->make_pde(pde_bits, phys_allocs, 2);
if (g_uvm_global.ats.enabled)
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB2A);
else
@@ -1840,32 +1805,32 @@ static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func ent
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
// Make sure cleared PDEs work as expected
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
hal->make_pde(pde_bits, phys_allocs, 0);
TEST_CHECK_RET(pde_bits[0] == 0);
// Cleared PDEs work as expected for big and small PDEs.
memset(pde_bits, 0xFF, sizeof(pde_bits));
hal->make_pde(pde_bits, phys_allocs, 4, NULL);
hal->make_pde(pde_bits, phys_allocs, 4);
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
// Sys and vidmem PDEs, uncached ATS allowed.
phys_allocs[0] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
hal->make_pde(pde_bits, phys_allocs, 0);
TEST_CHECK_RET(pde_bits[0] == 0x999999999900C);
phys_allocs[0] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
hal->make_pde(pde_bits, phys_allocs, 0);
TEST_CHECK_RET(pde_bits[0] == 0xBBBBBBB00A);
// Dual PDEs, uncached.
phys_allocs[0] = &alloc_big_sys;
phys_allocs[1] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 4, NULL);
hal->make_pde(pde_bits, phys_allocs, 4);
TEST_CHECK_RET(pde_bits[0] == 0x999999999991C && pde_bits[1] == 0xBBBBBBB01A);
phys_allocs[0] = &alloc_big_vid;
phys_allocs[1] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 4, NULL);
hal->make_pde(pde_bits, phys_allocs, 4);
TEST_CHECK_RET(pde_bits[0] == 0xBBBBBBBB1A && pde_bits[1] == 0x999999999901C);
// uncached, i.e., the sysmem data is not cached in GPU's L2 cache, and
@@ -2338,8 +2303,7 @@ NV_STATUS uvm_test_page_tree(UVM_TEST_PAGE_TREE_PARAMS *params, struct file *fil
gpu->parent = parent_gpu;
// At least test_tlb_invalidates() relies on global state
// (g_tlb_invalidate_*) so make sure only one test instance can run at a
// time.
// (g_tlb_invalidate_*) so make sure only one test instance can run at a time.
uvm_mutex_lock(&g_uvm_global.global_lock);
// Allocate the fake TLB tracking state. Notably tests still need to enable