mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2026-02-07 00:29:58 +00:00
535.43.24
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -146,9 +146,15 @@ static void fake_tlb_invals_disable(void)
|
||||
g_fake_tlb_invals_tracking_enabled = false;
|
||||
}
|
||||
|
||||
// Fake TLB invalidate VA that just saves off the parameters so that they can be verified later
|
||||
static void fake_tlb_invalidate_va(uvm_push_t *push, uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth, NvU64 base, NvU64 size, NvU32 page_size, uvm_membar_t membar)
|
||||
// Fake TLB invalidate VA that just saves off the parameters so that they can be
|
||||
// verified later.
|
||||
static void fake_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
if (!g_fake_tlb_invals_tracking_enabled)
|
||||
return;
|
||||
@@ -210,8 +216,8 @@ static bool assert_and_reset_last_invalidate(NvU32 expected_depth, bool expected
|
||||
}
|
||||
if ((g_last_fake_inval->membar == UVM_MEMBAR_NONE) == expected_membar) {
|
||||
UVM_TEST_PRINT("Expected %s membar, got %s instead\n",
|
||||
expected_membar ? "a" : "no",
|
||||
uvm_membar_string(g_last_fake_inval->membar));
|
||||
expected_membar ? "a" : "no",
|
||||
uvm_membar_string(g_last_fake_inval->membar));
|
||||
result = false;
|
||||
}
|
||||
|
||||
@@ -230,7 +236,8 @@ static bool assert_last_invalidate_all(NvU32 expected_depth, bool expected_memba
|
||||
}
|
||||
if (g_last_fake_inval->base != 0 || g_last_fake_inval->size != -1) {
|
||||
UVM_TEST_PRINT("Expected invalidate all but got range [0x%llx, 0x%llx) instead\n",
|
||||
g_last_fake_inval->base, g_last_fake_inval->base + g_last_fake_inval->size);
|
||||
g_last_fake_inval->base,
|
||||
g_last_fake_inval->base + g_last_fake_inval->size);
|
||||
return false;
|
||||
}
|
||||
if (g_last_fake_inval->depth != expected_depth) {
|
||||
@@ -247,15 +254,16 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
|
||||
UVM_ASSERT(g_fake_tlb_invals_tracking_enabled);
|
||||
|
||||
if (g_fake_invals_count == 0) {
|
||||
UVM_TEST_PRINT("Expected an invalidate for range [0x%llx, 0x%llx), but got none\n",
|
||||
base, base + size);
|
||||
UVM_TEST_PRINT("Expected an invalidate for range [0x%llx, 0x%llx), but got none\n", base, base + size);
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((inval->base != base || inval->size != size) && inval->base != 0 && inval->size != -1) {
|
||||
UVM_TEST_PRINT("Expected invalidate range [0x%llx, 0x%llx), but got range [0x%llx, 0x%llx) instead\n",
|
||||
base, base + size,
|
||||
inval->base, inval->base + inval->size);
|
||||
base,
|
||||
base + size,
|
||||
inval->base,
|
||||
inval->base + inval->size);
|
||||
return false;
|
||||
}
|
||||
if (inval->depth != expected_depth) {
|
||||
@@ -270,7 +278,13 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool assert_invalidate_range(NvU64 base, NvU64 size, NvU32 page_size, bool allow_inval_all, NvU32 range_depth, NvU32 all_depth, bool expected_membar)
|
||||
static bool assert_invalidate_range(NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
bool allow_inval_all,
|
||||
NvU32 range_depth,
|
||||
NvU32 all_depth,
|
||||
bool expected_membar)
|
||||
{
|
||||
NvU32 i;
|
||||
|
||||
@@ -488,7 +502,6 @@ static NV_STATUS alloc_adjacent_pde_64k_memory(uvm_gpu_t *gpu)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
|
||||
static NV_STATUS alloc_nearby_pde_64k_memory(uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_page_tree_t tree;
|
||||
@@ -842,6 +855,7 @@ static NV_STATUS get_two_free_apart(uvm_gpu_t *gpu)
|
||||
TEST_CHECK_RET(range2.entry_count == 256);
|
||||
TEST_CHECK_RET(range2.table->ref_count == 512);
|
||||
TEST_CHECK_RET(range1.table == range2.table);
|
||||
|
||||
// 4k page is second entry in a dual PDE
|
||||
TEST_CHECK_RET(range1.table == tree.root->entries[0]->entries[0]->entries[0]->entries[1]);
|
||||
TEST_CHECK_RET(range1.start_index == 256);
|
||||
@@ -871,6 +885,7 @@ static NV_STATUS get_overlapping_dual_pdes(uvm_gpu_t *gpu)
|
||||
MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_64K, size, size, &range64k), NV_OK);
|
||||
TEST_CHECK_RET(range64k.entry_count == 16);
|
||||
TEST_CHECK_RET(range64k.table->ref_count == 16);
|
||||
|
||||
// 64k page table is the first entry in a dual PDE
|
||||
TEST_CHECK_RET(range64k.table == tree.root->entries[0]->entries[0]->entries[0]->entries[0]);
|
||||
TEST_CHECK_RET(range64k.start_index == 16);
|
||||
@@ -1030,10 +1045,13 @@ static NV_STATUS test_tlb_invalidates(uvm_gpu_t *gpu)
|
||||
|
||||
// Depth 4
|
||||
NvU64 extent_pte = UVM_PAGE_SIZE_2M;
|
||||
|
||||
// Depth 3
|
||||
NvU64 extent_pde0 = extent_pte * (1ull << 8);
|
||||
|
||||
// Depth 2
|
||||
NvU64 extent_pde1 = extent_pde0 * (1ull << 9);
|
||||
|
||||
// Depth 1
|
||||
NvU64 extent_pde2 = extent_pde1 * (1ull << 9);
|
||||
|
||||
@@ -1081,7 +1099,11 @@ static NV_STATUS test_tlb_invalidates(uvm_gpu_t *gpu)
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree, NvU64 base, NvU64 size, NvU32 min_page_size, NvU32 max_page_size)
|
||||
static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 min_page_size,
|
||||
NvU32 max_page_size)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_push_t push;
|
||||
@@ -1205,7 +1227,11 @@ static bool assert_range_vec_ptes(uvm_page_table_range_vec_t *range_vec, bool ex
|
||||
NvU64 expected_pte = expecting_cleared ? 0 : range_vec->size + offset;
|
||||
if (*pte != expected_pte) {
|
||||
UVM_TEST_PRINT("PTE is 0x%llx instead of 0x%llx for offset 0x%llx within range [0x%llx, 0x%llx)\n",
|
||||
*pte, expected_pte, offset, range_vec->start, range_vec->size);
|
||||
*pte,
|
||||
expected_pte,
|
||||
offset,
|
||||
range_vec->start,
|
||||
range_vec->size);
|
||||
return false;
|
||||
}
|
||||
offset += range_vec->page_size;
|
||||
@@ -1226,7 +1252,11 @@ static NV_STATUS test_range_vec_write_ptes(uvm_page_table_range_vec_t *range_vec
|
||||
TEST_CHECK_RET(data.status == NV_OK);
|
||||
TEST_CHECK_RET(data.count == range_vec->size / range_vec->page_size);
|
||||
TEST_CHECK_RET(assert_invalidate_range_specific(g_last_fake_inval,
|
||||
range_vec->start, range_vec->size, range_vec->page_size, page_table_depth, membar != UVM_MEMBAR_NONE));
|
||||
range_vec->start,
|
||||
range_vec->size,
|
||||
range_vec->page_size,
|
||||
page_table_depth,
|
||||
membar != UVM_MEMBAR_NONE));
|
||||
TEST_CHECK_RET(assert_range_vec_ptes(range_vec, false));
|
||||
|
||||
fake_tlb_invals_disable();
|
||||
@@ -1249,7 +1279,11 @@ static NV_STATUS test_range_vec_clear_ptes(uvm_page_table_range_vec_t *range_vec
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree, NvU64 start, NvU64 size, NvU32 page_size, uvm_page_table_range_vec_t **range_vec_out)
|
||||
static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree,
|
||||
NvU64 start,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
uvm_page_table_range_vec_t **range_vec_out)
|
||||
{
|
||||
uvm_page_table_range_vec_t *range_vec;
|
||||
uvm_pmm_alloc_flags_t pmm_flags = UVM_PMM_ALLOC_FLAGS_EVICT;
|
||||
@@ -1544,25 +1578,28 @@ static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
|
||||
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999000LL);
|
||||
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
|
||||
uvm_mmu_mode_hal_t *hal;
|
||||
uvm_page_directory_t dir;
|
||||
NvU32 i, j, big_page_size, page_size;
|
||||
|
||||
dir.depth = 0;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(big_page_sizes); i++) {
|
||||
big_page_size = big_page_sizes[i];
|
||||
hal = gpu->parent->arch_hal->mmu_mode_hal(big_page_size);
|
||||
|
||||
memset(phys_allocs, 0, sizeof(phys_allocs));
|
||||
|
||||
hal->make_pde(&pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(&pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits == 0x0L);
|
||||
|
||||
phys_allocs[0] = &alloc_sys;
|
||||
phys_allocs[1] = &alloc_vid;
|
||||
hal->make_pde(&pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(&pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits == 0x1BBBBBBD99999992LL);
|
||||
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
phys_allocs[1] = &alloc_sys;
|
||||
hal->make_pde(&pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(&pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits == 0x9999999E1BBBBBB1LL);
|
||||
|
||||
for (j = 0; j <= 2; j++) {
|
||||
@@ -1632,38 +1669,47 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999000LL);
|
||||
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
|
||||
uvm_page_directory_t dir;
|
||||
|
||||
// big versions have [11:8] set as well to test the page table merging
|
||||
uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999900LL);
|
||||
uvm_mmu_page_table_alloc_t alloc_big_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBBB00LL);
|
||||
|
||||
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
|
||||
|
||||
dir.index_in_parent = 0;
|
||||
dir.host_parent = NULL;
|
||||
dir.depth = 0;
|
||||
|
||||
// Make sure cleared PDEs work as expected
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0);
|
||||
|
||||
memset(pde_bits, 0xFF, sizeof(pde_bits));
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
dir.depth = 3;
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
|
||||
|
||||
// Sys and vidmem PDEs
|
||||
phys_allocs[0] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
dir.depth = 0;
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x3999999999990C);
|
||||
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB0A);
|
||||
|
||||
// Dual PDEs
|
||||
phys_allocs[0] = &alloc_big_sys;
|
||||
phys_allocs[1] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
dir.depth = 3;
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x3999999999999C && pde_bits[1] == 0x1BBBBBB0A);
|
||||
|
||||
phys_allocs[0] = &alloc_big_vid;
|
||||
phys_allocs[1] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBBBA && pde_bits[1] == 0x3999999999990C);
|
||||
|
||||
// uncached, i.e., the sysmem data is not cached in GPU's L2 cache. Clear
|
||||
@@ -1719,6 +1765,7 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999000LL);
|
||||
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
|
||||
uvm_page_directory_t dir;
|
||||
|
||||
// big versions have [11:8] set as well to test the page table merging
|
||||
uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999900LL);
|
||||
@@ -1726,37 +1773,45 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr
|
||||
|
||||
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
|
||||
|
||||
dir.index_in_parent = 0;
|
||||
dir.host_parent = NULL;
|
||||
dir.depth = 0;
|
||||
|
||||
// Make sure cleared PDEs work as expected
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0);
|
||||
|
||||
memset(pde_bits, 0xFF, sizeof(pde_bits));
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
dir.depth = 3;
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
|
||||
|
||||
// Sys and vidmem PDEs
|
||||
phys_allocs[0] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
dir.depth = 0;
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x3999999999990C);
|
||||
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB0A);
|
||||
|
||||
// Dual PDEs
|
||||
phys_allocs[0] = &alloc_big_sys;
|
||||
phys_allocs[1] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
dir.depth = 3;
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x3999999999999C && pde_bits[1] == 0x1BBBBBB0A);
|
||||
|
||||
phys_allocs[0] = &alloc_big_vid;
|
||||
phys_allocs[1] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBBBA && pde_bits[1] == 0x3999999999990C);
|
||||
|
||||
// NO_ATS PDE1 (depth 2)
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 2);
|
||||
dir.depth = 2;
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
if (g_uvm_global.ats.enabled)
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB2A);
|
||||
else
|
||||
@@ -1791,104 +1846,203 @@ static NV_STATUS entry_test_ampere(uvm_gpu_t *gpu, entry_test_page_size_func ent
|
||||
|
||||
static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
|
||||
NvU64 pde_bits[2];
|
||||
uvm_page_directory_t *dirs[5];
|
||||
size_t i, num_page_sizes;
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999999000LL);
|
||||
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0xBBBBBBB000LL);
|
||||
|
||||
// big versions have [11:8] set as well to test the page table merging
|
||||
// Big versions have [11:8] set as well to test the page table merging
|
||||
uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999999900LL);
|
||||
uvm_mmu_page_table_alloc_t alloc_big_vid = fake_table_alloc(UVM_APERTURE_VID, 0xBBBBBBBB00LL);
|
||||
|
||||
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
|
||||
|
||||
// Make sure cleared PDEs work as expected
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0);
|
||||
memset(dirs, 0, sizeof(dirs));
|
||||
// Fake directory tree.
|
||||
for (i = 0; i < ARRAY_SIZE(dirs); i++) {
|
||||
dirs[i] = uvm_kvmalloc_zero(sizeof(uvm_page_directory_t) + sizeof(dirs[i]->entries[0]) * 512);
|
||||
TEST_CHECK_GOTO(dirs[i] != NULL, cleanup);
|
||||
|
||||
dirs[i]->depth = i;
|
||||
dirs[i]->index_in_parent = 0;
|
||||
|
||||
if (i == 0)
|
||||
dirs[i]->host_parent = NULL;
|
||||
else
|
||||
dirs[i]->host_parent = dirs[i - 1];
|
||||
}
|
||||
|
||||
// Make sure cleared PDEs work as expected.
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0, cleanup);
|
||||
|
||||
// Cleared PDEs work as expected for big and small PDEs.
|
||||
memset(pde_bits, 0xFF, sizeof(pde_bits));
|
||||
hal->make_pde(pde_bits, phys_allocs, 4);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[4], 0);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0 && pde_bits[1] == 0, cleanup);
|
||||
|
||||
// Sys and vidmem PDEs, uncached ATS allowed.
|
||||
phys_allocs[0] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x999999999900C);
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999900C, cleanup);
|
||||
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0xBBBBBBB00A);
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0xBBBBBBB00A, cleanup);
|
||||
|
||||
// Dual PDEs, uncached.
|
||||
// Dual PDEs, uncached. We don't use child_dir in the depth 4 checks because
|
||||
// our policy decides the PDE's PCF without using it.
|
||||
phys_allocs[0] = &alloc_big_sys;
|
||||
phys_allocs[1] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 4);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x999999999991C && pde_bits[1] == 0xBBBBBBB01A);
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[4], 0);
|
||||
if (g_uvm_global.ats.enabled)
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999991C && pde_bits[1] == 0xBBBBBBB01A, cleanup);
|
||||
else
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999990C && pde_bits[1] == 0xBBBBBBB00A, cleanup);
|
||||
|
||||
phys_allocs[0] = &alloc_big_vid;
|
||||
phys_allocs[1] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 4);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0xBBBBBBBB1A && pde_bits[1] == 0x999999999901C);
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[4], 0);
|
||||
if (g_uvm_global.ats.enabled)
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0xBBBBBBBB1A && pde_bits[1] == 0x999999999901C, cleanup);
|
||||
else
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0xBBBBBBBB0A && pde_bits[1] == 0x999999999900C, cleanup);
|
||||
|
||||
// We only need to test make_pde() on ATS when the CPU VA width < GPU's.
|
||||
if (g_uvm_global.ats.enabled && uvm_cpu_num_va_bits() < hal->num_va_bits()) {
|
||||
phys_allocs[0] = &alloc_sys;
|
||||
|
||||
dirs[1]->index_in_parent = 0;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999900C, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 0;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 0);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 1;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 1);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 2;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 2);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 511;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 511);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
|
||||
|
||||
dirs[1]->index_in_parent = 1;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[0], 1);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999900C, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 0;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 0);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 509;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 509);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 510;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 510);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
|
||||
|
||||
phys_allocs[0] = NULL;
|
||||
|
||||
dirs[1]->index_in_parent = 0;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x0, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 0;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 0);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x0, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 2;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 2);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x10, cleanup);
|
||||
|
||||
dirs[1]->index_in_parent = 1;
|
||||
dirs[2]->index_in_parent = 509;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 509);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x10, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 510;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 510);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x0, cleanup);
|
||||
}
|
||||
|
||||
// uncached, i.e., the sysmem data is not cached in GPU's L2 cache, and
|
||||
// access counters disabled.
|
||||
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_WRITE_ATOMIC,
|
||||
UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED) == 0x999999999968D);
|
||||
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_WRITE_ATOMIC,
|
||||
UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED) == 0x999999999968D,
|
||||
cleanup);
|
||||
|
||||
// change to cached.
|
||||
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_WRITE_ATOMIC,
|
||||
UVM_MMU_PTE_FLAGS_CACHED | UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED) ==
|
||||
0x9999999999685);
|
||||
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_WRITE_ATOMIC,
|
||||
UVM_MMU_PTE_FLAGS_CACHED | UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED) ==
|
||||
0x9999999999685,
|
||||
cleanup);
|
||||
|
||||
// enable access counters.
|
||||
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_WRITE_ATOMIC,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999605);
|
||||
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_WRITE_ATOMIC,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999605,
|
||||
cleanup);
|
||||
|
||||
// remove atomic
|
||||
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_WRITE,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999645);
|
||||
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_WRITE,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999645,
|
||||
cleanup);
|
||||
|
||||
// read only
|
||||
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_ONLY,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999665);
|
||||
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_ONLY,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999665,
|
||||
cleanup);
|
||||
|
||||
// local video
|
||||
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_VID,
|
||||
0xBBBBBBB000LL,
|
||||
UVM_PROT_READ_ONLY,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0xBBBBBBB661);
|
||||
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_VID,
|
||||
0xBBBBBBB000LL,
|
||||
UVM_PROT_READ_ONLY,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0xBBBBBBB661,
|
||||
cleanup);
|
||||
|
||||
// peer 1
|
||||
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_PEER_1,
|
||||
0xBBBBBBB000LL,
|
||||
UVM_PROT_READ_ONLY,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0x200000BBBBBBB663);
|
||||
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_PEER_1,
|
||||
0xBBBBBBB000LL,
|
||||
UVM_PROT_READ_ONLY,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0x200000BBBBBBB663,
|
||||
cleanup);
|
||||
|
||||
// sparse
|
||||
TEST_CHECK_RET(hal->make_sparse_pte() == 0x8);
|
||||
TEST_CHECK_GOTO(hal->make_sparse_pte() == 0x8, cleanup);
|
||||
|
||||
// sked reflected
|
||||
TEST_CHECK_RET(hal->make_sked_reflected_pte() == 0xF09);
|
||||
TEST_CHECK_GOTO(hal->make_sked_reflected_pte() == 0xF09, cleanup);
|
||||
|
||||
num_page_sizes = get_page_sizes(gpu, page_sizes);
|
||||
|
||||
for (i = 0; i < num_page_sizes; i++)
|
||||
TEST_NV_CHECK_RET(entry_test_page_size(gpu, page_sizes[i]));
|
||||
TEST_NV_CHECK_GOTO(entry_test_page_size(gpu, page_sizes[i]), cleanup);
|
||||
|
||||
return NV_OK;
|
||||
cleanup:
|
||||
for (i = 0; i < ARRAY_SIZE(dirs); i++)
|
||||
uvm_kvfree(dirs[i]);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS alloc_4k_maxwell(uvm_gpu_t *gpu)
|
||||
@@ -2303,7 +2457,8 @@ NV_STATUS uvm_test_page_tree(UVM_TEST_PAGE_TREE_PARAMS *params, struct file *fil
|
||||
gpu->parent = parent_gpu;
|
||||
|
||||
// At least test_tlb_invalidates() relies on global state
|
||||
// (g_tlb_invalidate_*) so make sure only one test instance can run at a time.
|
||||
// (g_tlb_invalidate_*) so make sure only one test instance can run at a
|
||||
// time.
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
|
||||
// Allocate the fake TLB tracking state. Notably tests still need to enable
|
||||
@@ -2311,7 +2466,13 @@ NV_STATUS uvm_test_page_tree(UVM_TEST_PAGE_TREE_PARAMS *params, struct file *fil
|
||||
// calls.
|
||||
TEST_NV_CHECK_GOTO(fake_tlb_invals_alloc(), done);
|
||||
|
||||
TEST_NV_CHECK_GOTO(maxwell_test_page_tree(gpu), done);
|
||||
// We prevent the maxwell_test_page_tree test from running on ATS-enabled
|
||||
// systems. On "fake" Maxwell-based ATS systems pde_fill() may push more
|
||||
// methods than what we support in UVM. Specifically, this happens in
|
||||
// uvm_page_tree_init(), which eventually calls phys_mem_init(). On Maxwell,
|
||||
// upper PDE levels have more than 512 entries.
|
||||
if (!g_uvm_global.ats.enabled)
|
||||
TEST_NV_CHECK_GOTO(maxwell_test_page_tree(gpu), done);
|
||||
TEST_NV_CHECK_GOTO(pascal_test_page_tree(gpu), done);
|
||||
TEST_NV_CHECK_GOTO(volta_test_page_tree(gpu), done);
|
||||
TEST_NV_CHECK_GOTO(ampere_test_page_tree(gpu), done);
|
||||
|
||||
Reference in New Issue
Block a user