Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git
Synced 2026-02-04 23:29:58 +00:00

Commit: 545.23.06

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2022 NVIDIA Corporation
    Copyright (c) 2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2022 NVIDIA Corporation
    Copyright (c) 2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to

@@ -247,6 +247,11 @@ int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferr
    return 0;
}

int nv_kthread_q_init(nv_kthread_q_t *q, const char *qname)
{
    return nv_kthread_q_init_on_node(q, qname, NV_KTHREAD_NO_NODE);
}

// Returns true (non-zero) if the item was actually scheduled, and false if the
// item was already pending in a queue.
static int _raw_q_schedule(nv_kthread_q_t *q, nv_kthread_q_item_t *q_item)

@@ -27,6 +27,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rm_mem.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_channel.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_lock.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hal.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_processors.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_allocator.c

@@ -82,10 +82,12 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_drop
NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmgrab
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_fault_to_errno
NV_CONFTEST_FUNCTION_COMPILE_TESTS += find_next_bit_wrap

NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t

@@ -99,6 +101,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += kmem_cache_has_kobj_remove_work
NV_CONFTEST_TYPE_COMPILE_TESTS += sysfs_slab_unlink
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_invalidate_range
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_arch_invalidate_secondary_tlbs
NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock

@@ -113,4 +116,3 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier

NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup

@@ -24,11 +24,11 @@
#include "nvstatus.h"

#if !defined(NV_PRINTF_STRING_SECTION)
#if defined(NVRM) && NVCPU_IS_RISCV64
#if defined(NVRM) && NVOS_IS_LIBOS
#define NV_PRINTF_STRING_SECTION __attribute__ ((section (".logging")))
#else // defined(NVRM) && NVCPU_IS_RISCV64
#else // defined(NVRM) && NVOS_IS_LIBOS
#define NV_PRINTF_STRING_SECTION
#endif // defined(NVRM) && NVCPU_IS_RISCV64
#endif // defined(NVRM) && NVOS_IS_LIBOS
#endif // !defined(NV_PRINTF_STRING_SECTION)

/*

@@ -571,7 +571,6 @@ static void uvm_vm_open_managed_entry(struct vm_area_struct *vma)
static void uvm_vm_close_managed(struct vm_area_struct *vma)
{
    uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
    uvm_processor_id_t gpu_id;
    bool make_zombie = false;

    if (current->mm != NULL)

@@ -606,12 +605,6 @@ static void uvm_vm_close_managed(struct vm_area_struct *vma)

    uvm_destroy_vma_managed(vma, make_zombie);

    // Notify GPU address spaces that the fault buffer needs to be flushed to
    // avoid finding stale entries that can be attributed to new VA ranges
    // reallocated at the same address.
    for_each_gpu_id_in_mask(gpu_id, &va_space->registered_gpu_va_spaces) {
        uvm_processor_mask_set_atomic(&va_space->needs_fault_buffer_flush, gpu_id);
    }
    uvm_va_space_up_write(va_space);

    if (current->mm != NULL)

@@ -216,6 +216,10 @@ NV_STATUS UvmDeinitialize(void);
// Note that it is not required to release VA ranges that were reserved with
// UvmReserveVa().
//
// This is useful for per-process checkpoint and restore, where kernel-mode
// state needs to be reconfigured to match the expectations of a pre-existing
// user-mode process.
//
// UvmReopen() closes the open file returned by UvmGetFileDescriptor() and
// replaces it with a new open file with the same name.
//

@@ -114,6 +114,8 @@ static void flush_tlb_write_faults(uvm_gpu_va_space_t *gpu_va_space,
{
    uvm_ats_fault_invalidate_t *ats_invalidate;

    uvm_ats_smmu_invalidate_tlbs(gpu_va_space, addr, size);

    if (client_type == UVM_FAULT_CLIENT_TYPE_GPC)
        ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.replayable.ats_invalidate;
    else

@@ -588,4 +590,3 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,

    return status;
}

@@ -29,8 +29,12 @@
#include "uvm_va_space.h"
#include "uvm_va_space_mm.h"

#include <asm/io.h>
#include <linux/iommu.h>
#include <linux/mm_types.h>
#include <linux/acpi.h>
#include <linux/device.h>
#include <linux/mmu_context.h>

// linux/sched/mm.h is needed for mmget_not_zero and mmput to get the mm
// reference required for the iommu_sva_bind_device() call. This header is not

@@ -46,17 +50,271 @@
#define UVM_IOMMU_SVA_BIND_DEVICE(dev, mm) iommu_sva_bind_device(dev, mm)
#endif

// Base address of SMMU CMDQ-V for GSMMU0.
#define SMMU_CMDQV_BASE_ADDR(smmu_base) (smmu_base + 0x200000)
#define SMMU_CMDQV_BASE_LEN 0x00830000

// CMDQV configuration is done by firmware but we check status here.
#define SMMU_CMDQV_CONFIG 0x0
#define SMMU_CMDQV_CONFIG_CMDQV_EN BIT(0)

// Used to map a particular VCMDQ to a VINTF.
#define SMMU_CMDQV_CMDQ_ALLOC_MAP(vcmdq_id) (0x200 + 0x4 * (vcmdq_id))
#define SMMU_CMDQV_CMDQ_ALLOC_MAP_ALLOC BIT(0)

// Shift for the field containing the index of the virtual interface
// owning the VCMDQ.
#define SMMU_CMDQV_CMDQ_ALLOC_MAP_VIRT_INTF_INDX_SHIFT 15

// Base address for the VINTF registers.
#define SMMU_VINTF_BASE_ADDR(cmdqv_base_addr, vintf_id) (cmdqv_base_addr + 0x1000 + 0x100 * (vintf_id))

// Virtual interface (VINTF) configuration registers. The WAR only
// works on baremetal so we need to configure ourselves as the
// hypervisor owner.
#define SMMU_VINTF_CONFIG 0x0
#define SMMU_VINTF_CONFIG_ENABLE BIT(0)
#define SMMU_VINTF_CONFIG_HYP_OWN BIT(17)

#define SMMU_VINTF_STATUS 0x0
#define SMMU_VINTF_STATUS_ENABLED BIT(0)

// Calculates the base address for a particular VCMDQ instance.
#define SMMU_VCMDQ_BASE_ADDR(cmdqv_base_addr, vcmdq_id) (cmdqv_base_addr + 0x10000 + 0x80 * (vcmdq_id))

// SMMU command queue consumer index register. Updated by SMMU
// when commands are consumed.
#define SMMU_VCMDQ_CONS 0x0

// SMMU command queue producer index register. Updated by UVM when
// commands are added to the queue.
#define SMMU_VCMDQ_PROD 0x4

// Configuration register used to enable a VCMDQ.
#define SMMU_VCMDQ_CONFIG 0x8
#define SMMU_VCMDQ_CONFIG_ENABLE BIT(0)

// Status register used to check the VCMDQ is enabled.
#define SMMU_VCMDQ_STATUS 0xc
#define SMMU_VCMDQ_STATUS_ENABLED BIT(0)

// Base address offset for the VCMDQ registers.
#define SMMU_VCMDQ_CMDQ_BASE 0x10000

// Size of the command queue. Each command is 8 bytes and we can't
// have a command queue greater than one page.
#define SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE 9
#define SMMU_VCMDQ_CMDQ_ENTRIES (1UL << SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE)

// We always use VINTF63 for the WAR
#define VINTF 63
static void smmu_vintf_write32(void __iomem *smmu_cmdqv_base, int reg, NvU32 val)
{
    iowrite32(val, SMMU_VINTF_BASE_ADDR(smmu_cmdqv_base, VINTF) + reg);
}

static NvU32 smmu_vintf_read32(void __iomem *smmu_cmdqv_base, int reg)
{
    return ioread32(SMMU_VINTF_BASE_ADDR(smmu_cmdqv_base, VINTF) + reg);
}

// We always use VCMDQ127 for the WAR
#define VCMDQ 127
void smmu_vcmdq_write32(void __iomem *smmu_cmdqv_base, int reg, NvU32 val)
{
    iowrite32(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
}

NvU32 smmu_vcmdq_read32(void __iomem *smmu_cmdqv_base, int reg)
{
    return ioread32(SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
}

static void smmu_vcmdq_write64(void __iomem *smmu_cmdqv_base, int reg, NvU64 val)
{
    iowrite64(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
}

// Fix for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU
// TLB invalidates on read-only to read-write upgrades
static NV_STATUS uvm_ats_smmu_war_init(uvm_parent_gpu_t *parent_gpu)
{
    uvm_spin_loop_t spin;
    NV_STATUS status;
    unsigned long cmdqv_config;
    void __iomem *smmu_cmdqv_base;
    struct acpi_iort_node *node;
    struct acpi_iort_smmu_v3 *iort_smmu;

    node = *(struct acpi_iort_node **) dev_get_platdata(parent_gpu->pci_dev->dev.iommu->iommu_dev->dev->parent);
    iort_smmu = (struct acpi_iort_smmu_v3 *) node->node_data;

    smmu_cmdqv_base = ioremap(SMMU_CMDQV_BASE_ADDR(iort_smmu->base_address), SMMU_CMDQV_BASE_LEN);
    if (!smmu_cmdqv_base)
        return NV_ERR_NO_MEMORY;

    parent_gpu->smmu_war.smmu_cmdqv_base = smmu_cmdqv_base;
    cmdqv_config = ioread32(smmu_cmdqv_base + SMMU_CMDQV_CONFIG);
    if (!(cmdqv_config & SMMU_CMDQV_CONFIG_CMDQV_EN)) {
        status = NV_ERR_OBJECT_NOT_FOUND;
        goto out;
    }

    // Allocate SMMU CMDQ pages for WAR
    parent_gpu->smmu_war.smmu_cmdq = alloc_page(NV_UVM_GFP_FLAGS | __GFP_ZERO);
    if (!parent_gpu->smmu_war.smmu_cmdq) {
        status = NV_ERR_NO_MEMORY;
        goto out;
    }

    // Initialise VINTF for the WAR
    smmu_vintf_write32(smmu_cmdqv_base, SMMU_VINTF_CONFIG, SMMU_VINTF_CONFIG_ENABLE | SMMU_VINTF_CONFIG_HYP_OWN);
    UVM_SPIN_WHILE(!(smmu_vintf_read32(smmu_cmdqv_base, SMMU_VINTF_STATUS) & SMMU_VINTF_STATUS_ENABLED), &spin);

    // Allocate VCMDQ to VINTF
    iowrite32((VINTF << SMMU_CMDQV_CMDQ_ALLOC_MAP_VIRT_INTF_INDX_SHIFT) | SMMU_CMDQV_CMDQ_ALLOC_MAP_ALLOC,
              smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));

    BUILD_BUG_ON((SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE + 3) > PAGE_SHIFT);
    smmu_vcmdq_write64(smmu_cmdqv_base, SMMU_VCMDQ_CMDQ_BASE,
                       page_to_phys(parent_gpu->smmu_war.smmu_cmdq) | SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE);
    smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONS, 0);
    smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_PROD, 0);
    smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONFIG, SMMU_VCMDQ_CONFIG_ENABLE);
    UVM_SPIN_WHILE(!(smmu_vcmdq_read32(smmu_cmdqv_base, SMMU_VCMDQ_STATUS) & SMMU_VCMDQ_STATUS_ENABLED), &spin);

    uvm_mutex_init(&parent_gpu->smmu_war.smmu_lock, UVM_LOCK_ORDER_LEAF);
    parent_gpu->smmu_war.smmu_prod = 0;
    parent_gpu->smmu_war.smmu_cons = 0;

    return NV_OK;

out:
    iounmap(parent_gpu->smmu_war.smmu_cmdqv_base);
    parent_gpu->smmu_war.smmu_cmdqv_base = NULL;

    return status;
}

static void uvm_ats_smmu_war_deinit(uvm_parent_gpu_t *parent_gpu)
{
    void __iomem *smmu_cmdqv_base = parent_gpu->smmu_war.smmu_cmdqv_base;
    NvU32 cmdq_alloc_map;

    if (parent_gpu->smmu_war.smmu_cmdqv_base) {
        smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONFIG, 0);
        cmdq_alloc_map = ioread32(smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));
        iowrite32(cmdq_alloc_map & SMMU_CMDQV_CMDQ_ALLOC_MAP_ALLOC, smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));
        smmu_vintf_write32(smmu_cmdqv_base, SMMU_VINTF_CONFIG, 0);
    }

    if (parent_gpu->smmu_war.smmu_cmdq)
        __free_page(parent_gpu->smmu_war.smmu_cmdq);

    if (parent_gpu->smmu_war.smmu_cmdqv_base)
        iounmap(parent_gpu->smmu_war.smmu_cmdqv_base);
}

// The SMMU on ARM64 can run under different translation regimes depending on
// what features the OS and CPU variant support. The CPU for GH180 supports
// virtualisation extensions and starts the kernel at EL2 meaning SMMU operates
// under the NS-EL2-E2H translation regime. Therefore we need to use the
// TLBI_EL2_* commands which invalidate TLB entries created under this
// translation regime.
#define CMDQ_OP_TLBI_EL2_ASID 0x21;
#define CMDQ_OP_TLBI_EL2_VA 0x22;
#define CMDQ_OP_CMD_SYNC 0x46

// Use the same maximum as used for MAX_TLBI_OPS in the upstream
// kernel.
#define UVM_MAX_TLBI_OPS (1UL << (PAGE_SHIFT - 3))

#if UVM_ATS_SMMU_WAR_REQUIRED()
void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size)
{
    struct mm_struct *mm = gpu_va_space->va_space->va_space_mm.mm;
    uvm_parent_gpu_t *parent_gpu = gpu_va_space->gpu->parent;
    struct {
        NvU64 low;
        NvU64 high;
    } *vcmdq;
    unsigned long vcmdq_prod;
    NvU64 end;
    uvm_spin_loop_t spin;
    NvU16 asid;

    if (!parent_gpu->smmu_war.smmu_cmdqv_base)
        return;

    asid = arm64_mm_context_get(mm);
    vcmdq = kmap(parent_gpu->smmu_war.smmu_cmdq);
    uvm_mutex_lock(&parent_gpu->smmu_war.smmu_lock);
    vcmdq_prod = parent_gpu->smmu_war.smmu_prod;

    // Our queue management is very simple. The mutex prevents multiple
    // producers writing to the queue and all our commands require waiting for
    // the queue to drain so we know it's empty. If we can't fit enough commands
    // in the queue we just invalidate the whole ASID.
    //
    // The command queue is a circular buffer with the MSB representing a wrap
    // bit that must toggle on each wrap. See the SMMU architecture
    // specification for more details.
    //
    // SMMU_VCMDQ_CMDQ_ENTRIES - 1 because we need to leave space for the
    // CMD_SYNC.
    if ((size >> PAGE_SHIFT) > min(UVM_MAX_TLBI_OPS, SMMU_VCMDQ_CMDQ_ENTRIES - 1)) {
        vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low = CMDQ_OP_TLBI_EL2_ASID;
        vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low |= (NvU64) asid << 48;
        vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].high = 0;
        vcmdq_prod++;
    }
    else {
        for (end = addr + size; addr < end; addr += PAGE_SIZE) {
            vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low = CMDQ_OP_TLBI_EL2_VA;
            vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low |= (NvU64) asid << 48;
            vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].high = addr & ~((1UL << 12) - 1);
            vcmdq_prod++;
        }
    }

    vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low = CMDQ_OP_CMD_SYNC;
    vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].high = 0x0;
    vcmdq_prod++;

    // MSB is the wrap bit
    vcmdq_prod &= (1UL << (SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE + 1)) - 1;
    parent_gpu->smmu_war.smmu_prod = vcmdq_prod;
    smmu_vcmdq_write32(parent_gpu->smmu_war.smmu_cmdqv_base, SMMU_VCMDQ_PROD, parent_gpu->smmu_war.smmu_prod);

    UVM_SPIN_WHILE(
        (smmu_vcmdq_read32(parent_gpu->smmu_war.smmu_cmdqv_base, SMMU_VCMDQ_CONS) & GENMASK(19, 0)) != vcmdq_prod,
        &spin);

    uvm_mutex_unlock(&parent_gpu->smmu_war.smmu_lock);
    kunmap(parent_gpu->smmu_war.smmu_cmdq);
    arm64_mm_context_put(mm);
}
#endif

NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
    int ret;

    ret = iommu_dev_enable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
    if (ret)
        return errno_to_nv_status(ret);

    return errno_to_nv_status(ret);
    if (UVM_ATS_SMMU_WAR_REQUIRED())
        return uvm_ats_smmu_war_init(parent_gpu);
    else
        return NV_OK;
}

void uvm_ats_sva_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
    if (UVM_ATS_SMMU_WAR_REQUIRED())
        uvm_ats_smmu_war_deinit(parent_gpu);

    iommu_dev_disable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
}
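
The WAR above builds SMMU invalidation commands in a small private command queue, and its
producer index deliberately keeps one bit more than the queue size so that the extra MSB can
serve as the SMMU wrap flag. The standalone C sketch below is not part of the commit; it only
illustrates that index arithmetic, assuming the same LOG2SIZE of 9 used above:

#include <stdio.h>

#define LOG2SIZE 9
#define ENTRIES  (1UL << LOG2SIZE)              // 512 slots
#define IDX_MASK ((1UL << (LOG2SIZE + 1)) - 1)  // slot bits plus the wrap bit

int main(void)
{
    unsigned long prod = ENTRIES - 1; // last slot, wrap bit still 0

    for (int i = 0; i < 3; i++) {
        unsigned long slot = prod % ENTRIES;         // where the next command is written
        unsigned long wrap = (prod >> LOG2SIZE) & 1; // toggles each time the queue wraps
        printf("prod=%lu slot=%lu wrap=%lu\n", prod, slot, wrap);
        prod = (prod + 1) & IDX_MASK;                // same masking as the driver's update
    }
    return 0;
}

On a 4 KiB-page kernel UVM_MAX_TLBI_OPS evaluates to 512, so the effective per-range limit in
uvm_ats_smmu_invalidate_tlbs() is min(512, SMMU_VCMDQ_CMDQ_ENTRIES - 1) = 511 pages; anything
larger falls back to the single whole-ASID invalidate.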

@@ -53,6 +53,17 @@
#define UVM_ATS_SVA_SUPPORTED() 0
#endif

// If NV_ARCH_INVALIDATE_SECONDARY_TLBS is defined it means the upstream fix is
// in place so no need for the WAR from Bug 4130089: [GH180][r535] WAR for
// kernel not issuing SMMU TLB invalidates on read-only
#if defined(NV_ARCH_INVALIDATE_SECONDARY_TLBS)
#define UVM_ATS_SMMU_WAR_REQUIRED() 0
#elif NVCPU_IS_AARCH64
#define UVM_ATS_SMMU_WAR_REQUIRED() 1
#else
#define UVM_ATS_SMMU_WAR_REQUIRED() 0
#endif

typedef struct
{
    int placeholder;

@@ -81,6 +92,17 @@ typedef struct

// LOCKING: None
void uvm_ats_sva_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);

// Fix for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU
// TLB invalidates on read-only to read-write upgrades
#if UVM_ATS_SMMU_WAR_REQUIRED()
void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size);
#else
static void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size)
{

}
#endif
#else
static NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu)
{

@@ -111,6 +133,11 @@ typedef struct
{

}

static void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size)
{

}
#endif // UVM_ATS_SVA_SUPPORTED

#endif // __UVM_ATS_SVA_H__

@@ -2683,7 +2683,7 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
    // caches vidmem (and sysmem), we place GPFIFO and GPPUT on sysmem to avoid
    // cache thrash. The memory access latency is reduced, despite the required
    // access through the bus, because no cache coherence message is exchanged.
    if (uvm_gpu_is_coherent(gpu->parent)) {
    if (uvm_parent_gpu_is_coherent(gpu->parent)) {
        manager->conf.gpfifo_loc = UVM_BUFFER_LOCATION_SYS;

        // On GPUs with limited ESCHED addressing range, e.g., Volta on P9, RM

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2013-2021 NVIDIA Corporation
    Copyright (c) 2013-2023 NVIDIA Corporation

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License

@@ -233,18 +233,6 @@ unsigned uvm_get_stale_thread_id(void)
    return (unsigned)task_pid_vnr(current);
}

//
// A simple security rule for allowing access to UVM user space memory: if you
// are the same user as the owner of the memory, or if you are root, then you
// are granted access. The idea is to allow debuggers and profilers to work, but
// without opening up any security holes.
//
NvBool uvm_user_id_security_check(uid_t euidTarget)
{
    return (NV_CURRENT_EUID() == euidTarget) ||
           (UVM_ROOT_UID == euidTarget);
}

void on_uvm_test_fail(void)
{
    (void)NULL;

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2013-2021 NVIDIA Corporation
    Copyright (c) 2013-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to

@@ -282,9 +282,6 @@ static inline void kmem_cache_destroy_safe(struct kmem_cache **ppCache)
    }
}

static const uid_t UVM_ROOT_UID = 0;

typedef struct
{
    NvU64 start_time_ns;

@@ -335,7 +332,6 @@ NV_STATUS errno_to_nv_status(int errnoCode);
int nv_status_to_errno(NV_STATUS status);
unsigned uvm_get_stale_process_id(void);
unsigned uvm_get_stale_thread_id(void);
NvBool uvm_user_id_security_check(uid_t euidTarget);

extern int uvm_enable_builtin_tests;

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2021-2023 NVIDIA Corporation
    Copyright (c) 2021 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to

@@ -54,26 +54,23 @@ bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu)
    return uvm_conf_computing_get_mode(gpu->parent) == UVM_GPU_CONF_COMPUTE_MODE_HCC;
}

void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent)
NV_STATUS uvm_conf_computing_init_parent_gpu(const uvm_parent_gpu_t *parent)
{
    uvm_gpu_t *first_gpu;
    UvmGpuConfComputeMode cc, sys_cc;
    uvm_gpu_t *first;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    // The Confidential Computing state of the GPU should match that of the
    // system.
    UVM_ASSERT(uvm_conf_computing_mode_enabled_parent(parent) == g_uvm_global.conf_computing_enabled);

    // TODO: Bug 2844714: since we have no routine to traverse parent GPUs,
    // find first child GPU and get its parent.
    first_gpu = uvm_global_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);
    if (first_gpu == NULL)
        return;
    first = uvm_global_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);
    if (!first)
        return NV_OK;

    // All GPUs derive Confidential Computing status from their parent. By
    // current policy all parent GPUs have identical Confidential Computing
    // status.
    UVM_ASSERT(uvm_conf_computing_get_mode(parent) == uvm_conf_computing_get_mode(first_gpu->parent));
    sys_cc = uvm_conf_computing_get_mode(first->parent);
    cc = uvm_conf_computing_get_mode(parent);

    return cc == sys_cc ? NV_OK : NV_ERR_NOT_SUPPORTED;
}

static void dma_buffer_destroy_locked(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,

@@ -60,8 +60,10 @@
// UVM_METHOD_SIZE * 2 * 10 = 80.
#define UVM_CONF_COMPUTING_SIGN_BUF_MAX_SIZE 80

void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent);

// All GPUs derive confidential computing status from their parent.
// By current policy all parent GPUs have identical confidential
// computing status.
NV_STATUS uvm_conf_computing_init_parent_gpu(const uvm_parent_gpu_t *parent);
bool uvm_conf_computing_mode_enabled_parent(const uvm_parent_gpu_t *parent);
bool uvm_conf_computing_mode_enabled(const uvm_gpu_t *gpu);
bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu);

@@ -71,6 +71,11 @@ static void uvm_unregister_callbacks(void)
    }
}

static void sev_init(const UvmPlatformInfo *platform_info)
{
    g_uvm_global.sev_enabled = platform_info->sevEnabled;
}

NV_STATUS uvm_global_init(void)
{
    NV_STATUS status;

@@ -119,7 +124,8 @@ NV_STATUS uvm_global_init(void)

    uvm_ats_init(&platform_info);
    g_uvm_global.num_simulated_devices = 0;
    g_uvm_global.conf_computing_enabled = platform_info.confComputingEnabled;

    sev_init(&platform_info);

    status = uvm_gpu_init();
    if (status != NV_OK) {

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2023 NVIDIA Corporation
    Copyright (c) 2015-2021 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to

@@ -143,16 +143,11 @@ struct uvm_global_struct
        struct page *page;
    } unload_state;

    // True if the VM has AMD's SEV, or equivalent HW security extensions such
    // as Intel's TDX, enabled. The flag is always false on the host.
    //
    // This value moves in tandem with that of Confidential Computing in the
    // GPU(s) in all supported configurations, so it is used as a proxy for the
    // Confidential Computing state.
    //
    // This field is set once during global initialization (uvm_global_init),
    // and can be read afterwards without acquiring any locks.
    bool conf_computing_enabled;
    // AMD Secure Encrypted Virtualization (SEV) status. True if VM has SEV
    // enabled. This field is set once during global initialization
    // (uvm_global_init), and can be read afterwards without acquiring any
    // locks.
    bool sev_enabled;
};

// Initialize global uvm state

@@ -238,10 +233,8 @@ static uvm_gpu_t *uvm_gpu_get_by_processor_id(uvm_processor_id_t id)
    return gpu;
}

static uvmGpuSessionHandle uvm_gpu_session_handle(uvm_gpu_t *gpu)
static uvmGpuSessionHandle uvm_global_session_handle(void)
{
    if (gpu->parent->smc.enabled)
        return gpu->smc.rm_session_handle;
    return g_uvm_global.rm_session_handle;
}

@@ -99,8 +99,8 @@ static void fill_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo *gpu_in
    parent_gpu->system_bus.link_rate_mbyte_per_s = gpu_info->sysmemLinkRateMBps;

    if (gpu_info->systemMemoryWindowSize > 0) {
        // memory_window_end is inclusive but uvm_gpu_is_coherent() checks
        // memory_window_end > memory_window_start as its condition.
        // memory_window_end is inclusive but uvm_parent_gpu_is_coherent()
        // checks memory_window_end > memory_window_start as its condition.
        UVM_ASSERT(gpu_info->systemMemoryWindowSize > 1);
        parent_gpu->system_bus.memory_window_start = gpu_info->systemMemoryWindowStart;
        parent_gpu->system_bus.memory_window_end = gpu_info->systemMemoryWindowStart +

@@ -136,12 +136,12 @@ static NV_STATUS get_gpu_caps(uvm_gpu_t *gpu)
        return status;

    if (gpu_caps.numaEnabled) {
        UVM_ASSERT(uvm_gpu_is_coherent(gpu->parent));
        UVM_ASSERT(uvm_parent_gpu_is_coherent(gpu->parent));
        gpu->mem_info.numa.enabled = true;
        gpu->mem_info.numa.node_id = gpu_caps.numaNodeId;
    }
    else {
        UVM_ASSERT(!uvm_gpu_is_coherent(gpu->parent));
        UVM_ASSERT(!uvm_parent_gpu_is_coherent(gpu->parent));
    }

    return NV_OK;

@@ -1089,7 +1089,7 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
{
    NV_STATUS status;

    status = uvm_rm_locked_call(nvUvmInterfaceDeviceCreate(g_uvm_global.rm_session_handle,
    status = uvm_rm_locked_call(nvUvmInterfaceDeviceCreate(uvm_global_session_handle(),
                                                           gpu_info,
                                                           gpu_uuid,
                                                           &parent_gpu->rm_device,

@@ -1099,7 +1099,12 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
        return status;
    }

    uvm_conf_computing_check_parent_gpu(parent_gpu);
    status = uvm_conf_computing_init_parent_gpu(parent_gpu);
    if (status != NV_OK) {
        UVM_ERR_PRINT("Confidential computing: %s, GPU %s\n",
                      nvstatusToString(status), parent_gpu->name);
        return status;
    }

    parent_gpu->pci_dev = gpu_platform_info->pci_dev;
    parent_gpu->closest_cpu_numa_node = dev_to_node(&parent_gpu->pci_dev->dev);

@@ -1161,19 +1166,8 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
{
    NV_STATUS status;

    // Presently, an RM client can only subscribe to a single partition per
    // GPU. Therefore, UVM needs to create several RM clients. For simplicity,
    // and since P2P is not supported when SMC partitions are created, we
    // create a client (session) per GPU partition.
    if (gpu->parent->smc.enabled) {
        UvmPlatformInfo platform_info;
        status = uvm_rm_locked_call(nvUvmInterfaceSessionCreate(&gpu->smc.rm_session_handle, &platform_info));
        if (status != NV_OK) {
            UVM_ERR_PRINT("Creating RM session failed: %s\n", nvstatusToString(status));
            return status;
        }

        status = uvm_rm_locked_call(nvUvmInterfaceDeviceCreate(uvm_gpu_session_handle(gpu),
        status = uvm_rm_locked_call(nvUvmInterfaceDeviceCreate(uvm_global_session_handle(),
                                                               gpu_info,
                                                               uvm_gpu_uuid(gpu),
                                                               &gpu->smc.rm_device,

@@ -1543,9 +1537,6 @@ static void deinit_gpu(uvm_gpu_t *gpu)
    if (gpu->parent->smc.enabled) {
        if (gpu->smc.rm_device != 0)
            uvm_rm_locked_call_void(nvUvmInterfaceDeviceDestroy(gpu->smc.rm_device));

        if (gpu->smc.rm_session_handle != 0)
            uvm_rm_locked_call_void(nvUvmInterfaceSessionDestroy(gpu->smc.rm_session_handle));
    }

    gpu->magic = 0;

@@ -2575,7 +2566,7 @@ static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
    uvm_mmu_destroy_peer_identity_mappings(gpu0, gpu1);
    uvm_mmu_destroy_peer_identity_mappings(gpu1, gpu0);

    uvm_rm_locked_call_void(nvUvmInterfaceP2pObjectDestroy(uvm_gpu_session_handle(gpu0), p2p_handle));
    uvm_rm_locked_call_void(nvUvmInterfaceP2pObjectDestroy(uvm_global_session_handle(), p2p_handle));

    UVM_ASSERT(uvm_gpu_get(gpu0->global_id) == gpu0);
    UVM_ASSERT(uvm_gpu_get(gpu1->global_id) == gpu1);

@@ -2701,9 +2692,9 @@ uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_p
    return id;
}

uvm_gpu_peer_t *uvm_gpu_index_peer_caps(const uvm_gpu_id_t gpu_id1, const uvm_gpu_id_t gpu_id2)
uvm_gpu_peer_t *uvm_gpu_index_peer_caps(const uvm_gpu_id_t gpu_id0, const uvm_gpu_id_t gpu_id1)
{
    NvU32 table_index = uvm_gpu_peer_table_index(gpu_id1, gpu_id2);
    NvU32 table_index = uvm_gpu_peer_table_index(gpu_id0, gpu_id1);
    return &g_uvm_global.peers[table_index];
}

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation
    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to

@@ -167,7 +167,7 @@ struct uvm_service_block_context_struct
    } per_processor_masks[UVM_ID_MAX_PROCESSORS];

    // State used by the VA block routines called by the servicing routine
    uvm_va_block_context_t block_context;
    uvm_va_block_context_t *block_context;

    // Prefetch state hint
    uvm_perf_prefetch_hint_t prefetch_hint;

@@ -263,7 +263,10 @@ struct uvm_fault_service_batch_context_struct

    NvU32 num_coalesced_faults;

    bool has_fatal_faults;
    // One of the VA spaces in this batch which had fatal faults. If NULL, no
    // faults were fatal. More than one VA space could have fatal faults, but we
    // pick one to be the target of the cancel sequence.
    uvm_va_space_t *fatal_va_space;

    bool has_throttled_faults;

@@ -825,8 +828,6 @@ struct uvm_gpu_struct
    {
        NvU32 swizz_id;

        uvmGpuSessionHandle rm_session_handle;

        // RM device handle used in many of the UVM/RM APIs.
        //
        // Do not read this field directly, use uvm_gpu_device_handle instead.

@@ -1162,6 +1163,16 @@ struct uvm_parent_gpu_struct
        NvU64 memory_window_start;
        NvU64 memory_window_end;
    } system_bus;

    // WAR to issue ATS TLB invalidation commands ourselves.
    struct
    {
        uvm_mutex_t smmu_lock;
        struct page *smmu_cmdq;
        void __iomem *smmu_cmdqv_base;
        unsigned long smmu_prod;
        unsigned long smmu_cons;
    } smmu_war;
};

static const char *uvm_gpu_name(uvm_gpu_t *gpu)

@@ -1336,7 +1347,7 @@ static NvU64 uvm_gpu_retained_count(uvm_gpu_t *gpu)
void uvm_parent_gpu_kref_put(uvm_parent_gpu_t *gpu);

// Calculates peer table index using GPU ids.
NvU32 uvm_gpu_peer_table_index(uvm_gpu_id_t gpu_id1, uvm_gpu_id_t gpu_id2);
NvU32 uvm_gpu_peer_table_index(const uvm_gpu_id_t gpu_id0, const uvm_gpu_id_t gpu_id1);

// Either retains an existing PCIe peer entry or creates a new one. In both
// cases the two GPUs are also each retained.

@@ -1355,7 +1366,7 @@ uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);

// Get the P2P capabilities between the gpus with the given indexes
uvm_gpu_peer_t *uvm_gpu_index_peer_caps(uvm_gpu_id_t gpu_id1, uvm_gpu_id_t gpu_id2);
uvm_gpu_peer_t *uvm_gpu_index_peer_caps(const uvm_gpu_id_t gpu_id0, const uvm_gpu_id_t gpu_id1);

// Get the P2P capabilities between the given gpus
static uvm_gpu_peer_t *uvm_gpu_peer_caps(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)

@@ -1363,10 +1374,10 @@ static uvm_gpu_peer_t *uvm_gpu_peer_caps(const uvm_gpu_t *gpu0, const uvm_gpu_t
    return uvm_gpu_index_peer_caps(gpu0->id, gpu1->id);
}

static bool uvm_gpus_are_nvswitch_connected(uvm_gpu_t *gpu1, uvm_gpu_t *gpu2)
static bool uvm_gpus_are_nvswitch_connected(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
{
    if (gpu1->parent->nvswitch_info.is_nvswitch_connected && gpu2->parent->nvswitch_info.is_nvswitch_connected) {
        UVM_ASSERT(uvm_gpu_peer_caps(gpu1, gpu2)->link_type >= UVM_GPU_LINK_NVLINK_2);
    if (gpu0->parent->nvswitch_info.is_nvswitch_connected && gpu1->parent->nvswitch_info.is_nvswitch_connected) {
        UVM_ASSERT(uvm_gpu_peer_caps(gpu0, gpu1)->link_type >= UVM_GPU_LINK_NVLINK_2);
        return true;
    }

@@ -1511,7 +1522,7 @@ bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
// addresses.
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);

static bool uvm_gpu_is_coherent(const uvm_parent_gpu_t *parent_gpu)
static bool uvm_parent_gpu_is_coherent(const uvm_parent_gpu_t *parent_gpu)
{
    return parent_gpu->system_bus.memory_window_end > parent_gpu->system_bus.memory_window_start;
}
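
Because memory_window_end is inclusive, the end > start check above only holds when the window
spans more than one byte, which is why fill_gpu_info() asserts systemMemoryWindowSize > 1 before
populating the window. The standalone sketch below is not from the driver; the struct and values
are illustrative only:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

// Illustrative stand-in for the parent GPU's system_bus window fields.
struct window {
    uint64_t start;
    uint64_t end; // inclusive last address of the window
};

static bool window_is_coherent(const struct window *w)
{
    return w->end > w->start; // same condition as uvm_parent_gpu_is_coherent()
}

int main(void)
{
    uint64_t base = 0x100000000ull;
    struct window one_byte = { base, base };                      // size 1: end == start, not coherent
    struct window big      = { base, base + (64ull << 30) - 1 };  // 64 GiB window

    printf("size 1 -> %d, 64 GiB -> %d\n", window_is_coherent(&one_byte), window_is_coherent(&big));
    return 0;
}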

@@ -985,7 +985,7 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
        return NV_OK;

    if (uvm_processor_mask_test(&va_block->resident, processor))
        residency_mask = uvm_va_block_resident_mask_get(va_block, processor);
        residency_mask = uvm_va_block_resident_mask_get(va_block, processor, NUMA_NO_NODE);
    else
        residency_mask = NULL;

@@ -1036,8 +1036,8 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,

        // If the underlying VMA is gone, skip HMM migrations.
        if (uvm_va_block_is_hmm(va_block)) {
            status = uvm_hmm_find_vma(service_context->block_context.mm,
                                      &service_context->block_context.hmm.vma,
            status = uvm_hmm_find_vma(service_context->block_context->mm,
                                      &service_context->block_context->hmm.vma,
                                      address);
            if (status == NV_ERR_INVALID_ADDRESS)
                continue;

@@ -1048,7 +1048,7 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
        policy = uvm_va_policy_get(va_block, address);

        new_residency = uvm_va_block_select_residency(va_block,
                                                      &service_context->block_context,
                                                      service_context->block_context,
                                                      page_index,
                                                      processor,
                                                      uvm_fault_access_type_mask_bit(UVM_FAULT_ACCESS_TYPE_PREFETCH),

@@ -1083,7 +1083,7 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
    // Remove pages that are already resident in the destination processors
    for_each_id_in_mask(id, &update_processors) {
        bool migrate_pages;
        uvm_page_mask_t *residency_mask = uvm_va_block_resident_mask_get(va_block, id);
        uvm_page_mask_t *residency_mask = uvm_va_block_resident_mask_get(va_block, id, NUMA_NO_NODE);
        UVM_ASSERT(residency_mask);

        migrate_pages = uvm_page_mask_andnot(&service_context->per_processor_masks[uvm_id_value(id)].new_residency,

@@ -1101,9 +1101,9 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
    if (uvm_va_block_is_hmm(va_block)) {
        status = NV_ERR_INVALID_ADDRESS;
        if (service_context->block_context.mm) {
        if (service_context->block_context->mm) {
            status = uvm_hmm_find_policy_vma_and_outer(va_block,
                                                       &service_context->block_context.hmm.vma,
                                                       &service_context->block_context->hmm.vma,
                                                       first_page_index,
                                                       &policy,
                                                       &outer);

@@ -1206,7 +1206,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,

    service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
    service_context->num_retries = 0;
    service_context->block_context.mm = mm;
    service_context->block_context->mm = mm;

    if (uvm_va_block_is_hmm(va_block)) {
        uvm_hmm_service_context_init(service_context);

@@ -292,6 +292,7 @@ NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
{
    NV_STATUS status = NV_OK;
    char kthread_name[TASK_COMM_LEN + 1];
    uvm_va_block_context_t *block_context;

    if (parent_gpu->replayable_faults_supported) {
        status = uvm_gpu_fault_buffer_init(parent_gpu);

@@ -311,6 +312,12 @@ NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
        if (!parent_gpu->isr.replayable_faults.stats.cpu_exec_count)
            return NV_ERR_NO_MEMORY;

        block_context = uvm_va_block_context_alloc(NULL);
        if (!block_context)
            return NV_ERR_NO_MEMORY;

        parent_gpu->fault_buffer_info.replayable.block_service_context.block_context = block_context;

        parent_gpu->isr.replayable_faults.handling = true;

        snprintf(kthread_name, sizeof(kthread_name), "UVM GPU%u BH", uvm_id_value(parent_gpu->id));

@@ -333,6 +340,12 @@ NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
            if (!parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count)
                return NV_ERR_NO_MEMORY;

            block_context = uvm_va_block_context_alloc(NULL);
            if (!block_context)
                return NV_ERR_NO_MEMORY;

            parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context = block_context;

            parent_gpu->isr.non_replayable_faults.handling = true;

            snprintf(kthread_name, sizeof(kthread_name), "UVM GPU%u KC", uvm_id_value(parent_gpu->id));

@@ -356,6 +369,13 @@ NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
            return status;
        }

        block_context = uvm_va_block_context_alloc(NULL);
        if (!block_context)
            return NV_ERR_NO_MEMORY;

        parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context =
            block_context;

        nv_kthread_q_item_init(&parent_gpu->isr.access_counters.bottom_half_q_item,
                               access_counters_isr_bottom_half_entry,
                               parent_gpu);

@@ -410,6 +430,8 @@ void uvm_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)

void uvm_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
{
    uvm_va_block_context_t *block_context;

    // Return ownership to RM:
    if (parent_gpu->isr.replayable_faults.was_handling) {
        // No user threads could have anything left on

@@ -439,8 +461,18 @@ void uvm_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
        // It is safe to deinitialize access counters even if they have not been
        // successfully initialized.
        uvm_gpu_deinit_access_counters(parent_gpu);
        block_context =
            parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context;
        uvm_va_block_context_free(block_context);
    }

    if (parent_gpu->non_replayable_faults_supported) {
        block_context = parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context;
        uvm_va_block_context_free(block_context);
    }

    block_context = parent_gpu->fault_buffer_info.replayable.block_service_context.block_context;
    uvm_va_block_context_free(block_context);
    uvm_kvfree(parent_gpu->isr.replayable_faults.stats.cpu_exec_count);
    uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
    uvm_kvfree(parent_gpu->isr.access_counters.stats.cpu_exec_count);
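
The hunks above change the embedded uvm_va_block_context_t in uvm_service_block_context_t into a
pointer that is allocated once per fault or access-counter buffer in uvm_gpu_init_isr() and released
in uvm_gpu_deinit_isr(). A minimal standalone sketch of that init/deinit pairing, using hypothetical
names rather than the driver's API:

#include <stdlib.h>

// Hypothetical stand-ins for the driver types; only the ownership pattern matters.
typedef struct { char big_state[4096]; } block_context_t;
typedef struct { block_context_t *block_context; } service_context_t;

static block_context_t *block_context_alloc(void)
{
    return calloc(1, sizeof(block_context_t)); // heap allocation instead of embedding the large struct
}

static int service_context_init(service_context_t *sc)
{
    sc->block_context = block_context_alloc();
    return sc->block_context ? 0 : -1;         // mirrors the NV_ERR_NO_MEMORY checks in the hunks
}

static void service_context_deinit(service_context_t *sc)
{
    free(sc->block_context);                   // paired release, as uvm_gpu_deinit_isr() does
    sc->block_context = NULL;
}

int main(void)
{
    service_context_t sc;
    if (service_context_init(&sc) != 0)
        return 1;
    // ... fault servicing would use sc.block_context here ...
    service_context_deinit(&sc);
    return 0;
}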

@@ -370,7 +370,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,

    // Check logical permissions
    status = uvm_va_block_check_logical_permissions(va_block,
                                                    &service_context->block_context,
                                                    service_context->block_context,
                                                    gpu->id,
                                                    uvm_va_block_cpu_page_index(va_block,
                                                                                fault_entry->fault_address),

@@ -393,7 +393,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,

    // Compute new residency and update the masks
    new_residency = uvm_va_block_select_residency(va_block,
                                                  &service_context->block_context,
                                                  service_context->block_context,
                                                  page_index,
                                                  gpu->id,
                                                  fault_entry->access_type_mask,

@@ -629,7 +629,7 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
    uvm_gpu_va_space_t *gpu_va_space;
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
    uvm_va_block_context_t *va_block_context =
        &gpu->parent->fault_buffer_info.non_replayable.block_service_context.block_context;
        gpu->parent->fault_buffer_info.non_replayable.block_service_context.block_context;

    status = uvm_gpu_fault_entry_to_va_space(gpu, fault_entry, &va_space);
    if (status != NV_OK) {

@@ -655,7 +655,7 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
    // to remain valid until we release. If no mm is registered, we
    // can only service managed faults, not ATS/HMM faults.
    mm = uvm_va_space_mm_retain_lock(va_space);
    va_block_context->mm = mm;
    uvm_va_block_context_init(va_block_context, mm);

    uvm_va_space_down_read(va_space);

@@ -1180,7 +1180,11 @@ static void mark_fault_fatal(uvm_fault_service_batch_context_t *batch_context,
    fault_entry->replayable.cancel_va_mode = cancel_va_mode;

    utlb->has_fatal_faults = true;
    batch_context->has_fatal_faults = true;

    if (!batch_context->fatal_va_space) {
        UVM_ASSERT(fault_entry->va_space);
        batch_context->fatal_va_space = fault_entry->va_space;
    }
}

static void fault_entry_duplicate_flags(uvm_fault_service_batch_context_t *batch_context,

@@ -1230,7 +1234,7 @@ static uvm_fault_access_type_t check_fault_access_permissions(uvm_gpu_t *gpu,
    UvmEventFatalReason fatal_reason;
    uvm_fault_cancel_va_mode_t cancel_va_mode;
    uvm_fault_access_type_t ret = UVM_FAULT_ACCESS_TYPE_COUNT;
    uvm_va_block_context_t *va_block_context = &service_block_context->block_context;
    uvm_va_block_context_t *va_block_context = service_block_context->block_context;

    perm_status = uvm_va_block_check_logical_permissions(va_block,
                                                         va_block_context,

@@ -1345,7 +1349,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,

    if (uvm_va_block_is_hmm(va_block)) {
        policy = uvm_hmm_find_policy_end(va_block,
                                         block_context->block_context.hmm.vma,
                                         block_context->block_context->hmm.vma,
                                         ordered_fault_cache[first_fault_index]->fault_address,
                                         &end);
    }

@@ -1469,7 +1473,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,

        // Compute new residency and update the masks
        new_residency = uvm_va_block_select_residency(va_block,
                                                      &block_context->block_context,
                                                      block_context->block_context,
                                                      page_index,
                                                      gpu->id,
                                                      service_access_type_mask,

@@ -1511,8 +1515,8 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,

        ++block_context->num_retries;

    if (status == NV_OK && batch_context->has_fatal_faults)
        status = uvm_va_block_set_cancel(va_block, &block_context->block_context, gpu);
    if (status == NV_OK && batch_context->fatal_va_space)
        status = uvm_va_block_set_cancel(va_block, block_context->block_context, gpu);

    return status;
}

@@ -1860,7 +1864,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
    uvm_va_block_t *va_block;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    uvm_va_block_context_t *va_block_context =
        &gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;
        gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;
    uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[fault_index];
    struct mm_struct *mm = va_block_context->mm;
    NvU64 fault_address = current_entry->fault_address;

@@ -1937,14 +1941,198 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
    return status;
}

// Called when a fault in the batch has been marked fatal. Flush the buffer
// under the VA and mmap locks to remove any potential stale fatal faults, then
// service all new faults for just that VA space and cancel those which are
// fatal. Faults in other VA spaces are replayed when done and will be processed
// when normal fault servicing resumes.
static NV_STATUS service_fault_batch_for_cancel(uvm_gpu_t *gpu, uvm_fault_service_batch_context_t *batch_context)
{
    NV_STATUS status = NV_OK;
    NvU32 i;
    uvm_va_space_t *va_space = batch_context->fatal_va_space;
    uvm_gpu_va_space_t *gpu_va_space = NULL;
    struct mm_struct *mm;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
    uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.replayable.block_service_context;
    uvm_va_block_context_t *va_block_context = service_context->block_context;

    UVM_ASSERT(gpu->parent->replayable_faults_supported);
    UVM_ASSERT(va_space);

    // Perform the flush and re-fetch while holding the mmap_lock and the
    // VA space lock. This avoids stale faults because it prevents any vma
    // modifications (mmap, munmap, mprotect) from happening between the time HW
    // takes the fault and we cancel it.
    mm = uvm_va_space_mm_retain_lock(va_space);
    uvm_va_block_context_init(va_block_context, mm);
    uvm_va_space_down_read(va_space);

    // We saw fatal faults in this VA space before. Flush while holding
    // mmap_lock to make sure those faults come back (aren't stale).
    //
    // We need to wait until all old fault messages have arrived before
    // flushing, hence UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT.
    status = fault_buffer_flush_locked(gpu,
                                       UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
                                       UVM_FAULT_REPLAY_TYPE_START,
                                       batch_context);
    if (status != NV_OK)
        goto done;

    // Wait for the flush's replay to finish to give the legitimate faults a
    // chance to show up in the buffer again.
    status = uvm_tracker_wait(&replayable_faults->replay_tracker);
    if (status != NV_OK)
        goto done;

    // We expect all replayed faults to have arrived in the buffer so we can re-
    // service them. The replay-and-wait sequence above will ensure they're all
    // in the HW buffer. When GSP owns the HW buffer, we also have to wait for
    // GSP to copy all available faults from the HW buffer into the shadow
    // buffer.
    //
    // TODO: Bug 2533557: This flush does not actually guarantee that GSP will
    // copy over all faults.
    status = hw_fault_buffer_flush_locked(gpu->parent);
    if (status != NV_OK)
        goto done;

    // If there is no GPU VA space for the GPU, ignore all faults in the VA
    // space. This can happen if the GPU VA space has been destroyed since we
    // unlocked the VA space in service_fault_batch. That means the fatal faults
    // are stale, because unregistering the GPU VA space requires preempting the
    // context and detaching all channels in that VA space. Restart fault
    // servicing from the top.
    gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
    if (!gpu_va_space)
        goto done;

    // Re-parse the new faults
    batch_context->num_invalid_prefetch_faults = 0;
    batch_context->num_duplicate_faults = 0;
    batch_context->num_replays = 0;
    batch_context->fatal_va_space = NULL;
    batch_context->has_throttled_faults = false;

    status = fetch_fault_buffer_entries(gpu, batch_context, FAULT_FETCH_MODE_ALL);
    if (status != NV_OK)
        goto done;

    // No more faults left. Either the previously-seen fatal entry was stale, or
    // RM killed the context underneath us.
    if (batch_context->num_cached_faults == 0)
        goto done;

    ++batch_context->batch_id;

    status = preprocess_fault_batch(gpu, batch_context);
    if (status != NV_OK) {
        if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
            // Another flush happened due to stale faults or a context-fatal
            // error. The previously-seen fatal fault might not exist anymore,
            // so restart fault servicing from the top.
            status = NV_OK;
        }

        goto done;
    }

    // Search for the target VA space
    for (i = 0; i < batch_context->num_coalesced_faults; i++) {
        uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
        UVM_ASSERT(current_entry->va_space);
        if (current_entry->va_space == va_space)
            break;
    }

    while (i < batch_context->num_coalesced_faults) {
        uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];

        if (current_entry->va_space != va_space)
            break;

        // service_fault_batch_dispatch() doesn't expect unserviceable faults.
        // Just cancel them directly.
        if (current_entry->is_fatal) {
            status = cancel_fault_precise_va(gpu, current_entry, UVM_FAULT_CANCEL_VA_MODE_ALL);
            if (status != NV_OK)
                break;

            ++i;
        }
        else {
            uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
            NvU32 block_faults;

            ats_invalidate->write_faults_in_batch = false;
            uvm_hmm_service_context_init(service_context);

            // Service all the faults that we can. We only really need to search
            // for fatal faults, but attempting to service all is the easiest
            // way to do that.
            status = service_fault_batch_dispatch(va_space, gpu_va_space, batch_context, i, &block_faults, false);
            if (status != NV_OK) {
                // TODO: Bug 3900733: clean up locking in service_fault_batch().
                // We need to drop lock and retry. That means flushing and
                // starting over.
                if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
                    status = NV_OK;

                break;
            }

            // Invalidate TLBs before cancel to ensure that fatal faults don't
            // get stuck in HW behind non-fatal faults to the same line.
            status = uvm_ats_invalidate_tlbs(gpu_va_space, ats_invalidate, &batch_context->tracker);
            if (status != NV_OK)
                break;

            while (block_faults-- > 0) {
                current_entry = batch_context->ordered_fault_cache[i];
                if (current_entry->is_fatal) {
                    status = cancel_fault_precise_va(gpu, current_entry, current_entry->replayable.cancel_va_mode);
                    if (status != NV_OK)
                        break;
                }

                ++i;
            }
        }
    }

done:
    uvm_va_space_up_read(va_space);
    uvm_va_space_mm_release_unlock(va_space, mm);

    if (status == NV_OK) {
        // There are two reasons to flush the fault buffer here.
        //
        // 1) Functional. We need to replay both the serviced non-fatal faults
        //    and the skipped faults in other VA spaces. The former need to be
        //    restarted and the latter need to be replayed so the normal fault
        //    service mechanism can fetch and process them.
        //
        // 2) Performance. After cancelling the fatal faults, a flush removes
        //    any potential duplicated fault that may have been added while
        //    processing the faults in this batch. This flush also avoids doing
        //    unnecessary processing after the fatal faults have been cancelled,
        //    so all the rest are unlikely to remain after a replay because the
        //    context is probably in the process of dying.
        status = fault_buffer_flush_locked(gpu,
                                           UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
                                           UVM_FAULT_REPLAY_TYPE_START,
                                           batch_context);
    }

    return status;
}
|
||||
// Scan the ordered view of faults and group them by different va_blocks
// (managed faults) and service faults for each va_block, in batch.
// Service non-managed faults one at a time as they are encountered during the
// scan.
//
// This function returns NV_WARN_MORE_PROCESSING_REQUIRED if the fault buffer
// was flushed because the needs_fault_buffer_flush flag was set on some GPU VA
// space
// Fatal faults are marked for later processing by the caller.
static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
                                     fault_service_mode_t service_mode,
                                     uvm_fault_service_batch_context_t *batch_context)
@@ -1959,7 +2147,7 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
        gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
    uvm_service_block_context_t *service_context =
        &gpu->parent->fault_buffer_info.replayable.block_service_context;
    uvm_va_block_context_t *va_block_context = &service_context->block_context;
    uvm_va_block_context_t *va_block_context = service_context->block_context;

    UVM_ASSERT(gpu->parent->replayable_faults_supported);

@@ -1995,41 +2183,28 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
            // to remain valid until we release. If no mm is registered, we
            // can only service managed faults, not ATS/HMM faults.
            mm = uvm_va_space_mm_retain_lock(va_space);
            va_block_context->mm = mm;
            uvm_va_block_context_init(va_block_context, mm);

            uvm_va_space_down_read(va_space);

            gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
            if (uvm_processor_mask_test_and_clear_atomic(&va_space->needs_fault_buffer_flush, gpu->id)) {
                status = fault_buffer_flush_locked(gpu,
                                                   UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
                                                   UVM_FAULT_REPLAY_TYPE_START,
                                                   batch_context);
                if (status == NV_OK)
                    status = NV_WARN_MORE_PROCESSING_REQUIRED;

                break;
            }

            // The case where there is no valid GPU VA space for the GPU in this
            // VA space is handled next
        }

        // Some faults could be already fatal if they cannot be handled by
        // the UVM driver
        if (current_entry->is_fatal) {
            ++i;
            batch_context->has_fatal_faults = true;
            if (!batch_context->fatal_va_space)
                batch_context->fatal_va_space = va_space;

            utlb->has_fatal_faults = true;
            UVM_ASSERT(utlb->num_pending_faults > 0);
            continue;
        }

        if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->parent->id)) {
        if (!gpu_va_space) {
            // If there is no GPU VA space for the GPU, ignore the fault. This
            // can happen if a GPU VA space is destroyed without explicitly
            // freeing all memory ranges (destroying the VA range triggers a
            // flush of the fault buffer) and there are stale entries in the
            // freeing all memory ranges and there are stale entries in the
            // buffer that got fixed by the servicing in a previous batch.
            ++i;
            continue;
@@ -2057,7 +2232,7 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
            i += block_faults;

            // Don't issue replays in cancel mode
            if (replay_per_va_block && !batch_context->has_fatal_faults) {
            if (replay_per_va_block && !batch_context->fatal_va_space) {
                status = push_replay_on_gpu(gpu, UVM_FAULT_REPLAY_TYPE_START, batch_context);
                if (status != NV_OK)
                    goto fail;
@@ -2069,8 +2244,6 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
        }
    }

    // Only clobber status if invalidate_status != NV_OK, since status may also
    // contain NV_WARN_MORE_PROCESSING_REQUIRED.
    if (va_space != NULL) {
        NV_STATUS invalidate_status = uvm_ats_invalidate_tlbs(gpu_va_space, ats_invalidate, &batch_context->tracker);
        if (invalidate_status != NV_OK)
@@ -2278,64 +2451,6 @@ static NvU32 is_fatal_fault_in_buffer(uvm_fault_service_batch_context_t *batch_c
    return false;
}

// Cancel just the faults flagged as fatal in the given fault service batch
// context.
static NV_STATUS cancel_faults_precise_va(uvm_gpu_t *gpu, uvm_fault_service_batch_context_t *batch_context)
{
    NV_STATUS status = NV_OK;
    NV_STATUS fault_status;
    uvm_va_space_t *va_space = NULL;
    NvU32 i;

    UVM_ASSERT(gpu->parent->fault_cancel_va_supported);

    for (i = 0; i < batch_context->num_coalesced_faults; ++i) {
        uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];

        UVM_ASSERT(current_entry->va_space);

        if (current_entry->va_space != va_space) {
            // Fault on a different va_space, drop the lock of the old one...
            if (va_space != NULL)
                uvm_va_space_up_read(va_space);

            va_space = current_entry->va_space;

            // ... and take the lock of the new one
            uvm_va_space_down_read(va_space);

            // We don't need to check whether a buffer flush is required
            // (due to VA range destruction). Once a fault is flagged as fatal
            // we need to cancel it, even if its VA range no longer exists.
        }

        // See the comment for the same check in cancel_faults_all
        if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->parent->id))
            continue;

        if (current_entry->is_fatal) {
            status = cancel_fault_precise_va(gpu, current_entry, current_entry->replayable.cancel_va_mode);
            if (status != NV_OK)
                break;
        }
    }

    if (va_space != NULL)
        uvm_va_space_up_read(va_space);

    // See the comment on flushing in cancel_faults_all
    fault_status = fault_buffer_flush_locked(gpu,
                                             UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
                                             UVM_FAULT_REPLAY_TYPE_START,
                                             batch_context);

    // We report the first encountered error.
    if (status == NV_OK)
        status = fault_status;

    return status;
}

// Cancel all faults in the given fault service batch context, even those not
// marked as fatal.
static NV_STATUS cancel_faults_all(uvm_gpu_t *gpu,
@@ -2344,56 +2459,51 @@ static NV_STATUS cancel_faults_all(uvm_gpu_t *gpu,
{
    NV_STATUS status = NV_OK;
    NV_STATUS fault_status;
    uvm_va_space_t *va_space = NULL;
    NvU32 i;
    NvU32 i = 0;

    UVM_ASSERT(gpu->parent->fault_cancel_va_supported);
    UVM_ASSERT(reason != UvmEventFatalReasonInvalid);

    for (i = 0; i < batch_context->num_coalesced_faults; ++i) {
    while (i < batch_context->num_coalesced_faults && status == NV_OK) {
        uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
        uvm_fault_cancel_va_mode_t cancel_va_mode;
        uvm_va_space_t *va_space = current_entry->va_space;
        bool skip_va_space;

        UVM_ASSERT(current_entry->va_space);
        UVM_ASSERT(va_space);

        if (current_entry->va_space != va_space) {
            // Fault on a different va_space, drop the lock of the old one...
            if (va_space != NULL)
                uvm_va_space_up_read(va_space);
        uvm_va_space_down_read(va_space);

            va_space = current_entry->va_space;
        // If there is no GPU VA space for the GPU, ignore all faults in
        // that VA space. This can happen if the GPU VA space has been
        // destroyed since we unlocked the VA space in service_fault_batch.
        // Ignoring the fault avoids targetting a PDB that might have been
        // reused by another process.
        skip_va_space = !uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);

            // ... and take the lock of the new one
            uvm_va_space_down_read(va_space);
        for (;
             i < batch_context->num_coalesced_faults && current_entry->va_space == va_space;
             current_entry = batch_context->ordered_fault_cache[++i]) {
            uvm_fault_cancel_va_mode_t cancel_va_mode;

            if (skip_va_space)
                continue;

            if (current_entry->is_fatal) {
                UVM_ASSERT(current_entry->fatal_reason != UvmEventFatalReasonInvalid);
                cancel_va_mode = current_entry->replayable.cancel_va_mode;
            }
            else {
                current_entry->fatal_reason = reason;
                cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
            }

            status = cancel_fault_precise_va(gpu, current_entry, cancel_va_mode);
            if (status != NV_OK)
                break;
        }

        if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->parent->id)) {
            // If there is no GPU VA space for the GPU, ignore the fault.
            // This can happen if the GPU VA did not exist in
            // service_fault_batch(), or it was destroyed since then.
            // This is to avoid targetting a PDB that might have been reused
            // by another process.
            continue;
        }

        // If the fault was already marked fatal, use its reason and cancel
        // mode. Otherwise use the provided reason.
        if (current_entry->is_fatal) {
            UVM_ASSERT(current_entry->fatal_reason != UvmEventFatalReasonInvalid);
            cancel_va_mode = current_entry->replayable.cancel_va_mode;
        }
        else {
            current_entry->fatal_reason = reason;
            cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
        }

        status = cancel_fault_precise_va(gpu, current_entry, cancel_va_mode);
        if (status != NV_OK)
            break;
    }

    if (va_space != NULL)
        uvm_va_space_up_read(va_space);
    }

    // Because each cancel itself triggers a replay, there may be a large number
    // of new duplicated faults in the buffer after cancelling all the known
@@ -2537,7 +2647,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat

        batch_context->num_invalid_prefetch_faults = 0;
        batch_context->num_replays = 0;
        batch_context->has_fatal_faults = false;
        batch_context->fatal_va_space = NULL;
        batch_context->has_throttled_faults = false;

        // 5) Fetch all faults from buffer
@@ -2584,9 +2694,6 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
        // 8) Service all non-fatal faults and mark all non-serviceable faults
        //    as fatal
        status = service_fault_batch(gpu, FAULT_SERVICE_MODE_CANCEL, batch_context);
        if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
            continue;

        UVM_ASSERT(batch_context->num_replays == 0);
        if (status == NV_ERR_NO_MEMORY)
            continue;
@@ -2594,7 +2701,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
            break;

        // No more fatal faults left, we are done
        if (!batch_context->has_fatal_faults)
        if (!batch_context->fatal_va_space)
            break;

        // 9) Search for uTLBs that contain fatal faults and meet the
@@ -2616,9 +2723,9 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat

static NV_STATUS cancel_faults_precise(uvm_gpu_t *gpu, uvm_fault_service_batch_context_t *batch_context)
{
    UVM_ASSERT(batch_context->has_fatal_faults);
    UVM_ASSERT(batch_context->fatal_va_space);
    if (gpu->parent->fault_cancel_va_supported)
        return cancel_faults_precise_va(gpu, batch_context);
        return service_fault_batch_for_cancel(gpu, batch_context);

    return cancel_faults_precise_tlb(gpu, batch_context);
}
@@ -2674,7 +2781,7 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
        batch_context->num_invalid_prefetch_faults = 0;
        batch_context->num_duplicate_faults = 0;
        batch_context->num_replays = 0;
        batch_context->has_fatal_faults = false;
        batch_context->fatal_va_space = NULL;
        batch_context->has_throttled_faults = false;

        status = fetch_fault_buffer_entries(gpu, batch_context, FAULT_FETCH_MODE_BATCH_READY);
@@ -2702,9 +2809,6 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
        // was flushed
        num_replays += batch_context->num_replays;

        if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
            continue;

        enable_disable_prefetch_faults(gpu->parent, batch_context);

        if (status != NV_OK) {
@@ -2718,10 +2822,17 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
            break;
        }

        if (batch_context->has_fatal_faults) {
        if (batch_context->fatal_va_space) {
            status = uvm_tracker_wait(&batch_context->tracker);
            if (status == NV_OK)
            if (status == NV_OK) {
                status = cancel_faults_precise(gpu, batch_context);
                if (status == NV_OK) {
                    // Cancel handling should've issued at least one replay
                    UVM_ASSERT(batch_context->num_replays > 0);
                    ++num_batches;
                    continue;
                }
            }

            break;
        }

@@ -794,7 +794,7 @@ uvm_membar_t uvm_hal_downgrade_membar_type(uvm_gpu_t *gpu, bool is_local_vidmem)
    // memory, including those from other processors like the CPU or peer GPUs,
    // must come through this GPU's L2. In all current architectures, MEMBAR_GPU
    // is sufficient to resolve ordering at the L2 level.
    if (is_local_vidmem && !uvm_gpu_is_coherent(gpu->parent) && !uvm_downgrade_force_membar_sys)
    if (is_local_vidmem && !uvm_parent_gpu_is_coherent(gpu->parent) && !uvm_downgrade_force_membar_sys)
        return UVM_MEMBAR_GPU;

    // If the mapped memory was remote, or if a coherence protocol can cache

@@ -60,6 +60,8 @@ module_param(uvm_disable_hmm, bool, 0444);
#include "uvm_gpu.h"
#include "uvm_pmm_gpu.h"
#include "uvm_hal_types.h"
#include "uvm_push.h"
#include "uvm_hal.h"
#include "uvm_va_block_types.h"
#include "uvm_va_space_mm.h"
#include "uvm_va_space.h"
@@ -110,20 +112,7 @@ typedef struct

bool uvm_hmm_is_enabled_system_wide(void)
{
    if (uvm_disable_hmm)
        return false;

    if (g_uvm_global.ats.enabled)
        return false;

    // Confidential Computing and HMM impose mutually exclusive constraints. In
    // Confidential Computing the GPU can only access pages resident in vidmem,
    // but in HMM pages may be required to be resident in sysmem: file backed
    // VMAs, huge pages, etc.
    if (g_uvm_global.conf_computing_enabled)
        return false;

    return uvm_va_space_mm_enabled_system();
    return !uvm_disable_hmm && !g_uvm_global.ats.enabled && uvm_va_space_mm_enabled_system();
}

bool uvm_hmm_is_enabled(uvm_va_space_t *va_space)
@@ -140,6 +129,100 @@ static uvm_va_block_t *hmm_va_block_from_node(uvm_range_tree_node_t *node)
    return container_of(node, uvm_va_block_t, hmm.node);
}

// Copies the contents of the source device-private page to the
// destination CPU page. This will invalidate mappings, so cannot be
// called while holding any va_block locks.
static NV_STATUS uvm_hmm_copy_devmem_page(struct page *dst_page, struct page *src_page, uvm_tracker_t *tracker)
{
    uvm_gpu_phys_address_t src_addr;
    uvm_gpu_phys_address_t dst_addr;
    uvm_gpu_chunk_t *gpu_chunk;
    NvU64 dma_addr;
    uvm_push_t push;
    NV_STATUS status = NV_OK;
    uvm_gpu_t *gpu;

    // Holding a reference on the device-private page ensures the gpu
    // is already retained. This is because when a GPU is unregistered
    // all device-private pages are migrated back to the CPU and freed
    // before releasing the GPU. Therefore if we could get a reference
    // to the page the GPU must be retained.
    UVM_ASSERT(is_device_private_page(src_page) && page_count(src_page));
    gpu_chunk = uvm_pmm_devmem_page_to_chunk(src_page);
    gpu = uvm_gpu_chunk_get_gpu(gpu_chunk);
    status = uvm_mmu_chunk_map(gpu_chunk);
    if (status != NV_OK)
        return status;

    status = uvm_gpu_map_cpu_pages(gpu->parent, dst_page, PAGE_SIZE, &dma_addr);
    if (status != NV_OK)
        goto out_unmap_gpu;

    dst_addr = uvm_gpu_phys_address(UVM_APERTURE_SYS, dma_addr);
    src_addr = uvm_gpu_phys_address(UVM_APERTURE_VID, gpu_chunk->address);
    status = uvm_push_begin_acquire(gpu->channel_manager,
                                    UVM_CHANNEL_TYPE_GPU_TO_CPU,
                                    tracker,
                                    &push,
                                    "Copy for remote process fault");
    if (status != NV_OK)
        goto out_unmap_cpu;

    gpu->parent->ce_hal->memcopy(&push,
                                 uvm_gpu_address_copy(gpu, dst_addr),
                                 uvm_gpu_address_copy(gpu, src_addr),
                                 PAGE_SIZE);
    uvm_push_end(&push);
    status = uvm_tracker_add_push_safe(tracker, &push);

out_unmap_cpu:
    uvm_gpu_unmap_cpu_pages(gpu->parent, dma_addr, PAGE_SIZE);

out_unmap_gpu:
    uvm_mmu_chunk_unmap(gpu_chunk, NULL);

    return status;
}

static NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
{
    unsigned long src_pfn = 0;
    unsigned long dst_pfn = 0;
    struct page *dst_page;
    NV_STATUS status = NV_OK;
    int ret;

    ret = migrate_device_range(&src_pfn, pfn, 1);
    if (ret)
        return errno_to_nv_status(ret);

    if (src_pfn & MIGRATE_PFN_MIGRATE) {
        uvm_tracker_t tracker = UVM_TRACKER_INIT();

        dst_page = alloc_page(GFP_HIGHUSER_MOVABLE);
        if (!dst_page) {
            status = NV_ERR_NO_MEMORY;
            goto out;
        }

        lock_page(dst_page);
        if (WARN_ON(uvm_hmm_copy_devmem_page(dst_page, migrate_pfn_to_page(src_pfn), &tracker) != NV_OK))
            memzero_page(dst_page, 0, PAGE_SIZE);

        dst_pfn = migrate_pfn(page_to_pfn(dst_page));
        migrate_device_pages(&src_pfn, &dst_pfn, 1);
        uvm_tracker_wait_deinit(&tracker);
    }

out:
    migrate_device_finalize(&src_pfn, &dst_pfn, 1);

    if (!(src_pfn & MIGRATE_PFN_MIGRATE))
        status = NV_ERR_BUSY_RETRY;

    return status;
}

void uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
{
    uvm_hmm_va_space_t *hmm_va_space = &va_space->hmm;
@@ -199,6 +282,9 @@ void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_
{
    uvm_range_tree_node_t *node;
    uvm_va_block_t *va_block;
    struct range range = gpu->pmm.devmem.pagemap.range;
    unsigned long pfn;
    bool retry;

    if (!uvm_hmm_is_enabled(va_space))
        return;
@@ -207,6 +293,29 @@ void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_
        uvm_assert_mmap_lock_locked(mm);
    uvm_assert_rwsem_locked_write(&va_space->lock);

    // There could be pages with page->zone_device_data pointing to the va_space
    // which may be about to be freed. Migrate those back to the CPU so we don't
    // fault on them. Normally infinite retries are bad, but we don't have any
    // option here. Device-private pages can't be pinned so migration should
    // eventually succeed. Even if we did eventually bail out of the loop we'd
    // just stall in memunmap_pages() anyway.
    do {
        retry = false;

        for (pfn = __phys_to_pfn(range.start); pfn <= __phys_to_pfn(range.end); pfn++) {
            struct page *page = pfn_to_page(pfn);

            UVM_ASSERT(is_device_private_page(page));

            // This check is racy because nothing stops the page being freed and
            // even reused. That doesn't matter though - worst case the
            // migration fails, we retry and find the va_space doesn't match.
            if (page->zone_device_data == va_space)
                if (uvm_hmm_pmm_gpu_evict_pfn(pfn) != NV_OK)
                    retry = true;
        }
    } while (retry);

    uvm_range_tree_for_each(node, &va_space->hmm.blocks) {
        va_block = hmm_va_block_from_node(node);

@@ -568,7 +677,7 @@ bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
void uvm_hmm_service_context_init(uvm_service_block_context_t *service_context)
{
    // TODO: Bug 4050579: Remove this when swap cached pages can be migrated.
    service_context->block_context.hmm.swap_cached = false;
    service_context->block_context->hmm.swap_cached = false;
}

NV_STATUS uvm_hmm_migrate_begin(uvm_va_block_t *va_block)
@@ -631,47 +740,6 @@ static NV_STATUS hmm_migrate_range(uvm_va_block_t *va_block,
    return status;
}

void uvm_hmm_evict_va_blocks(uvm_va_space_t *va_space)
{
    // We can't use uvm_va_space_mm_retain(), because the va_space_mm
    // should already be dead by now.
    struct mm_struct *mm = va_space->va_space_mm.mm;
    uvm_hmm_va_space_t *hmm_va_space = &va_space->hmm;
    uvm_range_tree_node_t *node, *next;
    uvm_va_block_t *va_block;
    uvm_va_block_context_t *block_context;

    uvm_down_read_mmap_lock(mm);
    uvm_va_space_down_write(va_space);

    uvm_range_tree_for_each_safe(node, next, &hmm_va_space->blocks) {
        uvm_va_block_region_t region;
        struct vm_area_struct *vma;

        va_block = hmm_va_block_from_node(node);
        block_context = uvm_va_space_block_context(va_space, mm);
        uvm_hmm_migrate_begin_wait(va_block);
        uvm_mutex_lock(&va_block->lock);
        for_each_va_block_vma_region(va_block, mm, vma, &region) {
            if (!uvm_hmm_vma_is_valid(vma, vma->vm_start, false))
                continue;

            block_context->hmm.vma = vma;
            uvm_hmm_va_block_migrate_locked(va_block,
                                            NULL,
                                            block_context,
                                            UVM_ID_CPU,
                                            region,
                                            UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE);
        }
        uvm_mutex_unlock(&va_block->lock);
        uvm_hmm_migrate_finish(va_block);
    }

    uvm_va_space_up_write(va_space);
    uvm_up_read_mmap_lock(mm);
}

NV_STATUS uvm_hmm_test_va_block_inject_split_error(uvm_va_space_t *va_space, NvU64 addr)
{
    uvm_va_block_test_t *block_test;
@@ -1476,40 +1544,59 @@ static NV_STATUS hmm_va_block_cpu_page_populate(uvm_va_block_t *va_block,
        return status;
    }

    status = uvm_va_block_map_cpu_chunk_on_gpus(va_block, page_index);
    status = uvm_va_block_map_cpu_chunk_on_gpus(va_block, chunk, page_index);
    if (status != NV_OK) {
        uvm_cpu_chunk_remove_from_block(va_block, page_index);
        uvm_cpu_chunk_remove_from_block(va_block, page_to_nid(page), page_index);
        uvm_cpu_chunk_free(chunk);
    }

    return status;
}

static void hmm_va_block_cpu_page_unpopulate(uvm_va_block_t *va_block,
                                             uvm_page_index_t page_index)
static void hmm_va_block_cpu_unpopulate_chunk(uvm_va_block_t *va_block,
                                              uvm_cpu_chunk_t *chunk,
                                              int chunk_nid,
                                              uvm_page_index_t page_index)
{
    uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(va_block, page_index);

    UVM_ASSERT(uvm_va_block_is_hmm(va_block));

    if (!chunk)
        return;

    UVM_ASSERT(!uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) ||
               !uvm_page_mask_test(&va_block->cpu.resident, page_index));
               !uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index));
    UVM_ASSERT(uvm_cpu_chunk_get_size(chunk) == PAGE_SIZE);

    uvm_cpu_chunk_remove_from_block(va_block, page_index);
    uvm_cpu_chunk_remove_from_block(va_block, chunk_nid, page_index);
    uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk, page_index);
    uvm_cpu_chunk_free(chunk);
}

static void hmm_va_block_cpu_page_unpopulate(uvm_va_block_t *va_block, uvm_page_index_t page_index, struct page *page)
{
    uvm_cpu_chunk_t *chunk;

    UVM_ASSERT(uvm_va_block_is_hmm(va_block));

    if (page) {
        chunk = uvm_cpu_chunk_get_chunk_for_page(va_block, page_to_nid(page), page_index);
        hmm_va_block_cpu_unpopulate_chunk(va_block, chunk, page_to_nid(page), page_index);
    }
    else {
        int nid;

        for_each_possible_uvm_node(nid) {
            chunk = uvm_cpu_chunk_get_chunk_for_page(va_block, nid, page_index);
            hmm_va_block_cpu_unpopulate_chunk(va_block, chunk, nid, page_index);
        }
    }
}

static bool hmm_va_block_cpu_page_is_same(uvm_va_block_t *va_block,
                                          uvm_page_index_t page_index,
                                          struct page *page)
{
    struct page *old_page = uvm_cpu_chunk_get_cpu_page(va_block, page_index);
    struct page *old_page = uvm_va_block_get_cpu_page(va_block, page_index);

    UVM_ASSERT(uvm_cpu_chunk_is_hmm(uvm_cpu_chunk_get_chunk_for_page(va_block, page_index)));
    UVM_ASSERT(uvm_cpu_chunk_is_hmm(uvm_cpu_chunk_get_chunk_for_page(va_block, page_to_nid(page), page_index)));
    return old_page == page;
}

@@ -1522,7 +1609,7 @@ static void clear_service_context_masks(uvm_service_block_context_t *service_con
                                        uvm_processor_id_t new_residency,
                                        uvm_page_index_t page_index)
{
    uvm_page_mask_clear(&service_context->block_context.caller_page_mask, page_index);
    uvm_page_mask_clear(&service_context->block_context->caller_page_mask, page_index);

    uvm_page_mask_clear(&service_context->per_processor_masks[uvm_id_value(new_residency)].new_residency,
                        page_index);
@@ -1549,7 +1636,6 @@ static void cpu_mapping_set(uvm_va_block_t *va_block,
                            uvm_page_index_t page_index)
{
    uvm_processor_mask_set(&va_block->mapped, UVM_ID_CPU);
    uvm_page_mask_set(&va_block->maybe_mapped_pages, page_index);
    uvm_page_mask_set(&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_READ], page_index);
    if (is_write)
        uvm_page_mask_set(&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_WRITE], page_index);
@@ -1699,7 +1785,7 @@ static NV_STATUS sync_page_and_chunk_state(uvm_va_block_t *va_block,
            // migrate_vma_finalize() will release the reference so we should
            // clear our pointer to it.
            // TODO: Bug 3660922: Need to handle read duplication at some point.
            hmm_va_block_cpu_page_unpopulate(va_block, page_index);
            hmm_va_block_cpu_page_unpopulate(va_block, page_index, page);
        }
    }

@@ -1725,7 +1811,7 @@ static void clean_up_non_migrating_page(uvm_va_block_t *va_block,
    else {
        UVM_ASSERT(page_ref_count(dst_page) == 1);

        hmm_va_block_cpu_page_unpopulate(va_block, page_index);
        hmm_va_block_cpu_page_unpopulate(va_block, page_index, dst_page);
    }

    unlock_page(dst_page);
@@ -1760,7 +1846,7 @@ static void lock_block_cpu_page(uvm_va_block_t *va_block,
                                unsigned long *dst_pfns,
                                uvm_page_mask_t *same_devmem_page_mask)
{
    uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(va_block, page_index);
    uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(va_block, page_to_nid(src_page), page_index);
    uvm_va_block_region_t chunk_region;
    struct page *dst_page;

@@ -1786,7 +1872,7 @@ static void lock_block_cpu_page(uvm_va_block_t *va_block,
        // hmm_va_block_cpu_page_unpopulate() or block_kill(). If the page
        // does not migrate, it will be freed though.
        UVM_ASSERT(!uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) ||
                   !uvm_page_mask_test(&va_block->cpu.resident, page_index));
                   !uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index));
        UVM_ASSERT(chunk->type == UVM_CPU_CHUNK_TYPE_PHYSICAL);
        UVM_ASSERT(page_ref_count(dst_page) == 1);
        uvm_cpu_chunk_make_hmm(chunk);
@@ -1934,7 +2020,7 @@ static NV_STATUS alloc_and_copy_to_cpu(uvm_va_block_t *va_block,
        }

        UVM_ASSERT(!uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) ||
                   !uvm_page_mask_test(&va_block->cpu.resident, page_index));
                   !uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index));

        // Allocate a user system memory page for the destination.
        // This is the typical case since Linux will free the source page when
@@ -2012,8 +2098,8 @@ static NV_STATUS uvm_hmm_devmem_fault_alloc_and_copy(uvm_hmm_devmem_fault_contex
    service_context = devmem_fault_context->service_context;
    va_block_retry = devmem_fault_context->va_block_retry;
    va_block = devmem_fault_context->va_block;
    src_pfns = service_context->block_context.hmm.src_pfns;
    dst_pfns = service_context->block_context.hmm.dst_pfns;
    src_pfns = service_context->block_context->hmm.src_pfns;
    dst_pfns = service_context->block_context->hmm.dst_pfns;

    // Build the migration page mask.
    // Note that thrashing pinned pages and prefetch pages are already
@@ -2022,7 +2108,7 @@ static NV_STATUS uvm_hmm_devmem_fault_alloc_and_copy(uvm_hmm_devmem_fault_contex
    uvm_page_mask_copy(page_mask, &service_context->per_processor_masks[UVM_ID_CPU_VALUE].new_residency);

    status = alloc_and_copy_to_cpu(va_block,
                                   service_context->block_context.hmm.vma,
                                   service_context->block_context->hmm.vma,
                                   src_pfns,
                                   dst_pfns,
                                   service_context->region,
@@ -2057,8 +2143,8 @@ static NV_STATUS uvm_hmm_devmem_fault_finalize_and_map(uvm_hmm_devmem_fault_cont
    prefetch_hint = &service_context->prefetch_hint;
    va_block = devmem_fault_context->va_block;
    va_block_retry = devmem_fault_context->va_block_retry;
    src_pfns = service_context->block_context.hmm.src_pfns;
    dst_pfns = service_context->block_context.hmm.dst_pfns;
    src_pfns = service_context->block_context->hmm.src_pfns;
    dst_pfns = service_context->block_context->hmm.dst_pfns;
    region = service_context->region;

    page_mask = &devmem_fault_context->page_mask;
@@ -2165,8 +2251,7 @@ static NV_STATUS populate_region(uvm_va_block_t *va_block,

            // Since we have a stable snapshot of the CPU pages, we can
            // update the residency and protection information.
            uvm_processor_mask_set(&va_block->resident, UVM_ID_CPU);
            uvm_page_mask_set(&va_block->cpu.resident, page_index);
            uvm_va_block_cpu_set_resident_page(va_block, page_to_nid(page), page_index);

            cpu_mapping_set(va_block, pfns[page_index] & HMM_PFN_WRITE, page_index);
        }
@@ -2253,7 +2338,7 @@ static void hmm_release_atomic_pages(uvm_va_block_t *va_block,
    uvm_page_index_t page_index;

    for_each_va_block_page_in_region(page_index, region) {
        struct page *page = service_context->block_context.hmm.pages[page_index];
        struct page *page = service_context->block_context->hmm.pages[page_index];

        if (!page)
            continue;
@@ -2269,14 +2354,14 @@ static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id,
                                               uvm_service_block_context_t *service_context)
{
    uvm_va_block_region_t region = service_context->region;
    struct page **pages = service_context->block_context.hmm.pages;
    struct page **pages = service_context->block_context->hmm.pages;
    int npages;
    uvm_page_index_t page_index;
    uvm_make_resident_cause_t cause;
    NV_STATUS status;

    if (!uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) ||
        !uvm_page_mask_region_full(&va_block->cpu.resident, region)) {
        !uvm_va_block_cpu_is_region_resident_on(va_block, NUMA_NO_NODE, region)) {
        // There is an atomic GPU fault. We need to make sure no pages are
        // GPU resident so that make_device_exclusive_range() doesn't call
        // migrate_to_ram() and cause a va_space lock recursion problem.
@@ -2289,7 +2374,7 @@ static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id,

        status = uvm_hmm_va_block_migrate_locked(va_block,
                                                 va_block_retry,
                                                 &service_context->block_context,
                                                 service_context->block_context,
                                                 UVM_ID_CPU,
                                                 region,
                                                 cause);
@@ -2299,7 +2384,7 @@ static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id,
        // make_device_exclusive_range() will try to call migrate_to_ram()
        // and deadlock with ourself if the data isn't CPU resident.
        if (!uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) ||
            !uvm_page_mask_region_full(&va_block->cpu.resident, region)) {
            !uvm_va_block_cpu_is_region_resident_on(va_block, NUMA_NO_NODE, region)) {
            status = NV_WARN_MORE_PROCESSING_REQUIRED;
            goto done;
        }
@@ -2309,7 +2394,7 @@ static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id,
    // mmap() files so we check for that here and report a fatal fault.
    // Otherwise with the current Linux 6.1 make_device_exclusive_range(),
    // it doesn't make the page exclusive and we end up in an endless loop.
    if (service_context->block_context.hmm.vma->vm_flags & VM_SHARED) {
    if (service_context->block_context->hmm.vma->vm_flags & (VM_SHARED | VM_HUGETLB)) {
        status = NV_ERR_NOT_SUPPORTED;
        goto done;
    }
@@ -2318,7 +2403,7 @@ static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id,

    uvm_mutex_unlock(&va_block->lock);

    npages = make_device_exclusive_range(service_context->block_context.mm,
    npages = make_device_exclusive_range(service_context->block_context->mm,
                                         uvm_va_block_cpu_page_address(va_block, region.first),
                                         uvm_va_block_cpu_page_address(va_block, region.outer - 1) + PAGE_SIZE,
                                         pages + region.first,
@@ -2356,15 +2441,13 @@ static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id,
        if (uvm_page_mask_test(&va_block->cpu.allocated, page_index)) {
            UVM_ASSERT(hmm_va_block_cpu_page_is_same(va_block, page_index, page));
            UVM_ASSERT(uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU));
            UVM_ASSERT(uvm_page_mask_test(&va_block->cpu.resident, page_index));
            UVM_ASSERT(uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index));
        }
        else {
            NV_STATUS s = hmm_va_block_cpu_page_populate(va_block, page_index, page);

            if (s == NV_OK) {
                uvm_processor_mask_set(&va_block->resident, UVM_ID_CPU);
                uvm_page_mask_set(&va_block->cpu.resident, page_index);
            }
            if (s == NV_OK)
                uvm_va_block_cpu_set_resident_page(va_block, page_to_nid(page), page_index);
        }

        cpu_mapping_clear(va_block, page_index);
@@ -2419,7 +2502,7 @@ static NV_STATUS hmm_block_cpu_fault_locked(uvm_processor_id_t processor_id,
                                            uvm_service_block_context_t *service_context)
{
    uvm_va_block_region_t region = service_context->region;
    struct migrate_vma *args = &service_context->block_context.hmm.migrate_vma_args;
    struct migrate_vma *args = &service_context->block_context->hmm.migrate_vma_args;
    NV_STATUS status;
    int ret;
    uvm_hmm_devmem_fault_context_t fault_context = {
@@ -2453,8 +2536,8 @@ static NV_STATUS hmm_block_cpu_fault_locked(uvm_processor_id_t processor_id,
    }

    status = hmm_make_resident_cpu(va_block,
                                   service_context->block_context.hmm.vma,
                                   service_context->block_context.hmm.src_pfns,
                                   service_context->block_context->hmm.vma,
                                   service_context->block_context->hmm.src_pfns,
                                   region,
                                   service_context->access_type,
                                   &fault_context.same_devmem_page_mask);
@@ -2476,9 +2559,9 @@ static NV_STATUS hmm_block_cpu_fault_locked(uvm_processor_id_t processor_id,
        }
    }

    args->vma = service_context->block_context.hmm.vma;
    args->src = service_context->block_context.hmm.src_pfns + region.first;
    args->dst = service_context->block_context.hmm.dst_pfns + region.first;
    args->vma = service_context->block_context->hmm.vma;
    args->src = service_context->block_context->hmm.src_pfns + region.first;
    args->dst = service_context->block_context->hmm.dst_pfns + region.first;
    args->start = uvm_va_block_region_start(va_block, region);
    args->end = uvm_va_block_region_end(va_block, region) + 1;
    args->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
@@ -2558,7 +2641,7 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
                // TODO: Bug 4050579: Remove this when swap cached pages can be
                // migrated.
                if (service_context) {
                    service_context->block_context.hmm.swap_cached = true;
                    service_context->block_context->hmm.swap_cached = true;
                    break;
                }

@@ -2574,7 +2657,7 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
            if (uvm_page_mask_test(&va_block->cpu.allocated, page_index)) {
                UVM_ASSERT(hmm_va_block_cpu_page_is_same(va_block, page_index, src_page));
                UVM_ASSERT(uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU));
                UVM_ASSERT(uvm_page_mask_test(&va_block->cpu.resident, page_index));
                UVM_ASSERT(uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index));
            }
            else {
                status = hmm_va_block_cpu_page_populate(va_block, page_index, src_page);
@@ -2588,8 +2671,7 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,

                // migrate_vma_setup() was able to isolate and lock the page;
                // therefore, it is CPU resident and not mapped.
                uvm_processor_mask_set(&va_block->resident, UVM_ID_CPU);
                uvm_page_mask_set(&va_block->cpu.resident, page_index);
                uvm_va_block_cpu_set_resident_page(va_block, page_to_nid(src_page), page_index);
            }

            // The call to migrate_vma_setup() will have inserted a migration
@@ -2604,7 +2686,7 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
            if (uvm_page_mask_test(&va_block->cpu.allocated, page_index)) {
                UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block, page_index));

                hmm_va_block_cpu_page_unpopulate(va_block, page_index);
                hmm_va_block_cpu_page_unpopulate(va_block, page_index, NULL);
            }
        }

@@ -2618,7 +2700,7 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
    }

    if (uvm_page_mask_empty(page_mask) ||
        (service_context && service_context->block_context.hmm.swap_cached))
        (service_context && service_context->block_context->hmm.swap_cached))
        status = NV_WARN_MORE_PROCESSING_REQUIRED;

    if (status != NV_OK)
@@ -2649,8 +2731,8 @@ static NV_STATUS uvm_hmm_gpu_fault_alloc_and_copy(struct vm_area_struct *vma,
    service_context = uvm_hmm_gpu_fault_event->service_context;
    region = service_context->region;
    prefetch_hint = &service_context->prefetch_hint;
    src_pfns = service_context->block_context.hmm.src_pfns;
    dst_pfns = service_context->block_context.hmm.dst_pfns;
    src_pfns = service_context->block_context->hmm.src_pfns;
    dst_pfns = service_context->block_context->hmm.dst_pfns;

    // Build the migration mask.
    // Note that thrashing pinned pages are already accounted for in
@@ -2708,8 +2790,8 @@ static NV_STATUS uvm_hmm_gpu_fault_finalize_and_map(uvm_hmm_gpu_fault_event_t *u
    va_block = uvm_hmm_gpu_fault_event->va_block;
    va_block_retry = uvm_hmm_gpu_fault_event->va_block_retry;
    service_context = uvm_hmm_gpu_fault_event->service_context;
    src_pfns = service_context->block_context.hmm.src_pfns;
    dst_pfns = service_context->block_context.hmm.dst_pfns;
    src_pfns = service_context->block_context->hmm.src_pfns;
    dst_pfns = service_context->block_context->hmm.dst_pfns;
    region = service_context->region;
    page_mask = &uvm_hmm_gpu_fault_event->page_mask;

@@ -2752,11 +2834,11 @@ NV_STATUS uvm_hmm_va_block_service_locked(uvm_processor_id_t processor_id,
                                          uvm_va_block_retry_t *va_block_retry,
                                          uvm_service_block_context_t *service_context)
{
    struct mm_struct *mm = service_context->block_context.mm;
    struct vm_area_struct *vma = service_context->block_context.hmm.vma;
    struct mm_struct *mm = service_context->block_context->mm;
    struct vm_area_struct *vma = service_context->block_context->hmm.vma;
    uvm_va_block_region_t region = service_context->region;
    uvm_hmm_gpu_fault_event_t uvm_hmm_gpu_fault_event;
    struct migrate_vma *args = &service_context->block_context.hmm.migrate_vma_args;
    struct migrate_vma *args = &service_context->block_context->hmm.migrate_vma_args;
    int ret;
    NV_STATUS status = NV_ERR_INVALID_ADDRESS;

@@ -2780,8 +2862,8 @@ NV_STATUS uvm_hmm_va_block_service_locked(uvm_processor_id_t processor_id,
    uvm_hmm_gpu_fault_event.service_context = service_context;

    args->vma = vma;
    args->src = service_context->block_context.hmm.src_pfns + region.first;
    args->dst = service_context->block_context.hmm.dst_pfns + region.first;
    args->src = service_context->block_context->hmm.src_pfns + region.first;
    args->dst = service_context->block_context->hmm.dst_pfns + region.first;
    args->start = uvm_va_block_region_start(va_block, region);
    args->end = uvm_va_block_region_end(va_block, region) + 1;
    args->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE | MIGRATE_VMA_SELECT_SYSTEM;
@@ -2815,8 +2897,8 @@ NV_STATUS uvm_hmm_va_block_service_locked(uvm_processor_id_t processor_id,
    // since migrate_vma_setup() would have reported that information.
    // Try to make it resident in system memory and retry the migration.
    status = hmm_make_resident_cpu(va_block,
                                   service_context->block_context.hmm.vma,
                                   service_context->block_context.hmm.src_pfns,
                                   service_context->block_context->hmm.vma,
                                   service_context->block_context->hmm.src_pfns,
                                   region,
                                   service_context->access_type,
                                   NULL);
@@ -2962,16 +3044,6 @@ static NV_STATUS uvm_hmm_migrate_finalize(uvm_hmm_migrate_event_t *uvm_hmm_migra
                                        &uvm_hmm_migrate_event->same_devmem_page_mask);
}

static bool is_resident(uvm_va_block_t *va_block,
                        uvm_processor_id_t dest_id,
                        uvm_va_block_region_t region)
{
    if (!uvm_processor_mask_test(&va_block->resident, dest_id))
        return false;

    return uvm_page_mask_region_full(uvm_va_block_resident_mask_get(va_block, dest_id), region);
}

// Note that migrate_vma_*() doesn't handle asynchronous migrations so the
// migration flag UVM_MIGRATE_FLAG_SKIP_CPU_MAP doesn't have an effect.
// TODO: Bug 3900785: investigate ways to implement async migration.
@@ -3063,9 +3135,7 @@ NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
    uvm_page_mask_init_from_region(page_mask, region, NULL);

    for_each_id_in_mask(id, &va_block->resident) {
        if (!uvm_page_mask_andnot(page_mask,
                                  page_mask,
                                  uvm_va_block_resident_mask_get(va_block, id)))
        if (!uvm_page_mask_andnot(page_mask, page_mask, uvm_va_block_resident_mask_get(va_block, id, NUMA_NO_NODE)))
            return NV_OK;
    }

@@ -3193,6 +3263,7 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
    uvm_page_mask_t *page_mask = &uvm_hmm_migrate_event.page_mask;
    const uvm_va_policy_t *policy;
    uvm_va_policy_node_t *node;
    uvm_page_mask_t *cpu_resident_mask = uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU, NUMA_NO_NODE);
    unsigned long npages;
    NV_STATUS status;

@@ -3215,7 +3286,7 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
    // Pages resident on the GPU should not have a resident page in system
    // memory.
    // TODO: Bug 3660922: Need to handle read duplication at some point.
    UVM_ASSERT(uvm_page_mask_region_empty(&va_block->cpu.resident, region));
    UVM_ASSERT(uvm_page_mask_region_empty(cpu_resident_mask, region));

    status = alloc_and_copy_to_cpu(va_block,
                                   NULL,
@@ -3314,35 +3385,34 @@ NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
                                   NULL);
}

NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
NV_STATUS uvm_hmm_remote_cpu_fault(struct vm_fault *vmf)
{
    unsigned long src_pfn = 0;
    unsigned long dst_pfn = 0;
    struct page *dst_page;
    NV_STATUS status = NV_OK;
    unsigned long src_pfn;
    unsigned long dst_pfn;
    struct migrate_vma args;
    struct page *src_page = vmf->page;
    uvm_tracker_t tracker = UVM_TRACKER_INIT();
    int ret;

    ret = migrate_device_range(&src_pfn, pfn, 1);
    if (ret)
        return errno_to_nv_status(ret);
    args.vma = vmf->vma;
    args.src = &src_pfn;
    args.dst = &dst_pfn;
    args.start = nv_page_fault_va(vmf);
    args.end = args.start + PAGE_SIZE;
    args.pgmap_owner = &g_uvm_global;
    args.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
    args.fault_page = src_page;

    // We don't call migrate_vma_setup_locked() here because we don't
    // have a va_block and don't want to ignore invalidations.
    ret = migrate_vma_setup(&args);
    UVM_ASSERT(!ret);

    if (src_pfn & MIGRATE_PFN_MIGRATE) {
        // All the code for copying a vidmem page to sysmem relies on
        // having a va_block. However certain combinations of mremap()
        // and fork() can result in device-private pages being mapped
        // in a child process without a va_block.
        //
        // We don't expect the above to be a common occurance so for
        // now we allocate a fresh zero page when evicting without a
        // va_block. However this results in child processes losing
        // data so make sure we warn about it. Ideally we would just
        // not migrate and SIGBUS the child if it tries to access the
        // page. However that would prevent unloading of the driver so
        // we're stuck with this until we fix the problem.
        // TODO: Bug 3902536: add code to migrate GPU memory without having a
        // va_block.
        WARN_ON(1);
        dst_page = alloc_page(GFP_HIGHUSER_MOVABLE | __GFP_ZERO);
        struct page *dst_page;

        dst_page = alloc_page(GFP_HIGHUSER_MOVABLE);
        if (!dst_page) {
            status = NV_ERR_NO_MEMORY;
            goto out;
@@ -3351,11 +3421,15 @@ NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
        lock_page(dst_page);
        dst_pfn = migrate_pfn(page_to_pfn(dst_page));

        migrate_device_pages(&src_pfn, &dst_pfn, 1);
        status = uvm_hmm_copy_devmem_page(dst_page, src_page, &tracker);
        if (status == NV_OK)
            status = uvm_tracker_wait_deinit(&tracker);
    }

    migrate_vma_pages(&args);

out:
    migrate_device_finalize(&src_pfn, &dst_pfn, 1);
    migrate_vma_finalize(&args);

    return status;
}
@@ -3606,4 +3680,3 @@ bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
}

#endif // UVM_IS_CONFIG_HMM()


@@ -307,10 +307,10 @@ typedef struct
                                   uvm_migrate_mode_t mode,
                                   uvm_tracker_t *out_tracker);

    // Evicts all va_blocks in the va_space to the CPU. Unlike the
    // other va_block eviction functions this is based on virtual
    // address and therefore takes mmap_lock for read.
    void uvm_hmm_evict_va_blocks(uvm_va_space_t *va_space);
    // Handle a fault to a device-private page from a process other than the
    // process which created the va_space that originally allocated the
    // device-private page.
    NV_STATUS uvm_hmm_remote_cpu_fault(struct vm_fault *vmf);

    // This sets the va_block_context->hmm.src_pfns[] to the ZONE_DEVICE private
    // PFN for the GPU chunk memory.
@@ -343,14 +343,6 @@ typedef struct
                                                 const uvm_page_mask_t *pages_to_evict,
                                                 uvm_va_block_region_t region);

    // Migrate a GPU device-private page to system memory. This is
    // called to remove CPU page table references to device private
    // struct pages for the given GPU after all other references in
    // va_blocks have been released and the GPU is in the process of
    // being removed/torn down. Note that there is no mm, VMA,
    // va_block or any user channel activity on this GPU.
    NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn);

    // This returns what would be the intersection of va_block start/end and
    // VMA start/end-1 for the given 'lookup_address' if
    // uvm_hmm_va_block_find_create() was called.
@@ -592,8 +584,10 @@ typedef struct
        return NV_ERR_INVALID_ADDRESS;
    }

    static void uvm_hmm_evict_va_blocks(uvm_va_space_t *va_space)
    static NV_STATUS uvm_hmm_remote_cpu_fault(struct vm_fault *vmf)
    {
        UVM_ASSERT(0);
        return NV_ERR_INVALID_ADDRESS;
    }

    static NV_STATUS uvm_hmm_va_block_evict_chunk_prep(uvm_va_block_t *va_block,
@@ -622,11 +616,6 @@ typedef struct
        return NV_OK;
    }

    static NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
    {
        return NV_OK;
    }

    static NV_STATUS uvm_hmm_va_block_range_bounds(uvm_va_space_t *va_space,
                                                   struct mm_struct *mm,
                                                   NvU64 lookup_address,

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2020-2022 NVIDIA Corporation
    Copyright (c) 2020-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -59,12 +59,12 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    // Physical CE writes to vidmem are non-coherent with respect to the CPU on
    // GH180.
    parent_gpu->ce_phys_vidmem_write_supported = !uvm_gpu_is_coherent(parent_gpu);
    parent_gpu->ce_phys_vidmem_write_supported = !uvm_parent_gpu_is_coherent(parent_gpu);

    // TODO: Bug 4174553: [HGX-SkinnyJoe][GH180] channel errors discussion/debug
    //                    portion for the uvm tests became nonresponsive after
    //                    some time and then failed even after reboot
    parent_gpu->peer_copy_mode = uvm_gpu_is_coherent(parent_gpu) ?
    parent_gpu->peer_copy_mode = uvm_parent_gpu_is_coherent(parent_gpu) ?
                                 UVM_GPU_PEER_COPY_MODE_VIRTUAL : g_uvm_global.peer_copy_mode;

    // All GR context buffers may be mapped to 57b wide VAs. All "compute" units

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2020-2023 NVIDIA Corporation
    Copyright (c) 2020-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -368,10 +368,7 @@ static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
    return pde_bits;
}

static void make_pde_hopper(void *entry,
                            uvm_mmu_page_table_alloc_t **phys_allocs,
                            NvU32 depth,
                            uvm_page_directory_t *child_dir)
static void make_pde_hopper(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
{
    NvU32 entry_count = entries_per_index_hopper(depth);
    NvU64 *entry_bits = (NvU64 *)entry;

@@ -128,8 +128,9 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
// present if we see the callback.
//
// The callback was added in commit 0f0a327fa12cd55de5e7f8c05a70ac3d047f405e,
// v3.19 (2014-11-13).
#if defined(NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE)
// v3.19 (2014-11-13) and renamed in commit 1af5a8109904.
#if defined(NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE) || \
    defined(NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS)
#define UVM_CAN_USE_MMU_NOTIFIERS() 1
#else
#define UVM_CAN_USE_MMU_NOTIFIERS() 0
@@ -348,6 +349,47 @@ static inline NvU64 NV_GETTIME(void)
         (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
#endif

#if !defined(NV_FIND_NEXT_BIT_WRAP_PRESENT)
static inline unsigned long find_next_bit_wrap(const unsigned long *addr, unsigned long size, unsigned long offset)
{
    unsigned long bit = find_next_bit(addr, size, offset);

    if (bit < size)
        return bit;

    bit = find_first_bit(addr, offset);
    return bit < offset ? bit : size;
}
#endif

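// Editor's note: the usage sketch below is illustrative only and is not part
// of the driver diff. It assumes a caller-owned 16-bit bitmap and shows the
// wrap-around semantics the fallback above provides: the search runs from
// 'offset' to the end of the map, then wraps to bit 0, and returns 'size'
// when no bit is set.
//
//   DECLARE_BITMAP(map, 16);
//
//   bitmap_zero(map, 16);
//   __set_bit(2, map);
//   __set_bit(9, map);
//
//   find_next_bit_wrap(map, 16, 4);   // forward search finds bit 9
//   find_next_bit_wrap(map, 16, 12);  // wraps around and returns bit 2
//   bitmap_zero(map, 16);
//   find_next_bit_wrap(map, 16, 0);   // no bits set: returns 16 (the size)
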
// for_each_set_bit_wrap and __for_each_wrap were introduced in v6.1-rc1
// by commit 4fe49b3b97c2640147c46519c2a6fdb06df34f5f
#if !defined(for_each_set_bit_wrap)
static inline unsigned long __for_each_wrap(const unsigned long *bitmap,
                                            unsigned long size,
                                            unsigned long start,
                                            unsigned long n)
{
    unsigned long bit;

    if (n > start) {
        bit = find_next_bit(bitmap, size, n);
        if (bit < size)
            return bit;

        n = 0;
    }

    bit = find_next_bit(bitmap, start, n);
    return bit < start ? bit : size;
}

#define for_each_set_bit_wrap(bit, addr, size, start) \
    for ((bit) = find_next_bit_wrap((addr), (size), (start)); \
         (bit) < (size); \
         (bit) = __for_each_wrap((addr), (size), (start), (bit) + 1))
#endif

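// Editor's note: illustrative only, not part of the driver diff. Starting the
// iteration in the middle of the bitmap visits every set bit exactly once,
// first from 'start' to the end of the map and then from bit 0 up to 'start':
//
//   unsigned long bit;
//
//   // Assuming bits 1 and 13 are set in a 16-bit map, this visits 13, then 1.
//   for_each_set_bit_wrap(bit, map, 16, 8)
//       pr_info("set bit: %lu\n", bit);
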
// Added in 2.6.24
#ifndef ACCESS_ONCE
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
@@ -579,4 +621,5 @@ static inline pgprot_t uvm_pgprot_decrypted(pgprot_t prot)
#include <asm/page.h>
#define page_to_virt(x) __va(PFN_PHYS(page_to_pfn(x)))
#endif

#endif // _UVM_LINUX_H

@@ -355,6 +355,7 @@ static uvm_membar_t va_range_downgrade_membar(uvm_va_range_t *va_range, uvm_ext_
    if (!ext_gpu_map->mem_handle)
        return UVM_MEMBAR_GPU;

    // EGM uses the same barriers as sysmem.
    return uvm_hal_downgrade_membar_type(ext_gpu_map->gpu,
                                         !ext_gpu_map->is_sysmem && ext_gpu_map->gpu == ext_gpu_map->owning_gpu);
}
@@ -633,6 +634,8 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
                                          const UvmGpuMemoryInfo *mem_info)
{
    uvm_gpu_t *owning_gpu;
    if (mem_info->egm)
        UVM_ASSERT(mem_info->sysmem);

    if (!mem_info->deviceDescendant && !mem_info->sysmem) {
        ext_gpu_map->owning_gpu = NULL;
@@ -641,6 +644,7 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
    }
    // This is a local or peer allocation, so the owning GPU must have been
    // registered.
    // This also checks for if EGM owning GPU is registered.
    owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &mem_info->uuid);
    if (!owning_gpu)
        return NV_ERR_INVALID_DEVICE;
@@ -651,13 +655,10 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
    // crashes when it's eventually freed.
    // TODO: Bug 1811006: Bug tracking the RM issue, its fix might change the
    // semantics of sysmem allocations.
    if (mem_info->sysmem) {
        ext_gpu_map->owning_gpu = owning_gpu;
        ext_gpu_map->is_sysmem = true;
        return NV_OK;
    }

    if (owning_gpu != mapping_gpu) {
    // Check if peer access for peer memory is enabled.
    // This path also handles EGM allocations.
    if (owning_gpu != mapping_gpu && (!mem_info->sysmem || mem_info->egm)) {
        // TODO: Bug 1757136: In SLI, the returned UUID may be different but a
        // local mapping must be used. We need to query SLI groups to know
        // that.
@@ -666,7 +667,9 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
    }

    ext_gpu_map->owning_gpu = owning_gpu;
    ext_gpu_map->is_sysmem = false;
    ext_gpu_map->is_sysmem = mem_info->sysmem;
    ext_gpu_map->is_egm = mem_info->egm;

    return NV_OK;
}

@@ -719,6 +722,7 @@ static NV_STATUS uvm_ext_gpu_map_split(uvm_range_tree_t *tree,
    new->gpu = existing_map->gpu;
    new->owning_gpu = existing_map->owning_gpu;
    new->is_sysmem = existing_map->is_sysmem;
    new->is_egm = existing_map->is_egm;

    // Initialize the new ext_gpu_map tracker as a copy of the existing_map tracker.
    // This way, any operations on any of the two ext_gpu_maps will be able to

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2016-2023 NVIDIA Corporation
    Copyright (c) 2016-2021 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -106,10 +106,7 @@ static NvU64 small_half_pde_maxwell(uvm_mmu_page_table_alloc_t *phys_alloc)
    return pde_bits;
}

static void make_pde_maxwell(void *entry,
                             uvm_mmu_page_table_alloc_t **phys_allocs,
                             NvU32 depth,
                             uvm_page_directory_t *child_dir)
static void make_pde_maxwell(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
{
    NvU64 pde_bits = 0;
    UVM_ASSERT(depth == 0);

@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
Copyright (c) 2016-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -93,9 +93,8 @@ static bool sysmem_can_be_mapped_on_gpu(uvm_mem_t *sysmem)
|
||||
{
|
||||
UVM_ASSERT(uvm_mem_is_sysmem(sysmem));
|
||||
|
||||
// In Confidential Computing, only unprotected memory can be mapped on the
|
||||
// GPU
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
// If SEV is enabled, only unprotected memory can be mapped
|
||||
if (g_uvm_global.sev_enabled)
|
||||
return uvm_mem_is_sysmem_dma(sysmem);
|
||||
|
||||
return true;
|
||||
@@ -738,7 +737,7 @@ static NV_STATUS mem_map_cpu_to_sysmem_kernel(uvm_mem_t *mem)
|
||||
pages[page_index] = mem_cpu_page(mem, page_index * PAGE_SIZE);
|
||||
}
|
||||
|
||||
if (g_uvm_global.conf_computing_enabled && uvm_mem_is_sysmem_dma(mem))
|
||||
if (g_uvm_global.sev_enabled && uvm_mem_is_sysmem_dma(mem))
|
||||
prot = uvm_pgprot_decrypted(PAGE_KERNEL_NOENC);
|
||||
|
||||
mem->kernel.cpu_addr = vmap(pages, num_pages, VM_MAP, prot);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
Copyright (c) 2016-2021 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -44,10 +44,10 @@ static NvU32 first_page_size(NvU32 page_sizes)
|
||||
|
||||
static inline NV_STATUS __alloc_map_sysmem(NvU64 size, uvm_gpu_t *gpu, uvm_mem_t **sys_mem)
|
||||
{
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
if (g_uvm_global.sev_enabled)
|
||||
return uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(size, gpu, current->mm, sys_mem);
|
||||
|
||||
return uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, sys_mem);
|
||||
else
|
||||
return uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, sys_mem);
|
||||
}
|
||||
|
||||
static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)
|
||||
@@ -335,6 +335,9 @@ error:
|
||||
|
||||
static bool should_test_page_size(size_t alloc_size, NvU32 page_size)
|
||||
{
|
||||
if (g_uvm_global.sev_enabled)
|
||||
return false;
|
||||
|
||||
if (g_uvm_global.num_simulated_devices == 0)
|
||||
return true;
|
||||
|
||||
|
||||
@@ -130,9 +130,9 @@ static NV_STATUS block_migrate_map_unmapped_pages(uvm_va_block_t *va_block,
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS tracker_status;
|
||||
|
||||
// Save the mask of unmapped pages because it will change after the
|
||||
// Get the mask of unmapped pages because it will change after the
|
||||
// first map operation
|
||||
uvm_page_mask_complement(&va_block_context->caller_page_mask, &va_block->maybe_mapped_pages);
|
||||
uvm_va_block_unmapped_pages_get(va_block, region, &va_block_context->caller_page_mask);
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block) && !UVM_ID_IS_CPU(dest_id)) {
|
||||
// Do not map pages that are already resident on the CPU. This is in
|
||||
@@ -147,7 +147,7 @@ static NV_STATUS block_migrate_map_unmapped_pages(uvm_va_block_t *va_block,
|
||||
// such pages at all, when migrating.
|
||||
uvm_page_mask_andnot(&va_block_context->caller_page_mask,
|
||||
&va_block_context->caller_page_mask,
|
||||
uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU));
|
||||
uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU, NUMA_NO_NODE));
|
||||
}
|
||||
|
||||
// Only map those pages that are not mapped anywhere else (likely due
|
||||
@@ -377,7 +377,7 @@ static bool va_block_should_do_cpu_preunmap(uvm_va_block_t *va_block,
|
||||
|
||||
mapped_pages_cpu = uvm_va_block_map_mask_get(va_block, UVM_ID_CPU);
|
||||
if (uvm_processor_mask_test(&va_block->resident, dest_id)) {
|
||||
const uvm_page_mask_t *resident_pages_dest = uvm_va_block_resident_mask_get(va_block, dest_id);
|
||||
const uvm_page_mask_t *resident_pages_dest = uvm_va_block_resident_mask_get(va_block, dest_id, NUMA_NO_NODE);
|
||||
uvm_page_mask_t *do_not_unmap_pages = &va_block_context->scratch_page_mask;
|
||||
|
||||
// TODO: Bug 1877578
|
||||
|
||||
@@ -672,14 +672,6 @@ static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *s
|
||||
.finalize_and_map = uvm_migrate_vma_finalize_and_map_helper,
|
||||
};
|
||||
|
||||
// WAR for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
|
||||
// invalidates on read-only to read-write upgrades
|
||||
//
|
||||
// This code path isn't used on GH180 but we need to maintain consistent
|
||||
// behaviour on systems that do.
|
||||
if (!vma_is_anonymous(args->vma))
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
|
||||
ret = migrate_vma(&uvm_migrate_vma_ops, args->vma, args->start, args->end, args->src, args->dst, state);
|
||||
if (ret < 0)
|
||||
return errno_to_nv_status(ret);
|
||||
@@ -693,24 +685,6 @@ static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *s
|
||||
if (ret < 0)
|
||||
return errno_to_nv_status(ret);
|
||||
|
||||
// TODO: Bug 2419180: support file-backed pages in migrate_vma, when
|
||||
// support for it is added to the Linux kernel
|
||||
//
|
||||
// A side-effect of migrate_vma_setup() is that it calls mmu notifiers even if a
|
||||
// page can't be migrated (e.g. because it's a non-anonymous mapping). We
|
||||
// need this side-effect for SMMU on GH180 to ensure any cached read-only
|
||||
// entries are flushed from SMMU on permission upgrade.
|
||||
//
|
||||
// TODO: Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
|
||||
// invalidates on read-only to read-write upgrades
|
||||
//
|
||||
// The above WAR doesn't work for HugeTLBfs mappings because
|
||||
// migrate_vma_setup() will fail in that case.
|
||||
if (!vma_is_anonymous(args->vma)) {
|
||||
migrate_vma_finalize(args);
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
}
|
||||
|
||||
uvm_migrate_vma_alloc_and_copy(args, state);
|
||||
if (state->status == NV_OK) {
|
||||
migrate_vma_pages(args);
|
||||
@@ -884,13 +858,9 @@ static NV_STATUS migrate_pageable_vma(struct vm_area_struct *vma,
|
||||
start = max(start, vma->vm_start);
|
||||
outer = min(outer, vma->vm_end);
|
||||
|
||||
// migrate_vma only supports anonymous VMAs. We check for those after
|
||||
// calling migrate_vma_setup() to work around Bug 4130089. We need to check
|
||||
// for HugeTLB VMAs here because migrate_vma_setup() will return a fatal
|
||||
// error for those.
|
||||
// TODO: Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
|
||||
// invalidates on read-only to read-write upgrades
|
||||
if (is_vm_hugetlb_page(vma))
|
||||
// TODO: Bug 2419180: support file-backed pages in migrate_vma, when
|
||||
// support for it is added to the Linux kernel
|
||||
if (!vma_is_anonymous(vma))
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
|
||||
if (uvm_processor_mask_empty(&va_space->registered_gpus))
|
||||
|
||||
@@ -51,7 +51,7 @@ typedef struct
|
||||
#if defined(CONFIG_MIGRATE_VMA_HELPER)
|
||||
#define UVM_MIGRATE_VMA_SUPPORTED 1
|
||||
#else
|
||||
#if NV_IS_EXPORT_SYMBOL_PRESENT_migrate_vma_setup
|
||||
#if defined(CONFIG_DEVICE_PRIVATE) && defined(NV_MIGRATE_VMA_SETUP_PRESENT)
|
||||
#define UVM_MIGRATE_VMA_SUPPORTED 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -323,153 +323,37 @@ static void uvm_mmu_page_table_cpu_memset_16(uvm_gpu_t *gpu,
|
||||
uvm_mmu_page_table_cpu_unmap(gpu, phys_alloc);
|
||||
}
|
||||
|
||||
static void pde_fill_cpu(uvm_page_tree_t *tree,
|
||||
uvm_page_directory_t *directory,
|
||||
NvU32 start_index,
|
||||
NvU32 pde_count,
|
||||
uvm_mmu_page_table_alloc_t **phys_addr)
|
||||
{
|
||||
NvU64 pde_data[2], entry_size;
|
||||
NvU32 i;
|
||||
|
||||
UVM_ASSERT(uvm_mmu_use_cpu(tree));
|
||||
|
||||
entry_size = tree->hal->entry_size(directory->depth);
|
||||
UVM_ASSERT(sizeof(pde_data) >= entry_size);
|
||||
|
||||
for (i = 0; i < pde_count; i++) {
|
||||
tree->hal->make_pde(pde_data, phys_addr, directory->depth, directory->entries[start_index + i]);
|
||||
|
||||
if (entry_size == sizeof(pde_data[0]))
|
||||
uvm_mmu_page_table_cpu_memset_8(tree->gpu, &directory->phys_alloc, start_index + i, pde_data[0], 1);
|
||||
else
|
||||
uvm_mmu_page_table_cpu_memset_16(tree->gpu, &directory->phys_alloc, start_index + i, pde_data, 1);
|
||||
}
|
||||
}
|
||||
|
||||
static void pde_fill_gpu(uvm_page_tree_t *tree,
|
||||
uvm_page_directory_t *directory,
|
||||
NvU32 start_index,
|
||||
NvU32 pde_count,
|
||||
uvm_mmu_page_table_alloc_t **phys_addr,
|
||||
uvm_push_t *push)
|
||||
{
|
||||
NvU64 pde_data[2], entry_size;
|
||||
uvm_gpu_address_t pde_entry_addr = uvm_mmu_gpu_address(tree->gpu, directory->phys_alloc.addr);
|
||||
NvU32 max_inline_entries;
|
||||
uvm_push_flag_t push_membar_flag = UVM_PUSH_FLAG_COUNT;
|
||||
uvm_gpu_address_t inline_data_addr;
|
||||
uvm_push_inline_data_t inline_data;
|
||||
NvU32 entry_count, i, j;
|
||||
|
||||
UVM_ASSERT(!uvm_mmu_use_cpu(tree));
|
||||
|
||||
entry_size = tree->hal->entry_size(directory->depth);
|
||||
UVM_ASSERT(sizeof(pde_data) >= entry_size);
|
||||
|
||||
max_inline_entries = UVM_PUSH_INLINE_DATA_MAX_SIZE / entry_size;
|
||||
|
||||
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
|
||||
push_membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_NONE;
|
||||
else if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
|
||||
push_membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_GPU;
|
||||
|
||||
pde_entry_addr.address += start_index * entry_size;
|
||||
|
||||
for (i = 0; i < pde_count;) {
|
||||
// All but the first memory operation can be pipelined. We respect the
|
||||
// caller's pipelining settings for the first push.
|
||||
if (i != 0)
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
|
||||
|
||||
entry_count = min(pde_count - i, max_inline_entries);
|
||||
|
||||
// No membar is needed until the last memory operation. Otherwise,
|
||||
// use caller's membar flag.
|
||||
if ((i + entry_count) < pde_count)
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
else if (push_membar_flag != UVM_PUSH_FLAG_COUNT)
|
||||
uvm_push_set_flag(push, push_membar_flag);
|
||||
|
||||
uvm_push_inline_data_begin(push, &inline_data);
|
||||
for (j = 0; j < entry_count; j++) {
|
||||
tree->hal->make_pde(pde_data, phys_addr, directory->depth, directory->entries[start_index + i + j]);
|
||||
uvm_push_inline_data_add(&inline_data, pde_data, entry_size);
|
||||
}
|
||||
inline_data_addr = uvm_push_inline_data_end(&inline_data);
|
||||
|
||||
tree->gpu->parent->ce_hal->memcopy(push, pde_entry_addr, inline_data_addr, entry_count * entry_size);
|
||||
|
||||
i += entry_count;
|
||||
pde_entry_addr.address += entry_size * entry_count;
|
||||
}
|
||||
}
|
||||
|
||||
// pde_fill() populates pde_count PDE entries (starting at start_index) with
|
||||
// the same mapping, i.e., with the same physical address (phys_addr).
|
||||
// pde_fill() is optimized for pde_count == 1, which is the common case. The
|
||||
// map_remap() function is the only case where pde_count > 1, only used on GA100
|
||||
// GPUs for 512MB page size mappings.
|
||||
static void pde_fill(uvm_page_tree_t *tree,
|
||||
uvm_page_directory_t *directory,
|
||||
NvU32 start_index,
|
||||
NvU32 pde_count,
|
||||
uvm_mmu_page_table_alloc_t **phys_addr,
|
||||
uvm_push_t *push)
|
||||
{
|
||||
UVM_ASSERT(start_index + pde_count <= uvm_mmu_page_tree_entries(tree, directory->depth, UVM_PAGE_SIZE_AGNOSTIC));
|
||||
|
||||
if (push)
|
||||
pde_fill_gpu(tree, directory, start_index, pde_count, phys_addr, push);
|
||||
else
|
||||
pde_fill_cpu(tree, directory, start_index, pde_count, phys_addr);
|
||||
}
|
||||
|
||||
static void phys_mem_init(uvm_page_tree_t *tree, NvU32 page_size, uvm_page_directory_t *dir, uvm_push_t *push)
|
||||
{
|
||||
NvU32 entries_count = uvm_mmu_page_tree_entries(tree, dir->depth, page_size);
|
||||
NvU64 clear_bits[2];
|
||||
uvm_mmu_mode_hal_t *hal = tree->hal;
|
||||
|
||||
// Passing in NULL for the phys_allocs will mark the child entries as
|
||||
// invalid.
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
|
||||
// Init with an invalid PTE or clean PDE. Only Maxwell PDEs can have more
|
||||
// than 512 entries. We initialize them all with the same clean PDE.
|
||||
// Additionally, only ATS systems may require clean PDE bit settings based
|
||||
// on the mapping VA.
|
||||
if (dir->depth == tree->hal->page_table_depth(page_size) || (entries_count > 512 && !g_uvm_global.ats.enabled)) {
|
||||
NvU64 clear_bits[2];
|
||||
|
||||
// If it is not a PTE, make a clean PDE.
|
||||
if (dir->depth != tree->hal->page_table_depth(page_size)) {
|
||||
tree->hal->make_pde(clear_bits, phys_allocs, dir->depth, dir->entries[0]);
|
||||
|
||||
// Make sure that using only clear_bits[0] will work.
|
||||
UVM_ASSERT(tree->hal->entry_size(dir->depth) == sizeof(clear_bits[0]) || clear_bits[0] == clear_bits[1]);
|
||||
}
|
||||
else {
|
||||
*clear_bits = 0;
|
||||
}
|
||||
|
||||
// Initialize the memory to a reasonable value.
|
||||
if (push) {
|
||||
tree->gpu->parent->ce_hal->memset_8(push,
|
||||
uvm_mmu_gpu_address(tree->gpu, dir->phys_alloc.addr),
|
||||
*clear_bits,
|
||||
dir->phys_alloc.size);
|
||||
}
|
||||
else {
|
||||
uvm_mmu_page_table_cpu_memset_8(tree->gpu,
|
||||
&dir->phys_alloc,
|
||||
0,
|
||||
*clear_bits,
|
||||
dir->phys_alloc.size / sizeof(*clear_bits));
|
||||
}
|
||||
if (dir->depth == tree->hal->page_table_depth(page_size)) {
|
||||
*clear_bits = 0; // Invalid PTE
|
||||
}
|
||||
else {
|
||||
pde_fill(tree, dir, 0, entries_count, phys_allocs, push);
|
||||
// passing in NULL for the phys_allocs will mark the child entries as invalid
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
hal->make_pde(clear_bits, phys_allocs, dir->depth);
|
||||
|
||||
// Make sure that using only clear_bits[0] will work
|
||||
UVM_ASSERT(hal->entry_size(dir->depth) == sizeof(clear_bits[0]) || clear_bits[0] == clear_bits[1]);
|
||||
}
|
||||
|
||||
// initialize the memory to a reasonable value
|
||||
if (push) {
|
||||
tree->gpu->parent->ce_hal->memset_8(push,
|
||||
uvm_mmu_gpu_address(tree->gpu, dir->phys_alloc.addr),
|
||||
*clear_bits,
|
||||
dir->phys_alloc.size);
|
||||
}
|
||||
else {
|
||||
uvm_mmu_page_table_cpu_memset_8(tree->gpu,
|
||||
&dir->phys_alloc,
|
||||
0,
|
||||
*clear_bits,
|
||||
dir->phys_alloc.size / sizeof(*clear_bits));
|
||||
}
|
||||
}
|
||||
|
||||
static uvm_page_directory_t *allocate_directory(uvm_page_tree_t *tree,
|
||||
@@ -483,10 +367,8 @@ static uvm_page_directory_t *allocate_directory(uvm_page_tree_t *tree,
|
||||
NvLength phys_alloc_size = hal->allocation_size(depth, page_size);
|
||||
uvm_page_directory_t *dir;
|
||||
|
||||
// The page tree doesn't cache PTEs so space is not allocated for entries
|
||||
// that are always PTEs.
|
||||
// 2M PTEs may later become PDEs so pass UVM_PAGE_SIZE_AGNOSTIC, not
|
||||
// page_size.
|
||||
// The page tree doesn't cache PTEs so space is not allocated for entries that are always PTEs.
|
||||
// 2M PTEs may later become PDEs so pass UVM_PAGE_SIZE_AGNOSTIC, not page_size.
|
||||
if (depth == hal->page_table_depth(UVM_PAGE_SIZE_AGNOSTIC))
|
||||
entry_count = 0;
|
||||
else
|
||||
@@ -527,6 +409,108 @@ static inline NvU32 index_to_entry(uvm_mmu_mode_hal_t *hal, NvU32 entry_index, N
|
||||
return hal->entries_per_index(depth) * entry_index + hal->entry_offset(depth, page_size);
|
||||
}
|
||||
|
||||
static void pde_fill_cpu(uvm_page_tree_t *tree,
|
||||
NvU32 depth,
|
||||
uvm_mmu_page_table_alloc_t *directory,
|
||||
NvU32 start_index,
|
||||
NvU32 pde_count,
|
||||
uvm_mmu_page_table_alloc_t **phys_addr)
|
||||
{
|
||||
NvU64 pde_data[2], entry_size;
|
||||
|
||||
UVM_ASSERT(uvm_mmu_use_cpu(tree));
|
||||
entry_size = tree->hal->entry_size(depth);
|
||||
UVM_ASSERT(sizeof(pde_data) >= entry_size);
|
||||
|
||||
tree->hal->make_pde(pde_data, phys_addr, depth);
|
||||
|
||||
if (entry_size == sizeof(pde_data[0]))
|
||||
uvm_mmu_page_table_cpu_memset_8(tree->gpu, directory, start_index, pde_data[0], pde_count);
|
||||
else
|
||||
uvm_mmu_page_table_cpu_memset_16(tree->gpu, directory, start_index, pde_data, pde_count);
|
||||
}
|
||||
|
||||
static void pde_fill_gpu(uvm_page_tree_t *tree,
|
||||
NvU32 depth,
|
||||
uvm_mmu_page_table_alloc_t *directory,
|
||||
NvU32 start_index,
|
||||
NvU32 pde_count,
|
||||
uvm_mmu_page_table_alloc_t **phys_addr,
|
||||
uvm_push_t *push)
|
||||
{
|
||||
NvU64 pde_data[2], entry_size;
|
||||
uvm_gpu_address_t pde_entry_addr = uvm_mmu_gpu_address(tree->gpu, directory->addr);
|
||||
|
||||
UVM_ASSERT(!uvm_mmu_use_cpu(tree));
|
||||
|
||||
entry_size = tree->hal->entry_size(depth);
|
||||
UVM_ASSERT(sizeof(pde_data) >= entry_size);
|
||||
|
||||
tree->hal->make_pde(pde_data, phys_addr, depth);
|
||||
pde_entry_addr.address += start_index * entry_size;
|
||||
|
||||
if (entry_size == sizeof(pde_data[0])) {
|
||||
tree->gpu->parent->ce_hal->memset_8(push, pde_entry_addr, pde_data[0], sizeof(pde_data[0]) * pde_count);
|
||||
}
|
||||
else {
|
||||
NvU32 max_inline_entries = UVM_PUSH_INLINE_DATA_MAX_SIZE / sizeof(pde_data);
|
||||
uvm_gpu_address_t inline_data_addr;
|
||||
uvm_push_inline_data_t inline_data;
|
||||
uvm_push_flag_t push_membar_flag = UVM_PUSH_FLAG_COUNT;
|
||||
NvU32 i;
|
||||
|
||||
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
|
||||
push_membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_NONE;
|
||||
else if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
|
||||
push_membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_GPU;
|
||||
|
||||
for (i = 0; i < pde_count;) {
|
||||
NvU32 j;
|
||||
NvU32 entry_count = min(pde_count - i, max_inline_entries);
|
||||
|
||||
uvm_push_inline_data_begin(push, &inline_data);
|
||||
for (j = 0; j < entry_count; j++)
|
||||
uvm_push_inline_data_add(&inline_data, pde_data, sizeof(pde_data));
|
||||
inline_data_addr = uvm_push_inline_data_end(&inline_data);
|
||||
|
||||
// All but the first memcopy can be pipelined. We respect the
|
||||
// caller's pipelining settings for the first push.
|
||||
if (i != 0)
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
|
||||
|
||||
// No membar is needed until the last copy. Otherwise, use
|
||||
// caller's membar flag.
|
||||
if (i + entry_count < pde_count)
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
else if (push_membar_flag != UVM_PUSH_FLAG_COUNT)
|
||||
uvm_push_set_flag(push, push_membar_flag);
|
||||
|
||||
tree->gpu->parent->ce_hal->memcopy(push, pde_entry_addr, inline_data_addr, entry_count * sizeof(pde_data));
|
||||
|
||||
i += entry_count;
|
||||
pde_entry_addr.address += sizeof(pde_data) * entry_count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// pde_fill() populates pde_count PDE entries (starting at start_index) with
|
||||
// the same mapping, i.e., with the same physical address (phys_addr).
|
||||
static void pde_fill(uvm_page_tree_t *tree,
|
||||
NvU32 depth,
|
||||
uvm_mmu_page_table_alloc_t *directory,
|
||||
NvU32 start_index,
|
||||
NvU32 pde_count,
|
||||
uvm_mmu_page_table_alloc_t **phys_addr,
|
||||
uvm_push_t *push)
|
||||
{
|
||||
UVM_ASSERT(start_index + pde_count <= uvm_mmu_page_tree_entries(tree, depth, UVM_PAGE_SIZE_AGNOSTIC));
|
||||
|
||||
if (push)
|
||||
pde_fill_gpu(tree, depth, directory, start_index, pde_count, phys_addr, push);
|
||||
else
|
||||
pde_fill_cpu(tree, depth, directory, start_index, pde_count, phys_addr);
|
||||
}
|
||||
|
||||
static uvm_page_directory_t *host_pde_write(uvm_page_directory_t *dir,
|
||||
uvm_page_directory_t *parent,
|
||||
NvU32 index_in_parent)
|
||||
@@ -556,7 +540,7 @@ static void pde_write(uvm_page_tree_t *tree,
|
||||
phys_allocs[i] = &entry->phys_alloc;
|
||||
}
|
||||
|
||||
pde_fill(tree, dir, entry_index, 1, phys_allocs, push);
|
||||
pde_fill(tree, dir->depth, &dir->phys_alloc, entry_index, 1, phys_allocs, push);
|
||||
}
|
||||
|
||||
static void host_pde_clear(uvm_page_tree_t *tree, uvm_page_directory_t *dir, NvU32 entry_index, NvU32 page_size)
|
||||
@@ -829,11 +813,8 @@ static NV_STATUS allocate_page_table(uvm_page_tree_t *tree, NvU32 page_size, uvm
|
||||
|
||||
static void map_remap_deinit(uvm_page_tree_t *tree)
|
||||
{
|
||||
if (tree->map_remap.pde0) {
|
||||
phys_mem_deallocate(tree, &tree->map_remap.pde0->phys_alloc);
|
||||
uvm_kvfree(tree->map_remap.pde0);
|
||||
tree->map_remap.pde0 = NULL;
|
||||
}
|
||||
if (tree->map_remap.pde0.size)
|
||||
phys_mem_deallocate(tree, &tree->map_remap.pde0);
|
||||
|
||||
if (tree->map_remap.ptes_invalid_4k.size)
|
||||
phys_mem_deallocate(tree, &tree->map_remap.ptes_invalid_4k);
|
||||
@@ -858,16 +839,10 @@ static NV_STATUS map_remap_init(uvm_page_tree_t *tree)
|
||||
// PDE1-depth(512M) PTE. We first map it to the pde0 directory, then we
|
||||
// return the PTE for the get_ptes()'s caller.
|
||||
if (tree->hal->page_sizes() & UVM_PAGE_SIZE_512M) {
|
||||
tree->map_remap.pde0 = allocate_directory(tree,
|
||||
UVM_PAGE_SIZE_2M,
|
||||
tree->hal->page_table_depth(UVM_PAGE_SIZE_2M),
|
||||
UVM_PMM_ALLOC_FLAGS_EVICT);
|
||||
if (tree->map_remap.pde0 == NULL) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
status = allocate_page_table(tree, UVM_PAGE_SIZE_2M, &tree->map_remap.pde0);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
status = page_tree_begin_acquire(tree, &tree->tracker, &push, "map remap init");
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
@@ -889,23 +864,22 @@ static NV_STATUS map_remap_init(uvm_page_tree_t *tree)
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
NvU32 depth = tree->hal->page_table_depth(UVM_PAGE_SIZE_4K) - 1;
|
||||
size_t index_4k = tree->hal->entry_offset(depth, UVM_PAGE_SIZE_4K);
|
||||
NvU32 pde0_entries = tree->map_remap.pde0->phys_alloc.size / tree->hal->entry_size(tree->map_remap.pde0->depth);
|
||||
|
||||
// pde0 depth equals UVM_PAGE_SIZE_2M.
|
||||
NvU32 pde0_depth = tree->hal->page_table_depth(UVM_PAGE_SIZE_2M);
|
||||
NvU32 pde0_entries = tree->map_remap.pde0.size / tree->hal->entry_size(pde0_depth);
|
||||
|
||||
// The big-page entry is NULL which makes it an invalid entry.
|
||||
phys_allocs[index_4k] = &tree->map_remap.ptes_invalid_4k;
|
||||
|
||||
// By default CE operations include a MEMBAR_SYS. MEMBAR_GPU is
|
||||
// sufficient when pde0 is allocated in VIDMEM.
|
||||
if (tree->map_remap.pde0->phys_alloc.addr.aperture == UVM_APERTURE_VID)
|
||||
if (tree->map_remap.pde0.addr.aperture == UVM_APERTURE_VID)
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
|
||||
|
||||
// This is an orphan directory; make_pde() requires a directory to
// compute the VA. The UVM depth map_remap() operates on is not in the
// range make_pde() must operate on. We only need to supply the fields
// used by make_pde() so that it does not access invalid memory addresses.
|
||||
|
||||
pde_fill(tree,
|
||||
tree->map_remap.pde0,
|
||||
pde0_depth,
|
||||
&tree->map_remap.pde0,
|
||||
0,
|
||||
pde0_entries,
|
||||
(uvm_mmu_page_table_alloc_t **)&phys_allocs,
|
||||
@@ -932,10 +906,11 @@ error:
|
||||
// --------------|-------------------------||----------------|----------------
|
||||
// vidmem | - || vidmem | false
|
||||
// sysmem | - || sysmem | false
|
||||
// default | <not set> || vidmem | true
|
||||
// default | <not set> || vidmem | true (1)
|
||||
// default | vidmem || vidmem | false
|
||||
// default | sysmem || sysmem | false
|
||||
//
|
||||
// (1) When SEV mode is enabled, the fallback path is disabled.
|
||||
//
|
||||
// In SR-IOV heavy the page tree must be in vidmem, to prevent guest drivers
|
||||
// from updating GPU page tables without hypervisor knowledge.
|
||||
@@ -951,27 +926,28 @@ error:
|
||||
//
|
||||
static void page_tree_set_location(uvm_page_tree_t *tree, uvm_aperture_t location)
|
||||
{
|
||||
bool should_location_be_vidmem;
|
||||
UVM_ASSERT(tree->gpu != NULL);
|
||||
UVM_ASSERT_MSG((location == UVM_APERTURE_VID) ||
|
||||
(location == UVM_APERTURE_SYS) ||
|
||||
(location == UVM_APERTURE_DEFAULT),
|
||||
"Invalid location %s (%d)\n", uvm_aperture_string(location), (int)location);
|
||||
|
||||
// The page tree of a "fake" GPU used during page tree testing can be in
|
||||
// sysmem in scenarios where a "real" GPU must be in vidmem. Fake GPUs can
|
||||
// be identified by having no channel manager.
|
||||
if (tree->gpu->channel_manager != NULL) {
|
||||
should_location_be_vidmem = uvm_gpu_is_virt_mode_sriov_heavy(tree->gpu)
|
||||
|| uvm_conf_computing_mode_enabled(tree->gpu);
|
||||
|
||||
if (uvm_gpu_is_virt_mode_sriov_heavy(tree->gpu))
|
||||
UVM_ASSERT(location == UVM_APERTURE_VID);
|
||||
else if (uvm_conf_computing_mode_enabled(tree->gpu))
|
||||
UVM_ASSERT(location == UVM_APERTURE_VID);
|
||||
}
|
||||
// The page tree of a "fake" GPU used during page tree testing can be in
|
||||
// sysmem even if should_location_be_vidmem is true. A fake GPU can be
|
||||
// identified by having no channel manager.
|
||||
if ((tree->gpu->channel_manager != NULL) && should_location_be_vidmem)
|
||||
UVM_ASSERT(location == UVM_APERTURE_VID);
|
||||
|
||||
if (location == UVM_APERTURE_DEFAULT) {
|
||||
if (page_table_aperture == UVM_APERTURE_DEFAULT) {
|
||||
tree->location = UVM_APERTURE_VID;
|
||||
tree->location_sys_fallback = true;
|
||||
|
||||
// See the comment (1) above.
|
||||
tree->location_sys_fallback = !g_uvm_global.sev_enabled;
|
||||
}
|
||||
else {
|
||||
tree->location = page_table_aperture;
|
||||
@@ -1358,9 +1334,10 @@ static NV_STATUS map_remap(uvm_page_tree_t *tree, NvU64 start, NvLength size, uv
|
||||
if (uvm_page_table_range_aperture(range) == UVM_APERTURE_VID)
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
|
||||
|
||||
phys_alloc[0] = &tree->map_remap.pde0->phys_alloc;
|
||||
phys_alloc[0] = &tree->map_remap.pde0;
|
||||
pde_fill(tree,
|
||||
range->table,
|
||||
range->table->depth,
|
||||
&range->table->phys_alloc,
|
||||
range->start_index,
|
||||
range->entry_count,
|
||||
(uvm_mmu_page_table_alloc_t **)&phys_alloc,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -219,7 +219,7 @@ struct uvm_mmu_mode_hal_struct
|
||||
// point to two items for dual PDEs).
|
||||
// any of the allocs are allowed to be NULL, in which case they are to be
|
||||
// treated as empty.
|
||||
void (*make_pde)(void *entry, uvm_mmu_page_table_alloc_t **allocs, NvU32 depth, uvm_page_directory_t *child_dir);
|
||||
void (*make_pde)(void *entry, uvm_mmu_page_table_alloc_t **allocs, NvU32 depth);
|
||||
|
||||
// size of an entry in a directory/table. Generally either 8 or 16 bytes.
|
||||
// (in the case of Pascal dual PDEs)
|
||||
@@ -229,7 +229,7 @@ struct uvm_mmu_mode_hal_struct
|
||||
NvU32 (*entries_per_index)(NvU32 depth);
|
||||
|
||||
// For dual PDEs, this is either 1 or 0, depending on the page size.
|
||||
// This is used to index the host copy only. GPU PDEs are always entirely
|
||||
// This is used to index the host copy only. GPU PDEs are always entirely
|
||||
// re-written using make_pde.
|
||||
NvLength (*entry_offset)(NvU32 depth, NvU32 page_size);
|
||||
|
||||
@@ -295,8 +295,9 @@ struct uvm_page_tree_struct
|
||||
|
||||
// PDE0 where all big-page entries are invalid, and small-page entries
|
||||
// point to ptes_invalid_4k.
|
||||
// pde0 is used on Pascal+ GPUs, i.e., they have the same PDE format.
|
||||
uvm_page_directory_t *pde0;
|
||||
// pde0 is only used on Pascal-Ampere, i.e., they have the same PDE
|
||||
// format.
|
||||
uvm_mmu_page_table_alloc_t pde0;
|
||||
} map_remap;
|
||||
|
||||
// Tracker for all GPU operations on the tree
|
||||
@@ -364,32 +365,21 @@ void uvm_page_tree_deinit(uvm_page_tree_t *tree);
|
||||
// the same page size without an intervening put_ptes. To duplicate a subset of
|
||||
// an existing range or change the size of an existing range, use
|
||||
// uvm_page_table_range_get_upper() and/or uvm_page_table_range_shrink().
|
||||
NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 start,
|
||||
NvLength size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
uvm_page_table_range_t *range);
|
||||
NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start, NvLength size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags, uvm_page_table_range_t *range);
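// Editor's note: an illustrative sketch, not part of this change, showing the
// usual get/put pairing described in the comment above. The start address,
// error handling, and the exact uvm_page_tree_put_ptes() signature are
// assumptions made for the example only.
static NV_STATUS example_get_put_2m_range(uvm_page_tree_t *tree)
{
    uvm_page_table_range_t range;
    NV_STATUS status;

    // Synchronously allocate page tables covering one 2M page.
    status = uvm_page_tree_get_ptes(tree, UVM_PAGE_SIZE_2M, 0x200000, UVM_PAGE_SIZE_2M,
                                    UVM_PMM_ALLOC_FLAGS_EVICT, &range);
    if (status != NV_OK)
        return status;

    // ... write PTEs covered by the range here ...

    // Release the reference taken by get_ptes; the tables may be freed once
    // they are no longer referenced.
    uvm_page_tree_put_ptes(tree, &range);

    return NV_OK;
}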
|
||||
|
||||
// Same as uvm_page_tree_get_ptes(), but doesn't synchronize the GPU work.
|
||||
//
|
||||
// All pending operations can be waited on with uvm_page_tree_wait().
|
||||
NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 start,
|
||||
NvLength size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
uvm_page_table_range_t *range);
|
||||
NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start, NvLength size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags, uvm_page_table_range_t *range);
|
||||
|
||||
// Returns a single-entry page table range for the addresses passed.
|
||||
// The size parameter must be a page size supported by this tree.
|
||||
// This is equivalent to calling uvm_page_tree_get_ptes() with size equal to
|
||||
// page_size.
|
||||
NV_STATUS uvm_page_tree_get_entry(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 start,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
uvm_page_table_range_t *single);
|
||||
NV_STATUS uvm_page_tree_get_entry(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start,
|
||||
uvm_pmm_alloc_flags_t pmm_flags, uvm_page_table_range_t *single);
|
||||
|
||||
// For a single-entry page table range, write the PDE (which could be a dual
|
||||
// PDE) to the GPU.
|
||||
@@ -488,8 +478,8 @@ NV_STATUS uvm_page_table_range_vec_create(uvm_page_tree_t *tree,
|
||||
// new_range_vec will contain the upper portion of range_vec, starting at
|
||||
// new_end + 1.
|
||||
//
|
||||
// new_end + 1 is required to be within the address range of range_vec and be
|
||||
// aligned to range_vec's page_size.
|
||||
// new_end + 1 is required to be within the address range of range_vec and be aligned to
|
||||
// range_vec's page_size.
|
||||
//
|
||||
// On failure, the original range vector is left unmodified.
|
||||
NV_STATUS uvm_page_table_range_vec_split_upper(uvm_page_table_range_vec_t *range_vec,
|
||||
@@ -511,22 +501,18 @@ void uvm_page_table_range_vec_destroy(uvm_page_table_range_vec_t *range_vec);
|
||||
// for each offset.
|
||||
// The caller_data pointer is what the caller passed in as caller_data to
|
||||
// uvm_page_table_range_vec_write_ptes().
|
||||
typedef NvU64 (*uvm_page_table_range_pte_maker_t)(uvm_page_table_range_vec_t *range_vec,
|
||||
NvU64 offset,
|
||||
void *caller_data);
|
||||
typedef NvU64 (*uvm_page_table_range_pte_maker_t)(uvm_page_table_range_vec_t *range_vec, NvU64 offset,
|
||||
void *caller_data);
|
||||
|
||||
// Write all PTEs covered by the range vector using the given PTE making
|
||||
// function.
|
||||
// Write all PTEs covered by the range vector using the given PTE making function.
|
||||
//
|
||||
// After writing all the PTEs a TLB invalidate operation is performed including
|
||||
// the passed in tlb_membar.
|
||||
//
|
||||
// See comments about uvm_page_table_range_pte_maker_t for details about the
|
||||
// PTE making callback.
|
||||
NV_STATUS uvm_page_table_range_vec_write_ptes(uvm_page_table_range_vec_t *range_vec,
|
||||
uvm_membar_t tlb_membar,
|
||||
uvm_page_table_range_pte_maker_t pte_maker,
|
||||
void *caller_data);
|
||||
NV_STATUS uvm_page_table_range_vec_write_ptes(uvm_page_table_range_vec_t *range_vec, uvm_membar_t tlb_membar,
|
||||
uvm_page_table_range_pte_maker_t pte_maker, void *caller_data);
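// Editor's note: an illustrative sketch, not part of this change. It mirrors
// the PTE maker used by the range_vec tests further down, which encode the
// offset into the PTE value; a real callback would construct a hardware PTE
// for the physical page backing each offset. The function name is hypothetical.
static NvU64 example_pte_maker(uvm_page_table_range_vec_t *range_vec, NvU64 offset, void *caller_data)
{
    // Invoked once per page_size-aligned offset covered by range_vec.
    (void)caller_data;
    return range_vec->size + offset;
}

// Writing the PTEs and issuing the TLB invalidate without a membar would then
// look roughly like:
//     status = uvm_page_table_range_vec_write_ptes(range_vec, UVM_MEMBAR_NONE,
//                                                  example_pte_maker, NULL);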
|
||||
|
||||
// Set all PTEs covered by the range vector to an empty PTE
|
||||
//
|
||||
@@ -650,9 +636,8 @@ static NvU64 uvm_page_table_range_size(uvm_page_table_range_t *range)
|
||||
|
||||
// Get the physical address of the entry at entry_index within the range
|
||||
// (counted from range->start_index).
|
||||
static uvm_gpu_phys_address_t uvm_page_table_range_entry_address(uvm_page_tree_t *tree,
|
||||
uvm_page_table_range_t *range,
|
||||
size_t entry_index)
|
||||
static uvm_gpu_phys_address_t uvm_page_table_range_entry_address(uvm_page_tree_t *tree, uvm_page_table_range_t *range,
|
||||
size_t entry_index)
|
||||
{
|
||||
NvU32 entry_size = uvm_mmu_pte_size(tree, range->page_size);
|
||||
uvm_gpu_phys_address_t entry = range->table->phys_alloc.addr;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -146,15 +146,9 @@ static void fake_tlb_invals_disable(void)
|
||||
g_fake_tlb_invals_tracking_enabled = false;
|
||||
}
|
||||
|
||||
// Fake TLB invalidate VA that just saves off the parameters so that they can be
|
||||
// verified later.
|
||||
static void fake_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
uvm_membar_t membar)
|
||||
// Fake TLB invalidate VA that just saves off the parameters so that they can be verified later
|
||||
static void fake_tlb_invalidate_va(uvm_push_t *push, uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth, NvU64 base, NvU64 size, NvU32 page_size, uvm_membar_t membar)
|
||||
{
|
||||
if (!g_fake_tlb_invals_tracking_enabled)
|
||||
return;
|
||||
@@ -216,8 +210,8 @@ static bool assert_and_reset_last_invalidate(NvU32 expected_depth, bool expected
|
||||
}
|
||||
if ((g_last_fake_inval->membar == UVM_MEMBAR_NONE) == expected_membar) {
|
||||
UVM_TEST_PRINT("Expected %s membar, got %s instead\n",
|
||||
expected_membar ? "a" : "no",
|
||||
uvm_membar_string(g_last_fake_inval->membar));
|
||||
expected_membar ? "a" : "no",
|
||||
uvm_membar_string(g_last_fake_inval->membar));
|
||||
result = false;
|
||||
}
|
||||
|
||||
@@ -236,8 +230,7 @@ static bool assert_last_invalidate_all(NvU32 expected_depth, bool expected_memba
|
||||
}
|
||||
if (g_last_fake_inval->base != 0 || g_last_fake_inval->size != -1) {
|
||||
UVM_TEST_PRINT("Expected invalidate all but got range [0x%llx, 0x%llx) instead\n",
|
||||
g_last_fake_inval->base,
|
||||
g_last_fake_inval->base + g_last_fake_inval->size);
|
||||
g_last_fake_inval->base, g_last_fake_inval->base + g_last_fake_inval->size);
|
||||
return false;
|
||||
}
|
||||
if (g_last_fake_inval->depth != expected_depth) {
|
||||
@@ -254,16 +247,15 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
|
||||
UVM_ASSERT(g_fake_tlb_invals_tracking_enabled);
|
||||
|
||||
if (g_fake_invals_count == 0) {
|
||||
UVM_TEST_PRINT("Expected an invalidate for range [0x%llx, 0x%llx), but got none\n", base, base + size);
|
||||
UVM_TEST_PRINT("Expected an invalidate for range [0x%llx, 0x%llx), but got none\n",
|
||||
base, base + size);
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((inval->base != base || inval->size != size) && inval->base != 0 && inval->size != -1) {
|
||||
UVM_TEST_PRINT("Expected invalidate range [0x%llx, 0x%llx), but got range [0x%llx, 0x%llx) instead\n",
|
||||
base,
|
||||
base + size,
|
||||
inval->base,
|
||||
inval->base + inval->size);
|
||||
base, base + size,
|
||||
inval->base, inval->base + inval->size);
|
||||
return false;
|
||||
}
|
||||
if (inval->depth != expected_depth) {
|
||||
@@ -278,13 +270,7 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool assert_invalidate_range(NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
bool allow_inval_all,
|
||||
NvU32 range_depth,
|
||||
NvU32 all_depth,
|
||||
bool expected_membar)
|
||||
static bool assert_invalidate_range(NvU64 base, NvU64 size, NvU32 page_size, bool allow_inval_all, NvU32 range_depth, NvU32 all_depth, bool expected_membar)
|
||||
{
|
||||
NvU32 i;
|
||||
|
||||
@@ -502,6 +488,7 @@ static NV_STATUS alloc_adjacent_pde_64k_memory(uvm_gpu_t *gpu)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
|
||||
static NV_STATUS alloc_nearby_pde_64k_memory(uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_page_tree_t tree;
|
||||
@@ -855,7 +842,6 @@ static NV_STATUS get_two_free_apart(uvm_gpu_t *gpu)
|
||||
TEST_CHECK_RET(range2.entry_count == 256);
|
||||
TEST_CHECK_RET(range2.table->ref_count == 512);
|
||||
TEST_CHECK_RET(range1.table == range2.table);
|
||||
|
||||
// 4k page is second entry in a dual PDE
|
||||
TEST_CHECK_RET(range1.table == tree.root->entries[0]->entries[0]->entries[0]->entries[1]);
|
||||
TEST_CHECK_RET(range1.start_index == 256);
|
||||
@@ -885,7 +871,6 @@ static NV_STATUS get_overlapping_dual_pdes(uvm_gpu_t *gpu)
|
||||
MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_64K, size, size, &range64k), NV_OK);
|
||||
TEST_CHECK_RET(range64k.entry_count == 16);
|
||||
TEST_CHECK_RET(range64k.table->ref_count == 16);
|
||||
|
||||
// 4k page is second entry in a dual PDE
|
||||
TEST_CHECK_RET(range64k.table == tree.root->entries[0]->entries[0]->entries[0]->entries[0]);
|
||||
TEST_CHECK_RET(range64k.start_index == 16);
|
||||
@@ -1045,13 +1030,10 @@ static NV_STATUS test_tlb_invalidates(uvm_gpu_t *gpu)
|
||||
|
||||
// Depth 4
|
||||
NvU64 extent_pte = UVM_PAGE_SIZE_2M;
|
||||
|
||||
// Depth 3
|
||||
NvU64 extent_pde0 = extent_pte * (1ull << 8);
|
||||
|
||||
// Depth 2
|
||||
NvU64 extent_pde1 = extent_pde0 * (1ull << 9);
|
||||
|
||||
// Depth 1
|
||||
NvU64 extent_pde2 = extent_pde1 * (1ull << 9);
|
||||
|
||||
@@ -1099,11 +1081,7 @@ static NV_STATUS test_tlb_invalidates(uvm_gpu_t *gpu)
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 min_page_size,
|
||||
NvU32 max_page_size)
|
||||
static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree, NvU64 base, NvU64 size, NvU32 min_page_size, NvU32 max_page_size)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_push_t push;
|
||||
@@ -1227,11 +1205,7 @@ static bool assert_range_vec_ptes(uvm_page_table_range_vec_t *range_vec, bool ex
|
||||
NvU64 expected_pte = expecting_cleared ? 0 : range_vec->size + offset;
|
||||
if (*pte != expected_pte) {
|
||||
UVM_TEST_PRINT("PTE is 0x%llx instead of 0x%llx for offset 0x%llx within range [0x%llx, 0x%llx)\n",
|
||||
*pte,
|
||||
expected_pte,
|
||||
offset,
|
||||
range_vec->start,
|
||||
range_vec->size);
|
||||
*pte, expected_pte, offset, range_vec->start, range_vec->size);
|
||||
return false;
|
||||
}
|
||||
offset += range_vec->page_size;
|
||||
@@ -1252,11 +1226,7 @@ static NV_STATUS test_range_vec_write_ptes(uvm_page_table_range_vec_t *range_vec
|
||||
TEST_CHECK_RET(data.status == NV_OK);
|
||||
TEST_CHECK_RET(data.count == range_vec->size / range_vec->page_size);
|
||||
TEST_CHECK_RET(assert_invalidate_range_specific(g_last_fake_inval,
|
||||
range_vec->start,
|
||||
range_vec->size,
|
||||
range_vec->page_size,
|
||||
page_table_depth,
|
||||
membar != UVM_MEMBAR_NONE));
|
||||
range_vec->start, range_vec->size, range_vec->page_size, page_table_depth, membar != UVM_MEMBAR_NONE));
|
||||
TEST_CHECK_RET(assert_range_vec_ptes(range_vec, false));
|
||||
|
||||
fake_tlb_invals_disable();
|
||||
@@ -1279,11 +1249,7 @@ static NV_STATUS test_range_vec_clear_ptes(uvm_page_table_range_vec_t *range_vec
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree,
|
||||
NvU64 start,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
uvm_page_table_range_vec_t **range_vec_out)
|
||||
static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree, NvU64 start, NvU64 size, NvU32 page_size, uvm_page_table_range_vec_t **range_vec_out)
|
||||
{
|
||||
uvm_page_table_range_vec_t *range_vec;
|
||||
uvm_pmm_alloc_flags_t pmm_flags = UVM_PMM_ALLOC_FLAGS_EVICT;
|
||||
@@ -1586,17 +1552,17 @@ static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
|
||||
|
||||
memset(phys_allocs, 0, sizeof(phys_allocs));
|
||||
|
||||
hal->make_pde(&pde_bits, phys_allocs, 0, NULL);
|
||||
hal->make_pde(&pde_bits, phys_allocs, 0);
|
||||
TEST_CHECK_RET(pde_bits == 0x0L);
|
||||
|
||||
phys_allocs[0] = &alloc_sys;
|
||||
phys_allocs[1] = &alloc_vid;
|
||||
hal->make_pde(&pde_bits, phys_allocs, 0, NULL);
|
||||
hal->make_pde(&pde_bits, phys_allocs, 0);
|
||||
TEST_CHECK_RET(pde_bits == 0x1BBBBBBD99999992LL);
|
||||
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
phys_allocs[1] = &alloc_sys;
|
||||
hal->make_pde(&pde_bits, phys_allocs, 0, NULL);
|
||||
hal->make_pde(&pde_bits, phys_allocs, 0);
|
||||
TEST_CHECK_RET(pde_bits == 0x9999999E1BBBBBB1LL);
|
||||
|
||||
for (j = 0; j <= 2; j++) {
|
||||
@@ -1666,7 +1632,6 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999000LL);
|
||||
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
|
||||
|
||||
// big versions have [11:8] set as well to test the page table merging
|
||||
uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999900LL);
|
||||
uvm_mmu_page_table_alloc_t alloc_big_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBBB00LL);
|
||||
@@ -1674,31 +1639,31 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent
|
||||
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
|
||||
|
||||
// Make sure cleared PDEs work as expected
|
||||
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0);
|
||||
|
||||
memset(pde_bits, 0xFF, sizeof(pde_bits));
|
||||
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
|
||||
|
||||
// Sys and vidmem PDEs
|
||||
phys_allocs[0] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x3999999999990C);
|
||||
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB0A);
|
||||
|
||||
// Dual PDEs
|
||||
phys_allocs[0] = &alloc_big_sys;
|
||||
phys_allocs[1] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x3999999999999C && pde_bits[1] == 0x1BBBBBB0A);
|
||||
|
||||
phys_allocs[0] = &alloc_big_vid;
|
||||
phys_allocs[1] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBBBA && pde_bits[1] == 0x3999999999990C);
|
||||
|
||||
// uncached, i.e., the sysmem data is not cached in GPU's L2 cache. Clear
|
||||
@@ -1762,36 +1727,36 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr
|
||||
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
|
||||
|
||||
// Make sure cleared PDEs work as expected
|
||||
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0);
|
||||
|
||||
memset(pde_bits, 0xFF, sizeof(pde_bits));
|
||||
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
|
||||
|
||||
// Sys and vidmem PDEs
|
||||
phys_allocs[0] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x3999999999990C);
|
||||
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB0A);
|
||||
|
||||
// Dual PDEs
|
||||
phys_allocs[0] = &alloc_big_sys;
|
||||
phys_allocs[1] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x3999999999999C && pde_bits[1] == 0x1BBBBBB0A);
|
||||
|
||||
phys_allocs[0] = &alloc_big_vid;
|
||||
phys_allocs[1] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBBBA && pde_bits[1] == 0x3999999999990C);
|
||||
|
||||
// NO_ATS PDE1 (depth 2)
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 2, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 2);
|
||||
if (g_uvm_global.ats.enabled)
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB2A);
|
||||
else
|
||||
@@ -1840,32 +1805,32 @@ static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func ent
|
||||
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
|
||||
|
||||
// Make sure cleared PDEs work as expected
|
||||
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0);
|
||||
|
||||
// Cleared PDEs work as expected for big and small PDEs.
|
||||
memset(pde_bits, 0xFF, sizeof(pde_bits));
|
||||
hal->make_pde(pde_bits, phys_allocs, 4, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 4);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
|
||||
|
||||
// Sys and vidmem PDEs, uncached ATS allowed.
|
||||
phys_allocs[0] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x999999999900C);
|
||||
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0xBBBBBBB00A);
|
||||
|
||||
// Dual PDEs, uncached.
|
||||
phys_allocs[0] = &alloc_big_sys;
|
||||
phys_allocs[1] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 4, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 4);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x999999999991C && pde_bits[1] == 0xBBBBBBB01A);
|
||||
|
||||
phys_allocs[0] = &alloc_big_vid;
|
||||
phys_allocs[1] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 4, NULL);
|
||||
hal->make_pde(pde_bits, phys_allocs, 4);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0xBBBBBBBB1A && pde_bits[1] == 0x999999999901C);
|
||||
|
||||
// uncached, i.e., the sysmem data is not cached in GPU's L2 cache, and
|
||||
@@ -2338,8 +2303,7 @@ NV_STATUS uvm_test_page_tree(UVM_TEST_PAGE_TREE_PARAMS *params, struct file *fil
|
||||
gpu->parent = parent_gpu;
|
||||
|
||||
// At least test_tlb_invalidates() relies on global state
|
||||
// (g_tlb_invalidate_*) so make sure only one test instance can run at a
|
||||
// time.
|
||||
// (g_tlb_invalidate_*) so make sure only one test instance can run at a time.
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
|
||||
// Allocate the fake TLB tracking state. Notably tests still need to enable
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2020 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -140,10 +140,7 @@ static NvU64 small_half_pde_pascal(uvm_mmu_page_table_alloc_t *phys_alloc)
|
||||
return pde_bits;
|
||||
}
|
||||
|
||||
static void make_pde_pascal(void *entry,
|
||||
uvm_mmu_page_table_alloc_t **phys_allocs,
|
||||
NvU32 depth,
|
||||
uvm_page_directory_t *child_dir)
|
||||
static void make_pde_pascal(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
|
||||
{
|
||||
NvU32 entry_count = entries_per_index_pascal(depth);
|
||||
NvU64 *entry_bits = (NvU64 *)entry;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2019 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -22,10 +22,7 @@
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_perf_events.h"
|
||||
#include "uvm_va_block.h"
|
||||
#include "uvm_va_range.h"
|
||||
#include "uvm_va_space.h"
|
||||
#include "uvm_kvmalloc.h"
|
||||
#include "uvm_test.h"
|
||||
|
||||
// Global variable used to check that callbacks are correctly executed
|
||||
@@ -46,10 +43,7 @@ static NV_STATUS test_events(uvm_va_space_t *va_space)
|
||||
NV_STATUS status;
|
||||
uvm_perf_event_data_t event_data;
|
||||
|
||||
uvm_va_block_t block;
|
||||
|
||||
test_data = 0;
|
||||
|
||||
memset(&event_data, 0, sizeof(event_data));
|
||||
|
||||
// Use CPU id to avoid triggering the GPU stats update code
|
||||
@@ -58,6 +52,7 @@ static NV_STATUS test_events(uvm_va_space_t *va_space)
|
||||
// Register a callback for page fault
|
||||
status = uvm_perf_register_event_callback(&va_space->perf_events, UVM_PERF_EVENT_FAULT, callback_inc_1);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
// Register a callback for page fault
|
||||
status = uvm_perf_register_event_callback(&va_space->perf_events, UVM_PERF_EVENT_FAULT, callback_inc_2);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
@@ -65,13 +60,14 @@ static NV_STATUS test_events(uvm_va_space_t *va_space)
|
||||
// va_space read lock is required for page fault event notification
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
// Notify (fake) page fault. The two registered callbacks for this event increment the value of test_value
|
||||
event_data.fault.block = █
|
||||
// Notify (fake) page fault. The two registered callbacks for this event
|
||||
// increment the value of test_value
|
||||
uvm_perf_event_notify(&va_space->perf_events, UVM_PERF_EVENT_FAULT, &event_data);
|
||||
|
||||
uvm_va_space_up_read(va_space);
|
||||
|
||||
// test_data was initialized to zero. It should have been incremented by 1 and 2, respectively in the callbacks
|
||||
// test_data was initialized to zero. It should have been incremented by 1
|
||||
// and 2, respectively in the callbacks
|
||||
TEST_CHECK_GOTO(test_data == 3, done);
|
||||
|
||||
done:
|
||||
@@ -96,4 +92,3 @@ NV_STATUS uvm_test_perf_events_sanity(UVM_TEST_PERF_EVENTS_SANITY_PARAMS *params
|
||||
done:
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
@@ -355,7 +355,7 @@ static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_blo
|
||||
uvm_page_mask_zero(prefetch_pages);
|
||||
|
||||
if (UVM_ID_IS_CPU(new_residency) || va_block->gpus[uvm_id_gpu_index(new_residency)] != NULL)
|
||||
resident_mask = uvm_va_block_resident_mask_get(va_block, new_residency);
|
||||
resident_mask = uvm_va_block_resident_mask_get(va_block, new_residency, NUMA_NO_NODE);
|
||||
|
||||
// If this is a first-touch fault and the destination processor is the
|
||||
// preferred location, populate the whole max_prefetch_region.
|
||||
|
||||
@@ -164,7 +164,7 @@ typedef struct
|
||||
|
||||
uvm_spinlock_t lock;
|
||||
|
||||
uvm_va_block_context_t va_block_context;
|
||||
uvm_va_block_context_t *va_block_context;
|
||||
|
||||
// Flag used to avoid scheduling delayed unpinning operations after
|
||||
// uvm_perf_thrashing_stop has been called.
|
||||
@@ -601,6 +601,14 @@ static va_space_thrashing_info_t *va_space_thrashing_info_create(uvm_va_space_t
|
||||
|
||||
va_space_thrashing = uvm_kvmalloc_zero(sizeof(*va_space_thrashing));
|
||||
if (va_space_thrashing) {
|
||||
uvm_va_block_context_t *block_context = uvm_va_block_context_alloc(NULL);
|
||||
|
||||
if (!block_context) {
|
||||
uvm_kvfree(va_space_thrashing);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
va_space_thrashing->pinned_pages.va_block_context = block_context;
|
||||
va_space_thrashing->va_space = va_space;
|
||||
|
||||
va_space_thrashing_info_init_params(va_space_thrashing);
|
||||
@@ -621,6 +629,7 @@ static void va_space_thrashing_info_destroy(uvm_va_space_t *va_space)
|
||||
|
||||
if (va_space_thrashing) {
|
||||
uvm_perf_module_type_unset_data(va_space->perf_modules_data, UVM_PERF_MODULE_TYPE_THRASHING);
|
||||
uvm_va_block_context_free(va_space_thrashing->pinned_pages.va_block_context);
|
||||
uvm_kvfree(va_space_thrashing);
|
||||
}
|
||||
}
|
||||
@@ -1104,7 +1113,7 @@ static NV_STATUS unmap_remote_pinned_pages(uvm_va_block_t *va_block,
|
||||
!uvm_processor_mask_test(&policy->accessed_by, processor_id));
|
||||
|
||||
if (uvm_processor_mask_test(&va_block->resident, processor_id)) {
|
||||
const uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, processor_id);
|
||||
const uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, processor_id, NUMA_NO_NODE);
|
||||
|
||||
if (!uvm_page_mask_andnot(&va_block_context->caller_page_mask,
|
||||
&block_thrashing->pinned_pages.mask,
|
||||
@@ -1312,9 +1321,8 @@ void thrashing_event_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_
|
||||
|
||||
if (block_thrashing->last_time_stamp == 0 ||
|
||||
uvm_id_equal(block_thrashing->last_processor, processor_id) ||
|
||||
time_stamp - block_thrashing->last_time_stamp > va_space_thrashing->params.lapse_ns) {
|
||||
time_stamp - block_thrashing->last_time_stamp > va_space_thrashing->params.lapse_ns)
|
||||
goto done;
|
||||
}
|
||||
|
||||
num_block_pages = uvm_va_block_size(va_block) / PAGE_SIZE;
|
||||
|
||||
@@ -1803,7 +1811,7 @@ static void thrashing_unpin_pages(struct work_struct *work)
|
||||
struct delayed_work *dwork = to_delayed_work(work);
|
||||
va_space_thrashing_info_t *va_space_thrashing = container_of(dwork, va_space_thrashing_info_t, pinned_pages.dwork);
|
||||
uvm_va_space_t *va_space = va_space_thrashing->va_space;
|
||||
uvm_va_block_context_t *va_block_context = &va_space_thrashing->pinned_pages.va_block_context;
|
||||
uvm_va_block_context_t *va_block_context = va_space_thrashing->pinned_pages.va_block_context;
|
||||
|
||||
// Take the VA space lock so that VA blocks don't go away during this
|
||||
// operation.
|
||||
@@ -1937,7 +1945,6 @@ void uvm_perf_thrashing_unload(uvm_va_space_t *va_space)
|
||||
|
||||
// Make sure that there are not pending work items
|
||||
if (va_space_thrashing) {
|
||||
UVM_ASSERT(va_space_thrashing->pinned_pages.in_va_space_teardown);
|
||||
UVM_ASSERT(list_empty(&va_space_thrashing->pinned_pages.list));
|
||||
|
||||
va_space_thrashing_info_destroy(va_space);
|
||||
|
||||
@@ -3377,76 +3377,47 @@ uvm_gpu_id_t uvm_pmm_devmem_page_to_gpu_id(struct page *page)
|
||||
return gpu->id;
|
||||
}
|
||||
|
||||
static void evict_orphan_pages(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
{
|
||||
NvU32 i;
|
||||
|
||||
UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT);
|
||||
UVM_ASSERT(chunk->suballoc);
|
||||
|
||||
for (i = 0; i < num_subchunks(chunk); i++) {
|
||||
uvm_gpu_chunk_t *subchunk = chunk->suballoc->subchunks[i];
|
||||
|
||||
uvm_spin_lock(&pmm->list_lock);
|
||||
|
||||
if (subchunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT) {
|
||||
uvm_spin_unlock(&pmm->list_lock);
|
||||
|
||||
evict_orphan_pages(pmm, subchunk);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (subchunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED && subchunk->is_referenced) {
|
||||
unsigned long pfn = uvm_pmm_gpu_devmem_get_pfn(pmm, subchunk);
|
||||
|
||||
// TODO: Bug 3368756: add support for large GPU pages.
|
||||
UVM_ASSERT(uvm_gpu_chunk_get_size(subchunk) == PAGE_SIZE);
|
||||
uvm_spin_unlock(&pmm->list_lock);
|
||||
|
||||
// The above check for subchunk state is racy because the
|
||||
// chunk may be freed after the lock is dropped. It is
|
||||
// still safe to proceed in that case because the struct
|
||||
// page reference will have dropped to zero and cannot
|
||||
// have been re-allocated as this is only called during
|
||||
// GPU teardown. Therefore migrate_device_range() will
|
||||
// simply fail.
|
||||
uvm_hmm_pmm_gpu_evict_pfn(pfn);
|
||||
continue;
|
||||
}
|
||||
|
||||
uvm_spin_unlock(&pmm->list_lock);
|
||||
}
|
||||
}
|
||||
|
||||
// Free any orphan pages.
|
||||
// This should be called as part of removing a GPU: after all work is stopped
|
||||
// and all va_blocks have been destroyed. There normally won't be any
|
||||
// device private struct page references left but there can be cases after
|
||||
// fork() where a child process still holds a reference. This function searches
|
||||
// for pages that still have a reference and migrates the page to the GPU in
|
||||
// order to release the reference in the CPU page table.
|
||||
static void uvm_pmm_gpu_free_orphan_pages(uvm_pmm_gpu_t *pmm)
|
||||
// Check that there are no orphan pages. This should only be called as part of
// removing a GPU: after all work is stopped and all va_blocks have been
|
||||
// destroyed. By now there should be no device-private page references left as
|
||||
// there are no va_space's left on this GPU and orphan pages should be removed
|
||||
// by va_space destruction or unregistration from the GPU.
|
||||
static bool uvm_pmm_gpu_check_orphan_pages(uvm_pmm_gpu_t *pmm)
|
||||
{
|
||||
size_t i;
|
||||
bool ret = true;
|
||||
unsigned long pfn;
|
||||
struct range range = pmm->devmem.pagemap.range;
|
||||
|
||||
if (!pmm->initialized)
|
||||
return;
|
||||
|
||||
// This is only safe to call during GPU teardown where chunks
|
||||
// cannot be re-allocated.
|
||||
UVM_ASSERT(uvm_gpu_retained_count(uvm_pmm_to_gpu(pmm)) == 0);
|
||||
if (!pmm->initialized || !uvm_hmm_is_enabled_system_wide())
|
||||
return ret;
|
||||
|
||||
// Scan all the root chunks looking for subchunks which are still
|
||||
// referenced. This is slow, but we only do this when unregistering a GPU
|
||||
// and is not critical for performance.
|
||||
// referenced.
|
||||
for (i = 0; i < pmm->root_chunks.count; i++) {
|
||||
uvm_gpu_root_chunk_t *root_chunk = &pmm->root_chunks.array[i];
|
||||
|
||||
root_chunk_lock(pmm, root_chunk);
|
||||
if (root_chunk->chunk.state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT)
|
||||
evict_orphan_pages(pmm, &root_chunk->chunk);
|
||||
ret = false;
|
||||
root_chunk_unlock(pmm, root_chunk);
|
||||
}
|
||||
|
||||
for (pfn = __phys_to_pfn(range.start); pfn <= __phys_to_pfn(range.end); pfn++) {
|
||||
struct page *page = pfn_to_page(pfn);
|
||||
|
||||
if (!is_device_private_page(page)) {
|
||||
ret = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (page_count(page)) {
|
||||
ret = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void devmem_page_free(struct page *page)
|
||||
@@ -3479,7 +3450,7 @@ static vm_fault_t devmem_fault(struct vm_fault *vmf)
|
||||
{
|
||||
uvm_va_space_t *va_space = vmf->page->zone_device_data;
|
||||
|
||||
if (!va_space || va_space->va_space_mm.mm != vmf->vma->vm_mm)
|
||||
if (!va_space)
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
return uvm_va_space_cpu_fault_hmm(va_space, vmf->vma, vmf);
|
||||
@@ -3568,8 +3539,9 @@ static void devmem_deinit(uvm_pmm_gpu_t *pmm)
|
||||
{
|
||||
}
|
||||
|
||||
static void uvm_pmm_gpu_free_orphan_pages(uvm_pmm_gpu_t *pmm)
|
||||
static bool uvm_pmm_gpu_check_orphan_pages(uvm_pmm_gpu_t *pmm)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
#endif // UVM_IS_CONFIG_HMM()
|
||||
|
||||
@@ -3744,7 +3716,7 @@ void uvm_pmm_gpu_deinit(uvm_pmm_gpu_t *pmm)
|
||||
|
||||
gpu = uvm_pmm_to_gpu(pmm);
|
||||
|
||||
uvm_pmm_gpu_free_orphan_pages(pmm);
|
||||
UVM_ASSERT(uvm_pmm_gpu_check_orphan_pages(pmm));
|
||||
nv_kthread_q_flush(&gpu->parent->lazy_free_q);
|
||||
UVM_ASSERT(list_empty(&pmm->root_chunks.va_block_lazy_free));
|
||||
release_free_root_chunks(pmm);
|
||||
|
||||
@@ -749,6 +749,7 @@ NV_STATUS uvm_cpu_chunk_map_gpu(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
|
||||
}
|
||||
|
||||
static struct page *uvm_cpu_chunk_alloc_page(uvm_chunk_size_t alloc_size,
|
||||
int nid,
|
||||
uvm_cpu_chunk_alloc_flags_t alloc_flags)
|
||||
{
|
||||
gfp_t kernel_alloc_flags;
|
||||
@@ -764,18 +765,27 @@ static struct page *uvm_cpu_chunk_alloc_page(uvm_chunk_size_t alloc_size,
|
||||
|
||||
kernel_alloc_flags |= GFP_HIGHUSER;
|
||||
|
||||
// For allocation sizes higher than PAGE_SIZE, use __GFP_NORETRY in
|
||||
// order to avoid higher allocation latency from the kernel compacting
|
||||
// memory to satisfy the request.
|
||||
// For allocation sizes higher than PAGE_SIZE, use __GFP_NORETRY in order
|
||||
// to avoid higher allocation latency from the kernel compacting memory to
|
||||
// satisfy the request.
|
||||
// Use __GFP_NOWARN to avoid printing allocation failure to the kernel log.
|
||||
// High order allocation failures are handled gracefully by the caller.
|
||||
if (alloc_size > PAGE_SIZE)
|
||||
kernel_alloc_flags |= __GFP_COMP | __GFP_NORETRY;
|
||||
kernel_alloc_flags |= __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN;
|
||||
|
||||
if (alloc_flags & UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO)
|
||||
kernel_alloc_flags |= __GFP_ZERO;
|
||||
|
||||
page = alloc_pages(kernel_alloc_flags, get_order(alloc_size));
|
||||
if (page && (alloc_flags & UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO))
|
||||
SetPageDirty(page);
|
||||
UVM_ASSERT(nid < num_online_nodes());
|
||||
if (nid == NUMA_NO_NODE)
|
||||
page = alloc_pages(kernel_alloc_flags, get_order(alloc_size));
|
||||
else
|
||||
page = alloc_pages_node(nid, kernel_alloc_flags, get_order(alloc_size));
|
||||
|
||||
if (page) {
|
||||
if (alloc_flags & UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO)
|
||||
SetPageDirty(page);
|
||||
}
|
||||
|
||||
return page;
|
||||
}
|
||||
@@ -805,6 +815,7 @@ static uvm_cpu_physical_chunk_t *uvm_cpu_chunk_create(uvm_chunk_size_t alloc_siz
|
||||
|
||||
NV_STATUS uvm_cpu_chunk_alloc(uvm_chunk_size_t alloc_size,
|
||||
uvm_cpu_chunk_alloc_flags_t alloc_flags,
|
||||
int nid,
|
||||
uvm_cpu_chunk_t **new_chunk)
|
||||
{
|
||||
uvm_cpu_physical_chunk_t *chunk;
|
||||
@@ -812,7 +823,7 @@ NV_STATUS uvm_cpu_chunk_alloc(uvm_chunk_size_t alloc_size,
|
||||
|
||||
UVM_ASSERT(new_chunk);
|
||||
|
||||
page = uvm_cpu_chunk_alloc_page(alloc_size, alloc_flags);
|
||||
page = uvm_cpu_chunk_alloc_page(alloc_size, nid, alloc_flags);
|
||||
if (!page)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
@@ -847,6 +858,13 @@ NV_STATUS uvm_cpu_chunk_alloc_hmm(struct page *page,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
int uvm_cpu_chunk_get_numa_node(uvm_cpu_chunk_t *chunk)
|
||||
{
|
||||
UVM_ASSERT(chunk);
|
||||
UVM_ASSERT(chunk->page);
|
||||
return page_to_nid(chunk->page);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_cpu_chunk_split(uvm_cpu_chunk_t *chunk, uvm_cpu_chunk_t **new_chunks)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
@@ -304,11 +304,24 @@ uvm_chunk_sizes_mask_t uvm_cpu_chunk_get_allocation_sizes(void);
|
||||
|
||||
// Allocate a physical CPU chunk of the specified size.
|
||||
//
|
||||
// The nid argument is used to indicate a memory node preference. If the
|
||||
// value is a memory node ID, the chunk allocation will be attempted on
|
||||
// that memory node. If the chunk cannot be allocated on that memory node,
|
||||
// it will be allocated on any memory node allowed by the process's policy.
|
||||
//
|
||||
// If the value of nid is a memory node ID that is not in the set of
|
||||
// current process's allowed memory nodes, it will be allocated on one of the
|
||||
// nodes in the allowed set.
|
||||
//
|
||||
// If the value of nid is NUMA_NO_NODE, the chunk will be allocated from any
|
||||
// of the allowed memory nodes by the process policy.
|
||||
//
|
||||
// If a CPU chunk allocation succeeds, NV_OK is returned. new_chunk will be set
|
||||
// to point to the newly allocated chunk. On failure, NV_ERR_NO_MEMORY is
|
||||
// returned.
|
||||
NV_STATUS uvm_cpu_chunk_alloc(uvm_chunk_size_t alloc_size,
|
||||
uvm_cpu_chunk_alloc_flags_t flags,
|
||||
int nid,
|
||||
uvm_cpu_chunk_t **new_chunk);
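// Illustrative sketch (not part of the driver change): a caller that prefers
// memory local to the current CPU while accepting any node allowed by the
// process policy, per the semantics documented above. The wrapper name is an
// assumption for the example; uvm_cpu_chunk_alloc() and
// UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO come from this interface, and numa_node_id()
// is the standard kernel helper for the caller's current node.
static NV_STATUS example_alloc_local_cpu_chunk(uvm_cpu_chunk_t **chunk_out)
{
    // Request a zeroed, PAGE_SIZE chunk near the current CPU. Per the comment
    // above, the allocation transparently falls back to any node allowed by
    // the process policy if this node cannot satisfy it; NUMA_NO_NODE could be
    // passed instead to skip the node preference entirely.
    return uvm_cpu_chunk_alloc(PAGE_SIZE,
                               UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO,
                               numa_node_id(),
                               chunk_out);
}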
|
||||
|
||||
// Allocate a HMM CPU chunk.
|
||||
@@ -375,6 +388,9 @@ static uvm_cpu_logical_chunk_t *uvm_cpu_chunk_to_logical(uvm_cpu_chunk_t *chunk)
|
||||
return container_of((chunk), uvm_cpu_logical_chunk_t, common);
|
||||
}
|
||||
|
||||
// Return the NUMA node ID of the physical page backing the chunk.
|
||||
int uvm_cpu_chunk_get_numa_node(uvm_cpu_chunk_t *chunk);
|
||||
|
||||
// Free a CPU chunk.
|
||||
// This may not result in the immediate freeing of the physical pages of the
|
||||
// chunk if this is a logical chunk and there are other logical chunks holding
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2019 NVIDIA Corporation
|
||||
Copyright (c) 2017-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -664,6 +664,7 @@ done:
|
||||
|
||||
static NV_STATUS test_cpu_chunk_alloc(uvm_chunk_size_t size,
|
||||
uvm_cpu_chunk_alloc_flags_t flags,
|
||||
int nid,
|
||||
uvm_cpu_chunk_t **out_chunk)
|
||||
{
|
||||
uvm_cpu_chunk_t *chunk;
|
||||
@@ -675,7 +676,7 @@ static NV_STATUS test_cpu_chunk_alloc(uvm_chunk_size_t size,
|
||||
// It is possible that the allocation fails due to lack of large pages
|
||||
// rather than an API issue, which will result in a false negative.
|
||||
// However, that should be very rare.
|
||||
TEST_NV_CHECK_RET(uvm_cpu_chunk_alloc(size, flags, &chunk));
|
||||
TEST_NV_CHECK_RET(uvm_cpu_chunk_alloc(size, flags, nid, &chunk));
|
||||
|
||||
// Check general state of the chunk:
|
||||
// - chunk should be a physical chunk,
|
||||
@@ -685,6 +686,12 @@ static NV_STATUS test_cpu_chunk_alloc(uvm_chunk_size_t size,
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(chunk) == size, done);
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_num_pages(chunk) == size / PAGE_SIZE, done);
|
||||
|
||||
// It is possible for the kernel to allocate a chunk on a NUMA node other
|
||||
// than the one requested. However, that should not be an issue with
|
||||
// sufficient memory on each NUMA node.
|
||||
if (nid != NUMA_NO_NODE)
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_get_numa_node(chunk) == nid, done);
|
||||
|
||||
if (flags & UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO) {
|
||||
NvU64 *cpu_addr;
|
||||
|
||||
@@ -719,7 +726,7 @@ static NV_STATUS test_cpu_chunk_mapping_basic_verify(uvm_gpu_t *gpu,
|
||||
NvU64 dma_addr;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, flags, &chunk));
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, flags, NUMA_NO_NODE, &chunk));
|
||||
phys_chunk = uvm_cpu_chunk_to_physical(chunk);
|
||||
|
||||
// Check state of the physical chunk:
|
||||
@@ -763,27 +770,27 @@ static NV_STATUS test_cpu_chunk_mapping_basic(uvm_gpu_t *gpu, uvm_cpu_chunk_allo
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_cpu_chunk_mapping_array(uvm_gpu_t *gpu1, uvm_gpu_t *gpu2, uvm_gpu_t *gpu3)
|
||||
static NV_STATUS test_cpu_chunk_mapping_array(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_t *gpu2)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_cpu_chunk_t *chunk;
|
||||
uvm_cpu_physical_chunk_t *phys_chunk;
|
||||
NvU64 dma_addr_gpu2;
|
||||
NvU64 dma_addr_gpu1;
|
||||
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(PAGE_SIZE, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, &chunk));
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(PAGE_SIZE, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, NUMA_NO_NODE, &chunk));
|
||||
phys_chunk = uvm_cpu_chunk_to_physical(chunk);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu2), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu2), done);
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu3), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu2), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu3), done);
|
||||
dma_addr_gpu2 = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu2->parent);
|
||||
uvm_cpu_chunk_unmap_gpu_phys(chunk, gpu3->parent);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu2), done);
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu1), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu2), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu2), done);
|
||||
dma_addr_gpu1 = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu1->parent);
|
||||
uvm_cpu_chunk_unmap_gpu_phys(chunk, gpu2->parent);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu0), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu0), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);
|
||||
|
||||
// DMA mapping addresses for different GPUs live in different IOMMU spaces,
|
||||
// so it would be perfectly legal for them to have the same IOVA, and even
|
||||
@@ -793,7 +800,7 @@ static NV_STATUS test_cpu_chunk_mapping_array(uvm_gpu_t *gpu1, uvm_gpu_t *gpu2,
|
||||
// GPU1. It's true that we may get a false negative if both addresses
|
||||
// happened to alias and we had a bug in how the addresses are shifted in
|
||||
// the dense array, but that's better than intermittent failure.
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu2->parent) == dma_addr_gpu2, done);
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu1->parent) == dma_addr_gpu1, done);
|
||||
|
||||
done:
|
||||
uvm_cpu_chunk_free(chunk);
|
||||
@@ -911,7 +918,7 @@ static NV_STATUS test_cpu_chunk_split_and_merge(uvm_gpu_t *gpu)
|
||||
uvm_cpu_chunk_t *chunk;
|
||||
NV_STATUS status;
|
||||
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, &chunk));
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, NUMA_NO_NODE, &chunk));
|
||||
status = do_test_cpu_chunk_split_and_merge(chunk, gpu);
|
||||
uvm_cpu_chunk_free(chunk);
|
||||
|
||||
@@ -993,7 +1000,7 @@ static NV_STATUS test_cpu_chunk_dirty(uvm_gpu_t *gpu)
|
||||
uvm_cpu_physical_chunk_t *phys_chunk;
|
||||
size_t num_pages;
|
||||
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, &chunk));
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, NUMA_NO_NODE, &chunk));
|
||||
phys_chunk = uvm_cpu_chunk_to_physical(chunk);
|
||||
num_pages = uvm_cpu_chunk_num_pages(chunk);
|
||||
|
||||
@@ -1005,7 +1012,7 @@ static NV_STATUS test_cpu_chunk_dirty(uvm_gpu_t *gpu)
|
||||
|
||||
uvm_cpu_chunk_free(chunk);
|
||||
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO, &chunk));
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO, NUMA_NO_NODE, &chunk));
|
||||
phys_chunk = uvm_cpu_chunk_to_physical(chunk);
|
||||
num_pages = uvm_cpu_chunk_num_pages(chunk);
|
||||
|
||||
@@ -1170,13 +1177,35 @@ NV_STATUS test_cpu_chunk_free(uvm_va_space_t *va_space, uvm_processor_mask_t *te
|
||||
size_t size = uvm_chunk_find_next_size(alloc_sizes, PAGE_SIZE);
|
||||
|
||||
for_each_chunk_size_from(size, alloc_sizes) {
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, &chunk));
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, NUMA_NO_NODE, &chunk));
|
||||
TEST_NV_CHECK_RET(do_test_cpu_chunk_free(chunk, va_space, test_gpus));
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_cpu_chunk_numa_alloc(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_cpu_chunk_t *chunk;
|
||||
uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
|
||||
size_t size;
|
||||
|
||||
for_each_chunk_size(size, alloc_sizes) {
|
||||
int nid;
|
||||
|
||||
for_each_possible_uvm_node(nid) {
|
||||
// Do not test CPU allocation on nodes that have no memory or CPU
|
||||
if (!node_state(nid, N_MEMORY) || !node_state(nid, N_CPU))
|
||||
continue;
|
||||
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, nid, &chunk));
|
||||
uvm_cpu_chunk_free(chunk);
|
||||
}
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_cpu_chunk_api(UVM_TEST_CPU_CHUNK_API_PARAMS *params, struct file *filp)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
@@ -1197,6 +1226,7 @@ NV_STATUS uvm_test_cpu_chunk_api(UVM_TEST_CPU_CHUNK_API_PARAMS *params, struct f
|
||||
}
|
||||
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_free(va_space, &test_gpus), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_numa_alloc(va_space), done);
|
||||
|
||||
if (uvm_processor_mask_get_gpu_count(&test_gpus) >= 3) {
|
||||
uvm_gpu_t *gpu2, *gpu3;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -324,7 +324,7 @@ static NV_STATUS gpu_mem_check(uvm_gpu_t *gpu,
|
||||
|
||||
// TODO: Bug 3839176: [UVM][HCC][uvm_test] Update tests that assume GPU
|
||||
// engines can directly access sysmem
|
||||
// Skip this test for now. To enable this test in Confidential Computing,
|
||||
// Skip this test for now. To enable this test under SEV,
|
||||
// The GPU->CPU CE copy needs to be updated so it uses encryption when
|
||||
// CC is enabled.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
@@ -1068,7 +1068,7 @@ static NV_STATUS test_pmm_reverse_map_single(uvm_gpu_t *gpu, uvm_va_space_t *va_
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
is_resident = uvm_processor_mask_test(&va_block->resident, gpu->id) &&
|
||||
uvm_page_mask_full(uvm_va_block_resident_mask_get(va_block, gpu->id));
|
||||
uvm_page_mask_full(uvm_va_block_resident_mask_get(va_block, gpu->id, NUMA_NO_NODE));
|
||||
if (is_resident)
|
||||
phys_addr = uvm_va_block_gpu_phys_page_address(va_block, 0, gpu);
|
||||
|
||||
@@ -1154,7 +1154,7 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
// Verify that all pages are populated on the GPU
|
||||
is_resident = uvm_page_mask_region_full(uvm_va_block_resident_mask_get(va_block, gpu->id),
|
||||
is_resident = uvm_page_mask_region_full(uvm_va_block_resident_mask_get(va_block, gpu->id, NUMA_NO_NODE),
|
||||
reverse_mapping->region);
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
@@ -1223,6 +1223,8 @@ static NV_STATUS test_indirect_peers(uvm_gpu_t *owning_gpu, uvm_gpu_t *accessing
|
||||
if (!chunks)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
UVM_ASSERT(!g_uvm_global.sev_enabled);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(UVM_CHUNK_SIZE_MAX, current->mm, &verif_mem), out);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(verif_mem, owning_gpu), out);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(verif_mem, accessing_gpu), out);
|
||||
|
||||
@@ -176,7 +176,9 @@ static NV_STATUS preferred_location_unmap_remote_pages(uvm_va_block_t *va_block,
|
||||
mapped_mask = uvm_va_block_map_mask_get(va_block, preferred_location);
|
||||
|
||||
if (uvm_processor_mask_test(&va_block->resident, preferred_location)) {
|
||||
const uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, preferred_location);
|
||||
const uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block,
|
||||
preferred_location,
|
||||
NUMA_NO_NODE);
|
||||
|
||||
if (!uvm_page_mask_andnot(&va_block_context->caller_page_mask, mapped_mask, resident_mask))
|
||||
goto done;
|
||||
@@ -638,7 +640,7 @@ static NV_STATUS va_block_set_read_duplication_locked(uvm_va_block_t *va_block,
|
||||
|
||||
for_each_id_in_mask(src_id, &va_block->resident) {
|
||||
NV_STATUS status;
|
||||
uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, src_id);
|
||||
uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, src_id, NUMA_NO_NODE);
|
||||
|
||||
// Calling uvm_va_block_make_resident_read_duplicate will break all
|
||||
// SetAccessedBy and remote mappings
|
||||
@@ -695,7 +697,7 @@ static NV_STATUS va_block_unset_read_duplication_locked(uvm_va_block_t *va_block
|
||||
// If preferred_location is set and has resident copies, give it preference
|
||||
if (UVM_ID_IS_VALID(preferred_location) &&
|
||||
uvm_processor_mask_test(&va_block->resident, preferred_location)) {
|
||||
uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, preferred_location);
|
||||
uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, preferred_location, NUMA_NO_NODE);
|
||||
bool is_mask_empty = !uvm_page_mask_and(break_read_duplication_pages,
|
||||
&va_block->read_duplicated_pages,
|
||||
resident_mask);
|
||||
@@ -723,7 +725,7 @@ static NV_STATUS va_block_unset_read_duplication_locked(uvm_va_block_t *va_block
|
||||
if (uvm_id_equal(processor_id, preferred_location))
|
||||
continue;
|
||||
|
||||
resident_mask = uvm_va_block_resident_mask_get(va_block, processor_id);
|
||||
resident_mask = uvm_va_block_resident_mask_get(va_block, processor_id, NUMA_NO_NODE);
|
||||
is_mask_empty = !uvm_page_mask_and(break_read_duplication_pages,
|
||||
&va_block->read_duplicated_pages,
|
||||
resident_mask);
|
||||
|
||||
kernel-open/nvidia-uvm/uvm_processors.c (new file, 40 lines)
@@ -0,0 +1,40 @@
/*******************************************************************************
    Copyright (c) 2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_processors.h"

int uvm_find_closest_node_mask(int src, const nodemask_t *mask)
{
    int nid;
    int closest_nid = NUMA_NO_NODE;

    if (node_isset(src, *mask))
        return src;

    for_each_set_bit(nid, mask->bits, MAX_NUMNODES) {
        if (closest_nid == NUMA_NO_NODE || node_distance(src, nid) < node_distance(src, closest_nid))
            closest_nid = nid;
    }

    return closest_nid;
}
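// Illustrative usage sketch (not part of the driver change): pick the memory
// node nearest to src_nid among the nodes that currently have memory. The
// function name is an assumption for the example; node_states[], N_MEMORY and
// NUMA_NO_NODE are standard kernel NUMA interfaces.
static int example_nearest_memory_node(int src_nid)
{
    nodemask_t candidates = node_states[N_MEMORY];

    // Returns src_nid itself when it is in the mask, otherwise the node with
    // the smallest node_distance() to it, or NUMA_NO_NODE for an empty mask.
    return uvm_find_closest_node_mask(src_nid, &candidates);
}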
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2019 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -26,6 +26,7 @@
|
||||
|
||||
#include "uvm_linux.h"
|
||||
#include "uvm_common.h"
|
||||
#include <linux/numa.h>
|
||||
|
||||
#define UVM_MAX_UNIQUE_GPU_PAIRS SUM_FROM_0_TO_N(UVM_MAX_GPUS - 1)
|
||||
|
||||
@@ -37,11 +38,11 @@
|
||||
// provide type safety, they are wrapped within the uvm_processor_id_t struct.
|
||||
// The range of valid identifiers needs to cover the maximum number of
|
||||
// supported GPUs on a system plus the CPU. CPU is assigned value 0, and GPUs
|
||||
// range: [1, UVM_ID_MAX_GPUS].
|
||||
// range: [1, UVM_PARENT_ID_MAX_GPUS].
|
||||
//
|
||||
// There are some functions that only expect GPU identifiers and, in order to
|
||||
// make it clearer, the uvm_gpu_id_t alias type is provided. However, as this
|
||||
// type is just a typedef of uvm_processor_id_t, there is no type checking
|
||||
// make it clearer, the uvm_parent_gpu_id_t alias type is provided. However, as
|
||||
// this type is just a typedef of uvm_processor_id_t, there is no type checking
|
||||
// performed by the compiler.
|
||||
//
|
||||
// Identifier value vs index
|
||||
@@ -60,22 +61,25 @@
|
||||
// the GPU within the GPU id space (basically id - 1).
|
||||
//
|
||||
// In the diagram below, MAX_SUB is used to abbreviate
|
||||
// UVM_ID_MAX_SUB_PROCESSORS.
|
||||
// UVM_PARENT_ID_MAX_SUB_PROCESSORS.
|
||||
//
|
||||
// |-------------------------- uvm_processor_id_t ----------------------|
|
||||
// | |
|
||||
// | |----------------------- uvm_gpu_id_t ------------------------||
|
||||
// | | ||
|
||||
// Proc type | CPU | GPU ... GPU ... GPU ||
|
||||
// | | ||
|
||||
// ID values | 0 | 1 ... i+1 ... UVM_ID_MAX_PROCESSORS-1 ||
|
||||
// TODO: Bug 4195538: uvm_parent_processor_id_t is currently but temporarily the
|
||||
// same as uvm_processor_id_t.
|
||||
//
|
||||
// GPU index 0 ... i ... UVM_ID_MAX_GPUS-1
|
||||
// |-------------------------- uvm_parent_processor_id_t ----------------------|
|
||||
// | |
|
||||
// | |----------------------- uvm_parent_gpu_id_t ------------------------||
|
||||
// | | ||
|
||||
// Proc type | CPU | GPU ... GPU ... GPU ||
|
||||
// | | ||
|
||||
// ID values | 0 | 1 ... i+1 ... UVM_PARENT_ID_MAX_PROCESSORS-1 ||
|
||||
//
|
||||
// GPU index 0 ... i ... UVM_PARENT_ID_MAX_GPUS-1
|
||||
// | | | |
|
||||
// | | | |
|
||||
// | |-------------| | |-----------------------------|
|
||||
// | | | |
|
||||
// | | | |
|
||||
// | |-------------| | |------------------------------------|
|
||||
// | | | |
|
||||
// | | | |
|
||||
// GPU index 0 ... MAX_SUB-1 ... i*MAX_SUB ... (i+1)*MAX_SUB-1 ... UVM_GLOBAL_ID_MAX_GPUS-1
|
||||
//
|
||||
// ID values | 0 | 1 ... MAX_SUB ... (i*MAX_SUB)+1 ... (i+1)*MAX_SUB ... UVM_GLOBAL_ID_MAX_PROCESSORS-1 ||
|
||||
@@ -210,7 +214,7 @@ static proc_id_t prefix_fn_mask##_find_first_id(const mask_t *mask)
|
||||
\
|
||||
static proc_id_t prefix_fn_mask##_find_first_gpu_id(const mask_t *mask) \
|
||||
{ \
|
||||
return proc_id_ctor(find_next_bit(mask->bitmap, (maxval), UVM_ID_GPU0_VALUE)); \
|
||||
return proc_id_ctor(find_next_bit(mask->bitmap, (maxval), UVM_PARENT_ID_GPU0_VALUE)); \
|
||||
} \
|
||||
\
|
||||
static proc_id_t prefix_fn_mask##_find_next_id(const mask_t *mask, proc_id_t min_id) \
|
||||
@@ -252,7 +256,7 @@ static NvU32 prefix_fn_mask##_get_gpu_count(const mask_t *mask)
|
||||
{ \
|
||||
NvU32 gpu_count = prefix_fn_mask##_get_count(mask); \
|
||||
\
|
||||
if (prefix_fn_mask##_test(mask, proc_id_ctor(UVM_ID_CPU_VALUE))) \
|
||||
if (prefix_fn_mask##_test(mask, proc_id_ctor(UVM_PARENT_ID_CPU_VALUE))) \
|
||||
--gpu_count; \
|
||||
\
|
||||
return gpu_count; \
|
||||
@@ -261,55 +265,55 @@ static NvU32 prefix_fn_mask##_get_gpu_count(const mask_t *mask)
|
||||
typedef struct
|
||||
{
|
||||
NvU32 val;
|
||||
} uvm_processor_id_t;
|
||||
} uvm_parent_processor_id_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NvU32 val;
|
||||
} uvm_global_processor_id_t;
|
||||
|
||||
typedef uvm_processor_id_t uvm_gpu_id_t;
|
||||
typedef uvm_parent_processor_id_t uvm_parent_gpu_id_t;
|
||||
typedef uvm_global_processor_id_t uvm_global_gpu_id_t;
|
||||
|
||||
// Static value assigned to the CPU
|
||||
#define UVM_ID_CPU_VALUE 0
|
||||
#define UVM_ID_GPU0_VALUE (UVM_ID_CPU_VALUE + 1)
|
||||
#define UVM_PARENT_ID_CPU_VALUE 0
|
||||
#define UVM_PARENT_ID_GPU0_VALUE (UVM_PARENT_ID_CPU_VALUE + 1)
|
||||
|
||||
// ID values for the CPU and first GPU, respectively; the values for both types
|
||||
// of IDs must match to enable sharing of UVM_PROCESSOR_MASK().
|
||||
#define UVM_GLOBAL_ID_CPU_VALUE UVM_ID_CPU_VALUE
|
||||
#define UVM_GLOBAL_ID_GPU0_VALUE UVM_ID_GPU0_VALUE
|
||||
#define UVM_GLOBAL_ID_CPU_VALUE UVM_PARENT_ID_CPU_VALUE
|
||||
#define UVM_GLOBAL_ID_GPU0_VALUE UVM_PARENT_ID_GPU0_VALUE
|
||||
|
||||
// Maximum number of GPUs/processors that can be represented with the id types
|
||||
#define UVM_ID_MAX_GPUS UVM_MAX_GPUS
|
||||
#define UVM_ID_MAX_PROCESSORS UVM_MAX_PROCESSORS
|
||||
#define UVM_PARENT_ID_MAX_GPUS UVM_MAX_GPUS
|
||||
#define UVM_PARENT_ID_MAX_PROCESSORS UVM_MAX_PROCESSORS
|
||||
|
||||
#define UVM_ID_MAX_SUB_PROCESSORS 8
|
||||
#define UVM_PARENT_ID_MAX_SUB_PROCESSORS 8
|
||||
|
||||
#define UVM_GLOBAL_ID_MAX_GPUS (UVM_MAX_GPUS * UVM_ID_MAX_SUB_PROCESSORS)
|
||||
#define UVM_GLOBAL_ID_MAX_GPUS (UVM_PARENT_ID_MAX_GPUS * UVM_PARENT_ID_MAX_SUB_PROCESSORS)
|
||||
#define UVM_GLOBAL_ID_MAX_PROCESSORS (UVM_GLOBAL_ID_MAX_GPUS + 1)
|
||||
|
||||
#define UVM_ID_CPU ((uvm_processor_id_t) { .val = UVM_ID_CPU_VALUE })
|
||||
#define UVM_ID_INVALID ((uvm_processor_id_t) { .val = UVM_ID_MAX_PROCESSORS })
|
||||
#define UVM_PARENT_ID_CPU ((uvm_parent_processor_id_t) { .val = UVM_PARENT_ID_CPU_VALUE })
|
||||
#define UVM_PARENT_ID_INVALID ((uvm_parent_processor_id_t) { .val = UVM_PARENT_ID_MAX_PROCESSORS })
|
||||
#define UVM_GLOBAL_ID_CPU ((uvm_global_processor_id_t) { .val = UVM_GLOBAL_ID_CPU_VALUE })
|
||||
#define UVM_GLOBAL_ID_INVALID ((uvm_global_processor_id_t) { .val = UVM_GLOBAL_ID_MAX_PROCESSORS })
|
||||
|
||||
#define UVM_ID_CHECK_BOUNDS(id) UVM_ASSERT_MSG(id.val <= UVM_ID_MAX_PROCESSORS, "id %u\n", id.val)
|
||||
#define UVM_PARENT_ID_CHECK_BOUNDS(id) UVM_ASSERT_MSG(id.val <= UVM_PARENT_ID_MAX_PROCESSORS, "id %u\n", id.val)
|
||||
|
||||
#define UVM_GLOBAL_ID_CHECK_BOUNDS(id) UVM_ASSERT_MSG(id.val <= UVM_GLOBAL_ID_MAX_PROCESSORS, "id %u\n", id.val)
|
||||
|
||||
static int uvm_id_cmp(uvm_processor_id_t id1, uvm_processor_id_t id2)
|
||||
static int uvm_parent_id_cmp(uvm_parent_processor_id_t id1, uvm_parent_processor_id_t id2)
|
||||
{
|
||||
UVM_ID_CHECK_BOUNDS(id1);
|
||||
UVM_ID_CHECK_BOUNDS(id2);
|
||||
UVM_PARENT_ID_CHECK_BOUNDS(id1);
|
||||
UVM_PARENT_ID_CHECK_BOUNDS(id2);
|
||||
|
||||
return UVM_CMP_DEFAULT(id1.val, id2.val);
|
||||
}
|
||||
|
||||
static bool uvm_id_equal(uvm_processor_id_t id1, uvm_processor_id_t id2)
|
||||
static bool uvm_parent_id_equal(uvm_parent_processor_id_t id1, uvm_parent_processor_id_t id2)
|
||||
{
|
||||
UVM_ID_CHECK_BOUNDS(id1);
|
||||
UVM_ID_CHECK_BOUNDS(id2);
|
||||
UVM_PARENT_ID_CHECK_BOUNDS(id1);
|
||||
UVM_PARENT_ID_CHECK_BOUNDS(id2);
|
||||
|
||||
return id1.val == id2.val;
|
||||
}
|
||||
@@ -330,30 +334,30 @@ static bool uvm_global_id_equal(uvm_global_processor_id_t id1, uvm_global_proces
|
||||
return id1.val == id2.val;
|
||||
}
|
||||
|
||||
#define UVM_ID_IS_CPU(id) uvm_id_equal(id, UVM_ID_CPU)
|
||||
#define UVM_ID_IS_INVALID(id) uvm_id_equal(id, UVM_ID_INVALID)
|
||||
#define UVM_ID_IS_VALID(id) (!UVM_ID_IS_INVALID(id))
|
||||
#define UVM_ID_IS_GPU(id) (!UVM_ID_IS_CPU(id) && !UVM_ID_IS_INVALID(id))
|
||||
#define UVM_PARENT_ID_IS_CPU(id) uvm_parent_id_equal(id, UVM_PARENT_ID_CPU)
|
||||
#define UVM_PARENT_ID_IS_INVALID(id) uvm_parent_id_equal(id, UVM_PARENT_ID_INVALID)
|
||||
#define UVM_PARENT_ID_IS_VALID(id) (!UVM_PARENT_ID_IS_INVALID(id))
|
||||
#define UVM_PARENT_ID_IS_GPU(id) (!UVM_PARENT_ID_IS_CPU(id) && !UVM_PARENT_ID_IS_INVALID(id))
|
||||
|
||||
#define UVM_GLOBAL_ID_IS_CPU(id) uvm_global_id_equal(id, UVM_GLOBAL_ID_CPU)
|
||||
#define UVM_GLOBAL_ID_IS_INVALID(id) uvm_global_id_equal(id, UVM_GLOBAL_ID_INVALID)
|
||||
#define UVM_GLOBAL_ID_IS_VALID(id) (!UVM_GLOBAL_ID_IS_INVALID(id))
|
||||
#define UVM_GLOBAL_ID_IS_GPU(id) (!UVM_GLOBAL_ID_IS_CPU(id) && !UVM_GLOBAL_ID_IS_INVALID(id))
|
||||
|
||||
static uvm_processor_id_t uvm_id_from_value(NvU32 val)
|
||||
static uvm_parent_processor_id_t uvm_parent_id_from_value(NvU32 val)
|
||||
{
|
||||
uvm_processor_id_t ret = { .val = val };
|
||||
uvm_parent_processor_id_t ret = { .val = val };
|
||||
|
||||
UVM_ID_CHECK_BOUNDS(ret);
|
||||
UVM_PARENT_ID_CHECK_BOUNDS(ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static uvm_gpu_id_t uvm_gpu_id_from_value(NvU32 val)
|
||||
static uvm_parent_gpu_id_t uvm_parent_gpu_id_from_value(NvU32 val)
|
||||
{
|
||||
uvm_gpu_id_t ret = uvm_id_from_value(val);
|
||||
uvm_parent_gpu_id_t ret = uvm_parent_id_from_value(val);
|
||||
|
||||
UVM_ASSERT(!UVM_ID_IS_CPU(ret));
|
||||
UVM_ASSERT(!UVM_PARENT_ID_IS_CPU(ret));
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -376,34 +380,34 @@ static uvm_global_gpu_id_t uvm_global_gpu_id_from_value(NvU32 val)
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Create a GPU id from the given GPU id index (previously obtained via
|
||||
// uvm_id_gpu_index)
|
||||
static uvm_gpu_id_t uvm_gpu_id_from_index(NvU32 index)
|
||||
// Create a parent GPU id from the given parent GPU id index (previously
|
||||
// obtained via uvm_parent_id_gpu_index)
|
||||
static uvm_parent_gpu_id_t uvm_parent_gpu_id_from_index(NvU32 index)
|
||||
{
|
||||
return uvm_gpu_id_from_value(index + UVM_ID_GPU0_VALUE);
|
||||
return uvm_parent_gpu_id_from_value(index + UVM_PARENT_ID_GPU0_VALUE);
|
||||
}
|
||||
|
||||
static uvm_processor_id_t uvm_id_next(uvm_processor_id_t id)
|
||||
static uvm_parent_processor_id_t uvm_parent_id_next(uvm_parent_processor_id_t id)
|
||||
{
|
||||
++id.val;
|
||||
|
||||
UVM_ID_CHECK_BOUNDS(id);
|
||||
UVM_PARENT_ID_CHECK_BOUNDS(id);
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
static uvm_gpu_id_t uvm_gpu_id_next(uvm_gpu_id_t id)
|
||||
static uvm_parent_gpu_id_t uvm_parent_gpu_id_next(uvm_parent_gpu_id_t id)
|
||||
{
|
||||
UVM_ASSERT(UVM_ID_IS_GPU(id));
|
||||
UVM_ASSERT(UVM_PARENT_ID_IS_GPU(id));
|
||||
|
||||
++id.val;
|
||||
|
||||
UVM_ID_CHECK_BOUNDS(id);
|
||||
UVM_PARENT_ID_CHECK_BOUNDS(id);
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
// Same as uvm_gpu_id_from_index but for uvm_global_processor_id_t
|
||||
// Same as uvm_parent_gpu_id_from_index but for uvm_global_processor_id_t
|
||||
static uvm_global_gpu_id_t uvm_global_gpu_id_from_index(NvU32 index)
|
||||
{
|
||||
return uvm_global_gpu_id_from_value(index + UVM_GLOBAL_ID_GPU0_VALUE);
|
||||
@@ -429,11 +433,11 @@ static uvm_global_gpu_id_t uvm_global_gpu_id_next(uvm_global_gpu_id_t id)
|
||||
return id;
|
||||
}
|
||||
|
||||
// This function returns the numerical value within [0, UVM_ID_MAX_PROCESSORS)
|
||||
// of the given processor id
|
||||
static NvU32 uvm_id_value(uvm_processor_id_t id)
|
||||
// This function returns the numerical value within
|
||||
// [0, UVM_PARENT_ID_MAX_PROCESSORS) of the given parent processor id.
|
||||
static NvU32 uvm_parent_id_value(uvm_parent_processor_id_t id)
|
||||
{
|
||||
UVM_ASSERT(UVM_ID_IS_VALID(id));
|
||||
UVM_ASSERT(UVM_PARENT_ID_IS_VALID(id));
|
||||
|
||||
return id.val;
|
||||
}
|
||||
@@ -448,12 +452,12 @@ static NvU32 uvm_global_id_value(uvm_global_processor_id_t id)
|
||||
}
|
||||
|
||||
// This function returns the index of the given GPU id within the GPU id space
|
||||
// [0, UVM_ID_MAX_GPUS)
|
||||
static NvU32 uvm_id_gpu_index(uvm_gpu_id_t id)
|
||||
// [0, UVM_PARENT_ID_MAX_GPUS)
|
||||
static NvU32 uvm_parent_id_gpu_index(uvm_parent_gpu_id_t id)
|
||||
{
|
||||
UVM_ASSERT(UVM_ID_IS_GPU(id));
|
||||
UVM_ASSERT(UVM_PARENT_ID_IS_GPU(id));
|
||||
|
||||
return id.val - UVM_ID_GPU0_VALUE;
|
||||
return id.val - UVM_PARENT_ID_GPU0_VALUE;
|
||||
}
|
||||
|
||||
// This function returns the index of the given GPU id within the GPU id space
|
||||
@@ -465,61 +469,61 @@ static NvU32 uvm_global_id_gpu_index(const uvm_global_gpu_id_t id)
|
||||
return id.val - UVM_GLOBAL_ID_GPU0_VALUE;
|
||||
}
|
||||
|
||||
static NvU32 uvm_global_id_gpu_index_from_gpu_id(const uvm_gpu_id_t id)
|
||||
static NvU32 uvm_global_id_gpu_index_from_parent_gpu_id(const uvm_parent_gpu_id_t id)
|
||||
{
|
||||
UVM_ASSERT(UVM_ID_IS_GPU(id));
|
||||
UVM_ASSERT(UVM_PARENT_ID_IS_GPU(id));
|
||||
|
||||
return uvm_id_gpu_index(id) * UVM_ID_MAX_SUB_PROCESSORS;
|
||||
return uvm_parent_id_gpu_index(id) * UVM_PARENT_ID_MAX_SUB_PROCESSORS;
|
||||
}
|
||||
|
||||
static NvU32 uvm_id_gpu_index_from_global_gpu_id(const uvm_global_gpu_id_t id)
|
||||
static NvU32 uvm_parent_id_gpu_index_from_global_gpu_id(const uvm_global_gpu_id_t id)
|
||||
{
|
||||
UVM_ASSERT(UVM_GLOBAL_ID_IS_GPU(id));
|
||||
|
||||
return uvm_global_id_gpu_index(id) / UVM_ID_MAX_SUB_PROCESSORS;
|
||||
return uvm_global_id_gpu_index(id) / UVM_PARENT_ID_MAX_SUB_PROCESSORS;
|
||||
}
|
||||
|
||||
static uvm_global_gpu_id_t uvm_global_gpu_id_from_gpu_id(const uvm_gpu_id_t id)
|
||||
static uvm_global_gpu_id_t uvm_global_gpu_id_from_parent_gpu_id(const uvm_parent_gpu_id_t id)
|
||||
{
|
||||
UVM_ASSERT(UVM_ID_IS_GPU(id));
|
||||
UVM_ASSERT(UVM_PARENT_ID_IS_GPU(id));
|
||||
|
||||
return uvm_global_gpu_id_from_index(uvm_global_id_gpu_index_from_gpu_id(id));
|
||||
return uvm_global_gpu_id_from_index(uvm_global_id_gpu_index_from_parent_gpu_id(id));
|
||||
}
|
||||
|
||||
static uvm_global_gpu_id_t uvm_global_gpu_id_from_parent_index(NvU32 index)
|
||||
{
|
||||
UVM_ASSERT(index < UVM_MAX_GPUS);
|
||||
UVM_ASSERT(index < UVM_PARENT_ID_MAX_GPUS);
|
||||
|
||||
return uvm_global_gpu_id_from_gpu_id(uvm_gpu_id_from_value(index + UVM_GLOBAL_ID_GPU0_VALUE));
|
||||
return uvm_global_gpu_id_from_parent_gpu_id(uvm_parent_gpu_id_from_value(index + UVM_GLOBAL_ID_GPU0_VALUE));
|
||||
}
|
||||
|
||||
static uvm_global_gpu_id_t uvm_global_gpu_id_from_sub_processor_index(const uvm_gpu_id_t id, NvU32 sub_index)
|
||||
static uvm_global_gpu_id_t uvm_global_gpu_id_from_sub_processor_index(const uvm_parent_gpu_id_t id, NvU32 sub_index)
|
||||
{
|
||||
NvU32 index;
|
||||
|
||||
UVM_ASSERT(sub_index < UVM_ID_MAX_SUB_PROCESSORS);
|
||||
UVM_ASSERT(sub_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS);
|
||||
|
||||
index = uvm_global_id_gpu_index_from_gpu_id(id) + sub_index;
|
||||
index = uvm_global_id_gpu_index_from_parent_gpu_id(id) + sub_index;
|
||||
return uvm_global_gpu_id_from_index(index);
|
||||
}
|
||||
|
||||
static uvm_gpu_id_t uvm_gpu_id_from_global_gpu_id(const uvm_global_gpu_id_t id)
|
||||
static uvm_parent_gpu_id_t uvm_parent_gpu_id_from_global_gpu_id(const uvm_global_gpu_id_t id)
|
||||
{
|
||||
UVM_ASSERT(UVM_GLOBAL_ID_IS_GPU(id));
|
||||
|
||||
return uvm_gpu_id_from_index(uvm_id_gpu_index_from_global_gpu_id(id));
|
||||
return uvm_parent_gpu_id_from_index(uvm_parent_id_gpu_index_from_global_gpu_id(id));
|
||||
}
|
||||
|
||||
static NvU32 uvm_global_id_sub_processor_index(const uvm_global_gpu_id_t id)
|
||||
{
|
||||
return uvm_global_id_gpu_index(id) % UVM_ID_MAX_SUB_PROCESSORS;
|
||||
return uvm_global_id_gpu_index(id) % UVM_PARENT_ID_MAX_SUB_PROCESSORS;
|
||||
}
|
||||
|
||||
UVM_PROCESSOR_MASK(uvm_processor_mask_t, \
|
||||
uvm_processor_mask, \
|
||||
UVM_ID_MAX_PROCESSORS, \
|
||||
uvm_processor_id_t, \
|
||||
uvm_id_from_value)
|
||||
UVM_PARENT_ID_MAX_PROCESSORS, \
|
||||
uvm_parent_processor_id_t, \
|
||||
uvm_parent_id_from_value)
|
||||
|
||||
UVM_PROCESSOR_MASK(uvm_global_processor_mask_t, \
|
||||
uvm_global_processor_mask, \
|
||||
@@ -533,19 +537,19 @@ static bool uvm_processor_mask_gpu_subset(const uvm_processor_mask_t *subset, co
|
||||
{
|
||||
uvm_processor_mask_t subset_gpus;
|
||||
uvm_processor_mask_copy(&subset_gpus, subset);
|
||||
uvm_processor_mask_clear(&subset_gpus, UVM_ID_CPU);
|
||||
uvm_processor_mask_clear(&subset_gpus, UVM_PARENT_ID_CPU);
|
||||
return uvm_processor_mask_subset(&subset_gpus, mask);
|
||||
}
|
||||
|
||||
#define for_each_id_in_mask(id, mask) \
|
||||
for ((id) = uvm_processor_mask_find_first_id(mask); \
|
||||
UVM_ID_IS_VALID(id); \
|
||||
(id) = uvm_processor_mask_find_next_id((mask), uvm_id_next(id)))
|
||||
UVM_PARENT_ID_IS_VALID(id); \
|
||||
(id) = uvm_processor_mask_find_next_id((mask), uvm_parent_id_next(id)))
|
||||
|
||||
#define for_each_gpu_id_in_mask(gpu_id, mask) \
|
||||
for ((gpu_id) = uvm_processor_mask_find_first_gpu_id((mask)); \
|
||||
UVM_ID_IS_VALID(gpu_id); \
|
||||
(gpu_id) = uvm_processor_mask_find_next_id((mask), uvm_gpu_id_next(gpu_id)))
|
||||
UVM_PARENT_ID_IS_VALID(gpu_id); \
|
||||
(gpu_id) = uvm_processor_mask_find_next_id((mask), uvm_parent_gpu_id_next(gpu_id)))
|
||||
|
||||
#define for_each_global_id_in_mask(id, mask) \
|
||||
for ((id) = uvm_global_processor_mask_find_first_id(mask); \
|
||||
@@ -559,21 +563,36 @@ static bool uvm_processor_mask_gpu_subset(const uvm_processor_mask_t *subset, co
|
||||
|
||||
// Helper to iterate over all valid gpu ids
|
||||
#define for_each_gpu_id(i) \
|
||||
for (i = uvm_gpu_id_from_value(UVM_ID_GPU0_VALUE); UVM_ID_IS_VALID(i); i = uvm_gpu_id_next(i))
|
||||
for (i = uvm_parent_gpu_id_from_value(UVM_PARENT_ID_GPU0_VALUE); UVM_PARENT_ID_IS_VALID(i); i = uvm_parent_gpu_id_next(i))
|
||||
#define for_each_global_gpu_id(i) \
|
||||
for (i = uvm_global_gpu_id_from_value(UVM_GLOBAL_ID_GPU0_VALUE); UVM_GLOBAL_ID_IS_VALID(i); i = uvm_global_gpu_id_next(i))
|
||||
|
||||
#define for_each_global_sub_processor_id_in_gpu(id, i) \
|
||||
for (i = uvm_global_gpu_id_from_gpu_id(id); \
|
||||
for (i = uvm_global_gpu_id_from_parent_gpu_id(id); \
|
||||
UVM_GLOBAL_ID_IS_VALID(i) && \
|
||||
(uvm_global_id_value(i) < uvm_global_id_value(uvm_global_gpu_id_from_gpu_id(id)) + UVM_ID_MAX_SUB_PROCESSORS); \
|
||||
(uvm_global_id_value(i) < uvm_global_id_value(uvm_global_gpu_id_from_parent_gpu_id(id)) + UVM_PARENT_ID_MAX_SUB_PROCESSORS); \
|
||||
i = uvm_global_gpu_id_next(i))
|
||||
|
||||
// Helper to iterate over all valid gpu ids
|
||||
#define for_each_processor_id(i) for (i = UVM_ID_CPU; UVM_ID_IS_VALID(i); i = uvm_id_next(i))
|
||||
#define for_each_processor_id(i) for (i = UVM_PARENT_ID_CPU; UVM_PARENT_ID_IS_VALID(i); i = uvm_parent_id_next(i))
|
||||
|
||||
#define for_each_global_id(i) for (i = UVM_GLOBAL_ID_CPU; UVM_GLOBAL_ID_IS_VALID(i); i = uvm_global_id_next(i))
|
||||
|
||||
// Find the node in mask with the shortest distance (as returned by
// node_distance) for src.
|
||||
// Note that the search is inclusive of src.
|
||||
// If mask has no bits set, NUMA_NO_NODE is returned.
|
||||
int uvm_find_closest_node_mask(int src, const nodemask_t *mask);
|
||||
|
||||
// Iterate over all nodes in mask with increasing distance from src.
|
||||
// Note that this iterator is destructive of the mask.
|
||||
#define for_each_closest_uvm_node(nid, src, mask) \
|
||||
for ((nid) = uvm_find_closest_node_mask((src), &(mask)); \
|
||||
(nid) != NUMA_NO_NODE; \
|
||||
node_clear((nid), (mask)), (nid) = uvm_find_closest_node_mask((src), &(mask)))
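// Illustrative usage sketch (not part of the driver change): the iterator
// above clears each visited node from the mask, so iterate over a local copy
// when the original nodemask must be preserved. The function and variable
// names here are assumptions for the example.
static int example_first_node_with_memory(int src_nid, const nodemask_t *allowed)
{
    nodemask_t scratch = *allowed;
    int nid;

    // Visit nodes in order of increasing node_distance() from src_nid and
    // return the first one that still has memory.
    for_each_closest_uvm_node(nid, src_nid, scratch) {
        if (node_state(nid, N_MEMORY))
            return nid;
    }

    return NUMA_NO_NODE;
}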
|
||||
|
||||
#define for_each_possible_uvm_node(nid) for_each_node_mask((nid), node_possible_map)
|
||||
|
||||
static bool uvm_processor_uuid_eq(const NvProcessorUuid *uuid1, const NvProcessorUuid *uuid2)
|
||||
{
|
||||
return memcmp(uuid1, uuid2, sizeof(*uuid1)) == 0;
|
||||
@@ -585,4 +604,78 @@ static void uvm_processor_uuid_copy(NvProcessorUuid *dst, const NvProcessorUuid
|
||||
memcpy(dst, src, sizeof(*dst));
|
||||
}
|
||||
|
||||
// TODO: Bug 4195538: [uvm][multi-SMC] Get UVM internal data structures ready to
|
||||
// meet multi-SMC requirements. Temporary aliases, they must be removed once
|
||||
// the data structures are converted.
|
||||
typedef uvm_parent_processor_id_t uvm_processor_id_t;
|
||||
typedef uvm_parent_gpu_id_t uvm_gpu_id_t;
|
||||
|
||||
#define UVM_ID_CPU_VALUE UVM_PARENT_ID_CPU_VALUE
|
||||
#define UVM_ID_GPU0_VALUE UVM_PARENT_ID_GPU0_VALUE
|
||||
#define UVM_ID_MAX_GPUS UVM_PARENT_ID_MAX_GPUS
|
||||
#define UVM_ID_MAX_PROCESSORS UVM_PARENT_ID_MAX_PROCESSORS
|
||||
#define UVM_ID_MAX_SUB_PROCESSORS UVM_PARENT_ID_MAX_SUB_PROCESSORS
|
||||
#define UVM_ID_CPU UVM_PARENT_ID_CPU
|
||||
#define UVM_ID_INVALID UVM_PARENT_ID_INVALID
|
||||
|
||||
static int uvm_id_cmp(uvm_parent_processor_id_t id1, uvm_parent_processor_id_t id2)
|
||||
{
|
||||
return UVM_CMP_DEFAULT(id1.val, id2.val);
|
||||
}
|
||||
|
||||
static bool uvm_id_equal(uvm_parent_processor_id_t id1, uvm_parent_processor_id_t id2)
|
||||
{
|
||||
return uvm_parent_id_equal(id1, id2);
|
||||
}
|
||||
|
||||
#define UVM_ID_IS_CPU(id) uvm_id_equal(id, UVM_ID_CPU)
|
||||
#define UVM_ID_IS_INVALID(id) uvm_id_equal(id, UVM_ID_INVALID)
|
||||
#define UVM_ID_IS_VALID(id) (!UVM_ID_IS_INVALID(id))
|
||||
#define UVM_ID_IS_GPU(id) (!UVM_ID_IS_CPU(id) && !UVM_ID_IS_INVALID(id))
|
||||
|
||||
static uvm_parent_gpu_id_t uvm_gpu_id_from_value(NvU32 val)
|
||||
{
|
||||
return uvm_parent_gpu_id_from_value(val);
|
||||
}
|
||||
|
||||
static NvU32 uvm_id_value(uvm_parent_processor_id_t id)
|
||||
{
|
||||
return uvm_parent_id_value(id);
|
||||
}
|
||||
|
||||
static NvU32 uvm_id_gpu_index(uvm_parent_gpu_id_t id)
|
||||
{
|
||||
return uvm_parent_id_gpu_index(id);
|
||||
}
|
||||
|
||||
static NvU32 uvm_id_gpu_index_from_global_gpu_id(const uvm_global_gpu_id_t id)
|
||||
{
|
||||
return uvm_parent_id_gpu_index_from_global_gpu_id(id);
|
||||
}
|
||||
|
||||
static uvm_parent_gpu_id_t uvm_gpu_id_from_index(NvU32 index)
|
||||
{
|
||||
return uvm_parent_gpu_id_from_index(index);
|
||||
}
|
||||
|
||||
static uvm_parent_gpu_id_t uvm_gpu_id_next(uvm_parent_gpu_id_t id)
|
||||
{
|
||||
return uvm_parent_gpu_id_next(id);
|
||||
}
|
||||
|
||||
static uvm_parent_gpu_id_t uvm_gpu_id_from_global_gpu_id(const uvm_global_gpu_id_t id)
|
||||
{
|
||||
return uvm_parent_gpu_id_from_global_gpu_id(id);
|
||||
}
|
||||
|
||||
static NvU32 uvm_global_id_gpu_index_from_gpu_id(const uvm_parent_gpu_id_t id)
|
||||
{
|
||||
return uvm_global_id_gpu_index_from_parent_gpu_id(id);
|
||||
}
|
||||
|
||||
static uvm_global_gpu_id_t uvm_global_gpu_id_from_gpu_id(const uvm_parent_gpu_id_t id)
|
||||
{
|
||||
return uvm_global_gpu_id_from_parent_gpu_id(id);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -106,26 +106,6 @@ static NV_STATUS uvm_test_nv_kthread_q(UVM_TEST_NV_KTHREAD_Q_PARAMS *params, str
|
||||
return NV_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_test_numa_get_closest_cpu_node_to_gpu(UVM_TEST_NUMA_GET_CLOSEST_CPU_NODE_TO_GPU_PARAMS *params,
|
||||
struct file *filp)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
NV_STATUS status;
|
||||
uvm_rm_user_object_t user_rm_va_space = {
|
||||
.rm_control_fd = -1,
|
||||
.user_client = params->client,
|
||||
.user_object = params->smc_part_ref
|
||||
};
|
||||
|
||||
status = uvm_gpu_retain_by_uuid(¶ms->gpu_uuid, &user_rm_va_space, &gpu);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
params->node_id = gpu->parent->closest_cpu_numa_node;
|
||||
uvm_gpu_release(gpu);
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// Callers of this function should ensure that node is not NUMA_NO_NODE in order
|
||||
// to avoid overrunning the kernel's node to cpumask map.
|
||||
static NV_STATUS uvm_test_verify_bh_affinity(uvm_intr_handler_t *isr, int node)
|
||||
@@ -307,8 +287,6 @@ long uvm_test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_DRAIN_REPLAYABLE_FAULTS, uvm_test_drain_replayable_faults);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_PMA_GET_BATCH_SIZE, uvm_test_pma_get_batch_size);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_PMM_QUERY_PMA_STATS, uvm_test_pmm_query_pma_stats);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_NUMA_GET_CLOSEST_CPU_NODE_TO_GPU,
|
||||
uvm_test_numa_get_closest_cpu_node_to_gpu);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_NUMA_CHECK_AFFINITY, uvm_test_numa_check_affinity);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_VA_SPACE_ADD_DUMMY_THREAD_CONTEXTS,
|
||||
uvm_test_va_space_add_dummy_thread_contexts);
|
||||
|
||||
@@ -561,6 +561,22 @@ typedef struct
|
||||
// user_pages_allocation_retry_force_count, but the injection point simulates
|
||||
// driver metadata allocation failure.
|
||||
//
|
||||
// cpu_chunk_allocation_target_id and cpu_chunk_allocation_actual_id are used
|
||||
// to control the NUMA node IDs for CPU chunk allocations, specifically for
|
||||
// testing overlapping CPU chunk allocations.
|
||||
//
|
||||
// Currently, uvm_api_migrate() does not pass the preferred CPU NUMA node for
// managed memory so it is not possible to request a specific node.
|
||||
// cpu_chunk_allocation_target_id is used to request the allocation be made on a
// specific node. On the other hand, cpu_chunk_allocation_actual_id is the node
|
||||
// on which the allocation will actually be made.
|
||||
//
|
||||
// The two parameters can be used to force a CPU chunk allocation to overlap a
|
||||
// previously allocated chunk.
|
||||
//
|
||||
// Please note that even when specifying cpu_chunk_allocation_actual_id, the
// kernel may end up allocating on a different node.
|
||||
//
|
||||
// Error returns:
|
||||
// NV_ERR_INVALID_ADDRESS
|
||||
// - lookup_address doesn't match a UVM range
|
||||
@@ -571,6 +587,8 @@ typedef struct
|
||||
NvU32 page_table_allocation_retry_force_count; // In
|
||||
NvU32 user_pages_allocation_retry_force_count; // In
|
||||
NvU32 cpu_chunk_allocation_size_mask; // In
|
||||
NvS32 cpu_chunk_allocation_target_id; // In
|
||||
NvS32 cpu_chunk_allocation_actual_id; // In
|
||||
NvU32 cpu_pages_allocation_error_count; // In
|
||||
NvBool eviction_error; // In
|
||||
NvBool populate_error; // In
|
||||
@@ -604,6 +622,10 @@ typedef struct
|
||||
NvProcessorUuid resident_on[UVM_MAX_PROCESSORS]; // Out
|
||||
NvU32 resident_on_count; // Out
|
||||
|
||||
// If the memory is resident on the CPU, the NUMA node on which the page
|
||||
// is resident. Otherwise, -1.
|
||||
NvS32 resident_nid; // Out
|
||||
|
||||
// The size of the physical allocation backing lookup_address. Only the
|
||||
// system-page-sized portion of this allocation which contains
|
||||
// lookup_address is guaranteed to be resident on the corresponding
|
||||
@@ -1168,19 +1190,6 @@ typedef struct
|
||||
NV_STATUS rmStatus; // Out
|
||||
} UVM_TEST_PMM_QUERY_PMA_STATS_PARAMS;
|
||||
|
||||
#define UVM_TEST_NUMA_GET_CLOSEST_CPU_NODE_TO_GPU UVM_TEST_IOCTL_BASE(77)
|
||||
typedef struct
|
||||
{
|
||||
NvProcessorUuid gpu_uuid; // In
|
||||
NvHandle client; // In
|
||||
NvHandle smc_part_ref; // In
|
||||
|
||||
// On kernels with NUMA support, this entry contains the closest CPU NUMA
|
||||
// node to this GPU. Otherwise, the value will be -1.
|
||||
NvS32 node_id; // Out
|
||||
NV_STATUS rmStatus; // Out
|
||||
} UVM_TEST_NUMA_GET_CLOSEST_CPU_NODE_TO_GPU_PARAMS;
|
||||
|
||||
// Test whether the bottom halves have run on the correct CPUs based on the
|
||||
// NUMA node locality of the GPU.
|
||||
//
|
||||
|
||||
File diff suppressed because it is too large
@@ -44,6 +44,7 @@
|
||||
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/nodemask.h>
|
||||
|
||||
// VA blocks are the leaf nodes in the uvm_va_space tree for managed allocations
|
||||
// (VA ranges with type == UVM_VA_RANGE_TYPE_MANAGED):
|
||||
@@ -229,6 +230,42 @@ typedef struct
|
||||
|
||||
} uvm_va_block_gpu_state_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// Per-page residency bit vector, used for fast traversal of resident
|
||||
// pages.
|
||||
//
|
||||
// A set bit means the CPU has a coherent copy of the physical page
|
||||
// resident in the NUMA node's memory, and that a CPU chunk for the
|
||||
// corresponding page index has been allocated. This does not mean that
|
||||
// the coherent copy is currently mapped anywhere, however. A page may be
|
||||
// resident on multiple processors (but not multiple CPU NUMA nodes) when in
|
||||
// read-duplicate mode.
|
||||
//
|
||||
// A cleared bit means the CPU NUMA node does not have a coherent copy of
|
||||
// that page resident. A CPU chunk for the corresponding page index may or
|
||||
// may not have been allocated. If the chunk is present, it's a cached chunk
|
||||
// which can be reused in the future.
|
||||
//
|
||||
// Allocating PAGES_PER_UVM_VA_BLOCK is overkill when the block is
|
||||
// smaller than UVM_VA_BLOCK_SIZE, but it's not much extra memory
|
||||
// overhead on the whole.
|
||||
uvm_page_mask_t resident;
|
||||
|
||||
// Per-page allocation bit vector.
|
||||
//
|
||||
// A set bit means that a CPU chunk has been allocated for the
|
||||
// corresponding page index on this NUMA node.
|
||||
uvm_page_mask_t allocated;
|
||||
|
||||
// CPU memory chunks represent physically contiguous CPU memory
|
||||
// allocations. See uvm_pmm_sysmem.h for more details on CPU chunks.
|
||||
// This member is meant to hold an opaque value indicating the CPU
|
||||
// chunk storage method. For more details on CPU chunk storage,
|
||||
// see uvm_cpu_chunk_storage_type_t in uvm_va_block.c.
|
||||
unsigned long chunks;
|
||||
} uvm_va_block_cpu_node_state_t;
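// Relationship sketch (illustrative): the block-level CPU masks further below
// are cumulative views of the per-node state above, i.e. conceptually
//
//     cpu.resident  == bitwise OR over nid of node_state[nid]->resident
//     cpu.allocated == bitwise OR over nid of node_state[nid]->allocated
//
// which lets "is this page resident/allocated on any CPU node?" be answered
// without walking every possible NUMA node.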
|
||||
|
||||
// TODO: Bug 1766180: Worst-case we could have one of these per system page.
|
||||
// Options:
|
||||
// 1) Rely on the OOM killer to prevent the user from trying to do that
|
||||
@@ -306,38 +343,30 @@ struct uvm_va_block_struct
|
||||
|
||||
struct
|
||||
{
|
||||
// Per-page residency bit vector, used for fast traversal of resident
|
||||
// pages.
|
||||
//
|
||||
// A set bit means the CPU has a coherent copy of the physical page
|
||||
// resident in its memory, and that the corresponding entry in the pages
|
||||
// array is present. This does not mean that the coherent copy is
|
||||
// currently mapped anywhere, however. A page may be resident on
|
||||
// multiple processors when in read-duplicate mode.
|
||||
//
|
||||
// A cleared bit means the CPU does not have a coherent copy of that
|
||||
// page resident. The corresponding entry in the pages array may or may
|
||||
// not present. If the entry is present, it's a cached page which can be
|
||||
// reused in the future.
|
||||
//
|
||||
// Allocating PAGES_PER_UVM_VA_BLOCK is overkill when the block is
|
||||
// smaller than UVM_VA_BLOCK_SIZE, but it's not much extra memory
|
||||
// overhead on the whole.
|
||||
uvm_page_mask_t resident;
|
||||
|
||||
// CPU memory chunks represent physically contiguous CPU memory
|
||||
// allocations. See uvm_pmm_sysmem.h for more details on CPU chunks.
|
||||
// This member is meant to hold an opaque value indicating the CPU
|
||||
// chunk storage method. For more details on CPU chunk storage,
|
||||
// see uvm_cpu_chunk_storage_type_t in uvm_va_block.c.
|
||||
unsigned long chunks;
|
||||
// Per-NUMA node tracking of CPU allocations.
|
||||
// This is a dense array with one entry per possible NUMA node.
|
||||
uvm_va_block_cpu_node_state_t **node_state;
|
||||
|
||||
// Per-page allocation bit vector.
|
||||
//
|
||||
// A set bit means that a CPU page has been allocated for the
|
||||
// corresponding page index.
|
||||
// corresponding page index on at least one CPU NUMA node.
|
||||
uvm_page_mask_t allocated;
|
||||
|
||||
// Per-page residency bit vector. See
|
||||
// uvm_va_block_cpu_node_state_t::resident for a detailed description.
// This mask is a cumulative mask (logical OR) of all
|
||||
// uvm_va_block_cpu_node_state_t::resident masks. It is meant to be used
|
||||
// only for fast testing of page residency when it matters only if the
|
||||
// page is resident on the CPU.
|
||||
//
|
||||
// Note that this mask cannot be set directly as this will cause
|
||||
// inconsistencies between this mask and the per-NUMA residency masks.
|
||||
// In order to properly maintain consistency between the per-NUMA masks
|
||||
// and this one, uvm_va_block_cpu_[set|clear]_residency_*() helpers
|
||||
// should be used.
|
||||
uvm_page_mask_t resident;
|
||||
|
||||
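The comment above says the cumulative resident mask may only change through the uvm_va_block_cpu_[set|clear]_residency_*() helpers. A minimal sketch of what such a helper has to do, with illustrative parameter names; this is not the driver's actual implementation.

// Hypothetical sketch: mark page_index resident on one NUMA node while keeping
// the cumulative CPU mask equal to the OR of all per-node masks, as required
// by the comment above.
static void example_cpu_set_resident_page(uvm_va_block_cpu_node_state_t *node_state,
                                          uvm_page_mask_t *cpu_cumulative_resident,
                                          uvm_page_index_t page_index)
{
    // Both updates happen under the block lock: per-node mask first, then the
    // cumulative OR, so the two views stay consistent.
    uvm_page_mask_set(&node_state->resident, page_index);
    uvm_page_mask_set(cpu_cumulative_resident, page_index);
}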
        // Per-page mapping bit vectors, one per bit we need to track. These are
        // used for fast traversal of valid mappings in the block. These contain
        // all non-address bits needed to establish a virtual mapping on this
@@ -418,7 +447,8 @@ struct uvm_va_block_struct
    uvm_page_mask_t read_duplicated_pages;

    // Mask to keep track of the pages that are not mapped on any non-UVM-Lite
    // processor.
    // processor. This mask is not used for HMM because the CPU can map pages
    // at any time without notifying the driver.
    // 0: Page is definitely not mapped by any processors
    // 1: Page may or may not be mapped by a processor
    //
@@ -525,6 +555,13 @@ struct uvm_va_block_wrapper_struct
    // a successful migration if this error flag is cleared.
    NvU32 inject_cpu_pages_allocation_error_count;

    // The NUMA node ID from which any CPU chunks will be allocated.
    // This will override any other setting and/or policy.
    // Note that the kernel is still free to allocate from any of the
    // nodes in the thread's policy.
    int cpu_chunk_allocation_target_id;
    int cpu_chunk_allocation_actual_id;

    // Force the next eviction attempt on this block to fail. Used for
    // testing only.
    bool inject_eviction_error;
@@ -668,17 +705,12 @@ void uvm_va_block_context_free(uvm_va_block_context_t *va_block_context);
// Initialization of an already-allocated uvm_va_block_context_t.
//
// mm is used to initialize the value of va_block_context->mm. NULL is allowed.
static void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context, struct mm_struct *mm)
{
    UVM_ASSERT(va_block_context);
void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context, struct mm_struct *mm);

    // Write garbage into the VA Block context to ensure that the UVM code
    // clears masks appropriately
    if (UVM_IS_DEBUG())
        memset(va_block_context, 0xff, sizeof(*va_block_context));

    va_block_context->mm = mm;
}
// Return the preferred NUMA node ID for the block's policy.
// If the preferred node ID is NUMA_NO_NODE, the current NUMA node ID
// is returned.
int uvm_va_block_context_get_node(uvm_va_block_context_t *va_block_context);

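A hedged sketch of the declared semantics of uvm_va_block_context_get_node(): prefer the context's requested destination node, otherwise fall back to the caller's current node. The fallback via numa_node_id() is an assumption for illustration, not necessarily the driver's exact implementation.

#include <linux/topology.h>  // numa_node_id()

// Illustrative only; make_resident.dest_nid is the context field described
// further down in this commit.
static int example_block_context_get_node(uvm_va_block_context_t *va_block_context)
{
    if (va_block_context->make_resident.dest_nid != NUMA_NO_NODE)
        return va_block_context->make_resident.dest_nid;

    return numa_node_id();
}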
// TODO: Bug 1766480: Using only page masks instead of a combination of regions
// and page masks could simplify the below APIs and their implementations
@@ -734,6 +766,9 @@ static void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context,
// those masks. It is the caller's responsibility to zero the masks or
// not first.
//
// va_block_context->make_resident.dest_nid is used to guide the NUMA node for
// CPU allocations.
//
// Notably any status other than NV_OK indicates that the block's lock might
// have been unlocked and relocked.
//
@@ -1377,8 +1412,14 @@ static uvm_va_block_test_t *uvm_va_block_get_test(uvm_va_block_t *va_block)

// Get the page residency mask for a processor if it's known to be there.
//
// If the processor is the CPU, the residency mask for the NUMA node ID
// specified by nid will be returned (see
// uvm_va_block_cpu_node_state_t::resident). If nid is NUMA_NO_NODE,
// the cumulative CPU residency mask will be returned (see
// uvm_va_block_t::cpu::resident).
//
// If the processor is a GPU, this will assert that GPU state is indeed present.
uvm_page_mask_t *uvm_va_block_resident_mask_get(uvm_va_block_t *block, uvm_processor_id_t processor);
uvm_page_mask_t *uvm_va_block_resident_mask_get(uvm_va_block_t *block, uvm_processor_id_t processor, int nid);

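A hedged usage sketch of the new nid parameter on uvm_va_block_resident_mask_get(): pass a specific node to read one per-node resident mask, or NUMA_NO_NODE for the cumulative CPU mask. The surrounding helper and its name are illustrative assumptions.

// Illustrative only: report resident CPU pages block-wide and on one node.
// Assumes the block lock is held, as the masks above require.
static void example_query_cpu_residency(uvm_va_block_t *block, int nid)
{
    // Cumulative mask: resident on any CPU NUMA node.
    uvm_page_mask_t *any_node = uvm_va_block_resident_mask_get(block, UVM_ID_CPU, NUMA_NO_NODE);

    // Per-node mask: resident specifically on nid.
    uvm_page_mask_t *on_node = uvm_va_block_resident_mask_get(block, UVM_ID_CPU, nid);

    pr_info("resident pages: any node %u, node %d %u\n",
            uvm_page_mask_weight(any_node), nid, uvm_page_mask_weight(on_node));
}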
// Get the page mapped mask for a processor. The returned mask cannot be
// directly modified by the caller
@@ -1386,6 +1427,13 @@ uvm_page_mask_t *uvm_va_block_resident_mask_get(uvm_va_block_t *block, uvm_proce
// If the processor is a GPU, this will assert that GPU state is indeed present.
const uvm_page_mask_t *uvm_va_block_map_mask_get(uvm_va_block_t *block, uvm_processor_id_t processor);

// Return a mask of non-UVM-Lite pages that are unmapped within the given
// region.
// Locking: The block lock must be held.
void uvm_va_block_unmapped_pages_get(uvm_va_block_t *va_block,
                                     uvm_va_block_region_t region,
                                     uvm_page_mask_t *out_mask);

// VA block lookup functions. There are a number of permutations which might be
// useful, such as looking up the block from {va_space, va_range} x {addr,
// block index}. The ones implemented here and in uvm_va_range.h support the
@@ -1756,17 +1804,28 @@ static bool uvm_page_mask_full(const uvm_page_mask_t *mask)
    return bitmap_full(mask->bitmap, PAGES_PER_UVM_VA_BLOCK);
}

static bool uvm_page_mask_and(uvm_page_mask_t *mask_out, const uvm_page_mask_t *mask_in1, const uvm_page_mask_t *mask_in2)
static void uvm_page_mask_fill(uvm_page_mask_t *mask)
{
    bitmap_fill(mask->bitmap, PAGES_PER_UVM_VA_BLOCK);
}

static bool uvm_page_mask_and(uvm_page_mask_t *mask_out,
                              const uvm_page_mask_t *mask_in1,
                              const uvm_page_mask_t *mask_in2)
{
    return bitmap_and(mask_out->bitmap, mask_in1->bitmap, mask_in2->bitmap, PAGES_PER_UVM_VA_BLOCK);
}

static bool uvm_page_mask_andnot(uvm_page_mask_t *mask_out, const uvm_page_mask_t *mask_in1, const uvm_page_mask_t *mask_in2)
static bool uvm_page_mask_andnot(uvm_page_mask_t *mask_out,
                                 const uvm_page_mask_t *mask_in1,
                                 const uvm_page_mask_t *mask_in2)
{
    return bitmap_andnot(mask_out->bitmap, mask_in1->bitmap, mask_in2->bitmap, PAGES_PER_UVM_VA_BLOCK);
}

static void uvm_page_mask_or(uvm_page_mask_t *mask_out, const uvm_page_mask_t *mask_in1, const uvm_page_mask_t *mask_in2)
static void uvm_page_mask_or(uvm_page_mask_t *mask_out,
                             const uvm_page_mask_t *mask_in1,
                             const uvm_page_mask_t *mask_in2)
{
    bitmap_or(mask_out->bitmap, mask_in1->bitmap, mask_in2->bitmap, PAGES_PER_UVM_VA_BLOCK);
}
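These inline wrappers map one-to-one onto the kernel bitmap helpers over PAGES_PER_UVM_VA_BLOCK bits. A small hedged usage sketch combining them; the scenario and names are illustrative.

// Illustrative only: compute the pages that are in one mask but not another,
// e.g. resident on the CPU yet not mapped anywhere.
static bool example_resident_but_unmapped(uvm_page_mask_t *out,
                                          const uvm_page_mask_t *cpu_resident,
                                          const uvm_page_mask_t *mapped_anywhere)
{
    // out = cpu_resident & ~mapped_anywhere; returns true if out is non-empty.
    return uvm_page_mask_andnot(out, cpu_resident, mapped_anywhere);
}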
@@ -2036,30 +2095,49 @@ uvm_processor_id_t uvm_va_block_page_get_closest_resident(uvm_va_block_t *va_blo
                                                          uvm_page_index_t page_index,
                                                          uvm_processor_id_t processor);

// Mark CPU page page_index as resident on NUMA node specified by nid.
// nid cannot be NUMA_NO_NODE.
void uvm_va_block_cpu_set_resident_page(uvm_va_block_t *va_block, int nid, uvm_page_index_t page_index);

// Test if a CPU page is resident on NUMA node nid. If nid is NUMA_NO_NODE,
// the function will return True if the page is resident on any CPU NUMA node.
bool uvm_va_block_cpu_is_page_resident_on(uvm_va_block_t *va_block, int nid, uvm_page_index_t page_index);

// Test if all pages in region are resident on NUMA node nid. If nid is
// NUMA_NO_NODE, the function will test if the pages in the region are
// resident on any CPU NUMA node.
bool uvm_va_block_cpu_is_region_resident_on(uvm_va_block_t *va_block, int nid, uvm_va_block_region_t region);

// Insert a CPU chunk at the given page_index into the va_block.
// Locking: The va_block lock must be held.
NV_STATUS uvm_cpu_chunk_insert_in_block(uvm_va_block_t *va_block,
                                        uvm_cpu_chunk_t *chunk,
                                        uvm_page_index_t page_index);
NV_STATUS uvm_cpu_chunk_insert_in_block(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);

// Remove a CPU chunk at the given page_index from the va_block.
// nid cannot be NUMA_NO_NODE.
// Locking: The va_block lock must be held.
void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block,
                                     uvm_page_index_t page_index);
void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block, int nid, uvm_page_index_t page_index);

// Return the CPU chunk at the given page_index from the va_block.
// Return the CPU chunk at the given page_index on the given NUMA node from the
// va_block. nid cannot be NUMA_NO_NODE.
// Locking: The va_block lock must be held.
uvm_cpu_chunk_t *uvm_cpu_chunk_get_chunk_for_page(uvm_va_block_t *va_block,
                                                  int nid,
                                                  uvm_page_index_t page_index);

// Return the CPU chunk at the given page_index from the va_block.
// Return the struct page * from the chunk corresponding to the given page_index
// Locking: The va_block lock must be held.
struct page *uvm_cpu_chunk_get_cpu_page(uvm_va_block_t *va_block,
                                        uvm_page_index_t page_index);
struct page *uvm_cpu_chunk_get_cpu_page(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);

// Return the struct page * of the resident chunk at the given page_index from
// the va_block. The given page_index must be resident on the CPU.
// Locking: The va_block lock must be held.
struct page *uvm_va_block_get_cpu_page(uvm_va_block_t *va_block, uvm_page_index_t page_index);

// Physically map a CPU chunk so it is DMA'able from all registered GPUs.
// nid cannot be NUMA_NO_NODE.
// Locking: The va_block lock must be held.
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
                                             uvm_cpu_chunk_t *chunk,
                                             uvm_page_index_t page_index);

// Physically unmap a CPU chunk from all registered GPUs.

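A hedged sketch tying the per-node chunk accessors above together; the wrapper is illustrative, but it only calls the declarations shown in this hunk.

// Illustrative only: resolve the struct page backing page_index on NUMA node
// nid via the per-node chunk lookup. Assumes the va_block lock is held and
// nid != NUMA_NO_NODE, as the comments above require.
static struct page *example_page_on_node(uvm_va_block_t *va_block,
                                         int nid,
                                         uvm_page_index_t page_index)
{
    uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(va_block, nid, page_index);

    if (!chunk)
        return NULL;

    return uvm_cpu_chunk_get_cpu_page(va_block, chunk, page_index);
}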
@@ -30,6 +30,7 @@
#include "uvm_forward_decl.h"

#include <linux/migrate.h>
#include <linux/nodemask.h>

// UVM_VA_BLOCK_BITS is 21, meaning the maximum block size is 2MB. Rationale:
// - 2MB matches the largest Pascal GPU page size so it's a natural fit
@@ -145,6 +146,18 @@ typedef struct
    unsigned count;
} uvm_prot_page_mask_array_t[UVM_PROT_MAX - 1];

typedef struct
{
    // A per-NUMA-node array of page masks (size num_possible_nodes()) that hold
    // the set of CPU pages used by the migration operation.
    uvm_page_mask_t **node_masks;

    // Node mask used to iterate over the page masks above.
    // If a node's bit is set, it means that the page mask given by
    // node_to_index() in node_masks has set pages.
    nodemask_t nodes;
} uvm_make_resident_page_tracking_t;

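The tracking structure above is designed to be walked with the nodemask iterators from <linux/nodemask.h>. A hedged sketch of that iteration pattern; node_to_index() is the mapping the comment refers to, and the loop body is illustrative.

// Illustrative only: visit every NUMA node that contributed pages to a
// migration and the page mask recorded for it.
static void example_walk_tracking(uvm_make_resident_page_tracking_t *tracking)
{
    int nid;

    // Only nodes whose bit is set in tracking->nodes have a non-empty mask.
    for_each_node_mask(nid, tracking->nodes) {
        uvm_page_mask_t *mask = tracking->node_masks[node_to_index(nid)];

        pr_info("node %d tracked %u pages\n", nid, uvm_page_mask_weight(mask));
    }
}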
// In the worst case some VA block operations require more state than we should
// reasonably store on the stack. Instead, we dynamically allocate VA block
// contexts. These are used for almost all operations on VA blocks.
@@ -159,6 +172,9 @@ typedef struct
    // this block_context.
    uvm_page_mask_t scratch_page_mask;

    // Scratch node mask. This follows the same rules as scratch_page_mask;
    nodemask_t scratch_node_mask;

    // State used by uvm_va_block_make_resident
    struct uvm_make_resident_context_struct
    {
@@ -181,10 +197,24 @@ typedef struct
        // Used to perform ECC checks after the migration is done.
        uvm_processor_mask_t all_involved_processors;

        // Page mask used to compute the set of CPU pages for each CPU node.
        uvm_page_mask_t node_pages_mask;

        // Final residency for the data. This is useful for callees to know if
        // a migration is part of a staging copy
        uvm_processor_id_t dest_id;

        // Final residency NUMA node if the migration destination is the CPU.
        int dest_nid;

        // This structure is used to track CPU pages used for migrations on
        // a per-NUMA node basis.
        //
        // The pages could be used for either migrations to the CPU (used to
        // track the destination CPU pages) or staging copies (used to track
        // the CPU pages used for the staging).
        uvm_make_resident_page_tracking_t cpu_pages_used;

        // Event that triggered the call
        uvm_make_resident_cause_t cause;
    } make_resident;

@@ -31,6 +31,7 @@

const uvm_va_policy_t uvm_va_policy_default = {
    .preferred_location = UVM_ID_INVALID,
    .preferred_nid = NUMA_NO_NODE,
    .read_duplication = UVM_READ_DUPLICATION_UNSET,
};


@@ -24,6 +24,7 @@
#ifndef __UVM_VA_POLICY_H__
#define __UVM_VA_POLICY_H__

#include <linux/numa.h>
#include "uvm_linux.h"
#include "uvm_forward_decl.h"
#include "uvm_processors.h"
@@ -62,6 +63,18 @@ struct uvm_va_policy_struct
    // This is set to UVM_ID_INVALID if no preferred location is set.
    uvm_processor_id_t preferred_location;

    // If the preferred location is the CPU, this is either the preferred NUMA
    // node ID or NUMA_NO_NODE to indicate that there is no preference among
    // nodes.
    // If preferred_location is a GPU, preferred_nid will be used if CPU
    // pages have to be allocated for any staging copies. Otherwise, it is
    // not used.
    //
    // TODO: Bug 4148100 - Preferred_location and preferred_nid should be
    // combined into a new type that combines the processor and NUMA node
    // ID.
    int preferred_nid;

    // Mask of processors that are accessing this VA range and should have
    // their page tables updated to access the (possibly remote) pages.
    uvm_processor_mask_t accessed_by;

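A hedged sketch of how a caller might turn the preferred_location/preferred_nid pair above into a NUMA node for a CPU allocation. The helper and the fallback are assumptions for illustration, not the policy code itself.

// Illustrative only: pick the NUMA node to allocate CPU pages on for a policy,
// falling back to the calling thread's node when no preference is expressed.
static int example_policy_alloc_node(const uvm_va_policy_t *policy)
{
    // preferred_nid applies when the CPU is preferred, and also guides staging
    // copies when a GPU is preferred, per the comment above.
    if (policy->preferred_nid != NUMA_NO_NODE)
        return policy->preferred_nid;

    return numa_node_id();
}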
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2023 NVIDIA Corporation
    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -162,9 +162,7 @@ static uvm_va_range_t *uvm_va_range_alloc_managed(uvm_va_space_t *va_space, NvU6
        goto error;

    va_range->type = UVM_VA_RANGE_TYPE_MANAGED;

    uvm_va_range_get_policy(va_range)->read_duplication = UVM_READ_DUPLICATION_UNSET;
    uvm_va_range_get_policy(va_range)->preferred_location = UVM_ID_INVALID;
    va_range->managed.policy = uvm_va_policy_default;

    va_range->blocks = uvm_kvmalloc_zero(uvm_va_range_num_blocks(va_range) * sizeof(va_range->blocks[0]));
    if (!va_range->blocks) {
@@ -376,7 +374,7 @@ NV_STATUS uvm_va_range_create_semaphore_pool(uvm_va_space_t *va_space,
        if (status != NV_OK)
            goto error;

        if (i == 0 && g_uvm_global.conf_computing_enabled)
        if (i == 0 && g_uvm_global.sev_enabled)
            mem_alloc_params.dma_owner = gpu;

        if (attrs.is_cacheable) {
@@ -835,7 +833,7 @@ static void uvm_va_range_disable_peer_external(uvm_va_range_t *va_range,
    range_tree = uvm_ext_gpu_range_tree(va_range, mapping_gpu);
    uvm_mutex_lock(&range_tree->lock);
    uvm_ext_gpu_map_for_each_safe(ext_map, ext_map_next, va_range, mapping_gpu) {
        if (ext_map->owning_gpu == owning_gpu && !ext_map->is_sysmem) {
        if (ext_map->owning_gpu == owning_gpu && (!ext_map->is_sysmem || ext_map->is_egm)) {
            UVM_ASSERT(deferred_free_list);
            uvm_ext_gpu_map_destroy(va_range, ext_map, deferred_free_list);
        }
@@ -1807,7 +1805,7 @@ NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params,
    if (params->gpuAttributesCount > UVM_MAX_GPUS)
        return NV_ERR_INVALID_ARGUMENT;

    if (g_uvm_global.conf_computing_enabled && params->gpuAttributesCount == 0)
    if (g_uvm_global.sev_enabled && params->gpuAttributesCount == 0)
        return NV_ERR_INVALID_ARGUMENT;

    // The mm needs to be locked in order to remove stale HMM va_blocks.

@@ -189,6 +189,7 @@ typedef struct
    // sysmem was originally allocated under. For the allocation to remain valid
    // we need to prevent the GPU from going away, similarly to P2P mapped
    // memory.
    // Similarly for EGM memory.
    //
    // This field is not used for sparse mappings as they don't have an
    // allocation and, hence, owning GPU.
@@ -208,6 +209,9 @@ typedef struct
    // backing.
    bool is_sysmem;

    // EGM memory. If true is_sysmem also has to be true and owning_gpu
    // has to be valid.
    bool is_egm;
    // GPU page tables mapping the allocation
    uvm_page_table_range_vec_t pt_range_vec;

@@ -222,6 +222,12 @@ NV_STATUS uvm_va_space_create(struct address_space *mapping, uvm_va_space_t **va
    uvm_down_write_mmap_lock(current->mm);
    uvm_va_space_down_write(va_space);

    va_space->va_block_context = uvm_va_block_context_alloc(NULL);
    if (!va_space->va_block_context) {
        status = NV_ERR_NO_MEMORY;
        goto fail;
    }

    status = uvm_perf_init_va_space_events(va_space, &va_space->perf_events);
    if (status != NV_OK)
        goto fail;
@@ -258,6 +264,7 @@ NV_STATUS uvm_va_space_create(struct address_space *mapping, uvm_va_space_t **va
fail:
    uvm_perf_heuristics_unload(va_space);
    uvm_perf_destroy_va_space_events(&va_space->perf_events);
    uvm_va_block_context_free(va_space->va_block_context);
    uvm_va_space_up_write(va_space);
    uvm_up_write_mmap_lock(current->mm);

@@ -457,8 +464,6 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
        uvm_va_range_destroy(va_range, &deferred_free_list);
    }

    uvm_hmm_va_space_destroy(va_space);

    uvm_range_group_radix_tree_destroy(va_space);

    // Unregister all GPUs in the VA space. Note that this does not release the
@@ -466,11 +471,17 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
    for_each_va_space_gpu(gpu, va_space)
        unregister_gpu(va_space, gpu, NULL, &deferred_free_list, NULL);

    uvm_hmm_va_space_destroy(va_space);

    uvm_perf_heuristics_unload(va_space);
    uvm_perf_destroy_va_space_events(&va_space->perf_events);

    va_space_remove_dummy_thread_contexts(va_space);

    // Destroy the VA space's block context node tracking after all ranges have
    // been destroyed as the VA blocks may reference it.
    uvm_va_block_context_free(va_space->va_block_context);

    uvm_va_space_up_write(va_space);

    UVM_ASSERT(uvm_processor_mask_empty(&va_space->registered_gpus));
@@ -688,7 +699,7 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,

    // Mixing coherent and non-coherent GPUs is not supported
    for_each_va_space_gpu(other_gpu, va_space) {
        if (uvm_gpu_is_coherent(gpu->parent) != uvm_gpu_is_coherent(other_gpu->parent)) {
        if (uvm_parent_gpu_is_coherent(gpu->parent) != uvm_parent_gpu_is_coherent(other_gpu->parent)) {
            status = NV_ERR_INVALID_DEVICE;
            goto done;
        }
@@ -729,7 +740,7 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
        processor_mask_array_set(va_space->has_nvlink, UVM_ID_CPU, gpu->id);
    }

    if (uvm_gpu_is_coherent(gpu->parent)) {
    if (uvm_parent_gpu_is_coherent(gpu->parent)) {
        processor_mask_array_set(va_space->has_native_atomics, gpu->id, UVM_ID_CPU);

        if (gpu->mem_info.numa.enabled) {
@@ -1540,7 +1551,6 @@ static void remove_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space,
    atomic_inc(&va_space->gpu_va_space_deferred_free.num_pending);

    uvm_processor_mask_clear(&va_space->registered_gpu_va_spaces, gpu_va_space->gpu->id);
    uvm_processor_mask_clear_atomic(&va_space->needs_fault_buffer_flush, gpu_va_space->gpu->id);
    va_space->gpu_va_spaces[uvm_id_gpu_index(gpu_va_space->gpu->id)] = NULL;
    gpu_va_space->state = UVM_GPU_VA_SPACE_STATE_DEAD;
}
@@ -1610,14 +1620,14 @@ NV_STATUS uvm_va_space_unregister_gpu_va_space(uvm_va_space_t *va_space, const N
    return status;
}

bool uvm_va_space_peer_enabled(uvm_va_space_t *va_space, uvm_gpu_t *gpu1, uvm_gpu_t *gpu2)
bool uvm_va_space_peer_enabled(uvm_va_space_t *va_space, const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
{
    size_t table_index;

    UVM_ASSERT(uvm_processor_mask_test(&va_space->registered_gpus, gpu0->id));
    UVM_ASSERT(uvm_processor_mask_test(&va_space->registered_gpus, gpu1->id));
    UVM_ASSERT(uvm_processor_mask_test(&va_space->registered_gpus, gpu2->id));

    table_index = uvm_gpu_peer_table_index(gpu1->id, gpu2->id);
    table_index = uvm_gpu_peer_table_index(gpu0->id, gpu1->id);
    return !!test_bit(table_index, va_space->enabled_peers);
}

@@ -2073,9 +2083,16 @@ NV_STATUS uvm_service_block_context_init(void)
    // Pre-allocate some fault service contexts for the CPU and add them to the global list
    while (num_preallocated_contexts-- > 0) {
        uvm_service_block_context_t *service_context = uvm_kvmalloc(sizeof(*service_context));

        if (!service_context)
            return NV_ERR_NO_MEMORY;

        service_context->block_context = uvm_va_block_context_alloc(NULL);
        if (!service_context->block_context) {
            uvm_kvfree(service_context);
            return NV_ERR_NO_MEMORY;
        }

        list_add(&service_context->cpu_fault.service_context_list, &g_cpu_service_block_context_list);
    }

@@ -2089,6 +2106,7 @@ void uvm_service_block_context_exit(void)
    // Free fault service contexts for the CPU and clear the global list
    list_for_each_entry_safe(service_context, service_context_tmp, &g_cpu_service_block_context_list,
                             cpu_fault.service_context_list) {
        uvm_va_block_context_free(service_context->block_context);
        uvm_kvfree(service_context);
    }
    INIT_LIST_HEAD(&g_cpu_service_block_context_list);
@@ -2110,8 +2128,17 @@ static uvm_service_block_context_t *service_block_context_cpu_alloc(void)

    uvm_spin_unlock(&g_cpu_service_block_context_list_lock);

    if (!service_context)
    if (!service_context) {
        service_context = uvm_kvmalloc(sizeof(*service_context));
        service_context->block_context = uvm_va_block_context_alloc(NULL);
        if (!service_context->block_context) {
            uvm_kvfree(service_context);
            service_context = NULL;
        }
    }
    else {
        uvm_va_block_context_init(service_context->block_context, NULL);
    }

    return service_context;
}
@@ -2137,6 +2164,7 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,
    NV_STATUS status = uvm_global_get_status();
    bool tools_enabled;
    bool major_fault = false;
    bool is_remote_mm = false;
    uvm_service_block_context_t *service_context;
    uvm_global_processor_mask_t gpus_to_check_for_ecc;

@@ -2177,7 +2205,7 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,
    // mmap_lock held on the CPU fault path, so tell the fault handler to use
    // that one. current->mm might differ if we're on the access_process_vm
    // (ptrace) path or if another driver is calling get_user_pages.
    service_context->block_context.mm = vma->vm_mm;
    service_context->block_context->mm = vma->vm_mm;

    // The mmap_lock might be held in write mode, but the mode doesn't matter
    // for the purpose of lock ordering and we don't rely on it being in write
@@ -2216,25 +2244,32 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,
            uvm_tools_record_throttling_end(va_space, fault_addr, UVM_ID_CPU);

        if (is_hmm) {
            // Note that normally we should find a va_block for the faulting
            // address because the block had to be created when migrating a
            // page to the GPU and a device private PTE inserted into the CPU
            // page tables in order for migrate_to_ram() to be called. Not
            // finding it means the PTE was remapped to a different virtual
            // address with mremap() so create a new va_block if needed.
            status = uvm_hmm_va_block_find_create(va_space,
                                                  fault_addr,
                                                  &service_context->block_context.hmm.vma,
                                                  &va_block);
            if (status != NV_OK)
                break;
            if (va_space->va_space_mm.mm == vma->vm_mm) {
                // Note that normally we should find a va_block for the faulting
                // address because the block had to be created when migrating a
                // page to the GPU and a device private PTE inserted into the CPU
                // page tables in order for migrate_to_ram() to be called. Not
                // finding it means the PTE was remapped to a different virtual
                // address with mremap() so create a new va_block if needed.
                status = uvm_hmm_va_block_find_create(va_space,
                                                      fault_addr,
                                                      &service_context->block_context->hmm.vma,
                                                      &va_block);
                if (status != NV_OK)
                    break;

            UVM_ASSERT(service_context->block_context.hmm.vma == vma);
            status = uvm_hmm_migrate_begin(va_block);
            if (status != NV_OK)
                break;
                UVM_ASSERT(service_context->block_context->hmm.vma == vma);
                status = uvm_hmm_migrate_begin(va_block);
                if (status != NV_OK)
                    break;

            service_context->cpu_fault.vmf = vmf;
                service_context->cpu_fault.vmf = vmf;
            }
            else {
                is_remote_mm = true;
                status = uvm_hmm_remote_cpu_fault(vmf);
                break;
            }
        }
        else {
            status = uvm_va_block_find_create_managed(va_space, fault_addr, &va_block);
@@ -2265,7 +2300,7 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,

    tools_enabled = va_space->tools.enabled;

    if (status == NV_OK) {
    if (status == NV_OK && !is_remote_mm) {
        uvm_va_space_global_gpus_in_mask(va_space,
                                         &gpus_to_check_for_ecc,
                                         &service_context->cpu_fault.gpus_to_check_for_ecc);
@@ -2275,7 +2310,7 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,
    uvm_va_space_up_read(va_space);
    uvm_record_unlock_mmap_lock_read(vma->vm_mm);

    if (status == NV_OK) {
    if (status == NV_OK && !is_remote_mm) {
        status = uvm_global_mask_check_ecc_error(&gpus_to_check_for_ecc);
        uvm_global_mask_release(&gpus_to_check_for_ecc);
    }

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation
    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -253,17 +253,6 @@ struct uvm_va_space_struct
    // corrupting state.
    uvm_processor_mask_t gpu_unregister_in_progress;

    // On VMA destruction, the fault buffer needs to be flushed for all the GPUs
    // registered in the VA space to avoid leaving stale entries of the VA range
    // that is going to be destroyed. Otherwise, these fault entries can be
    // attributed to new VA ranges reallocated at the same addresses. However,
    // uvm_vm_close is called with mm->mmap_lock taken and we cannot take the
    // ISR lock. Therefore, we use a flag to notify the GPU fault handler that
    // the fault buffer needs to be flushed, before servicing the faults that
    // belong to the va_space. The bits are set and cleared atomically so no
    // va_space lock is required.
    uvm_processor_mask_t needs_fault_buffer_flush;

    // Mask of processors that are participating in system-wide atomics
    uvm_processor_mask_t system_wide_atomics_enabled_processors;

@@ -335,7 +324,7 @@ struct uvm_va_space_struct
    // Block context used for GPU unmap operations so that allocation is not
    // required on the teardown path. This can only be used while the VA space
    // lock is held in write mode. Access using uvm_va_space_block_context().
    uvm_va_block_context_t va_block_context;
    uvm_va_block_context_t *va_block_context;

    NvU64 initialization_flags;

@@ -541,7 +530,7 @@ void uvm_va_space_detach_all_user_channels(uvm_va_space_t *va_space, struct list

// Returns whether peer access between these two GPUs has been enabled in this
// VA space. Both GPUs must be registered in the VA space.
bool uvm_va_space_peer_enabled(uvm_va_space_t *va_space, uvm_gpu_t *gpu1, uvm_gpu_t *gpu2);
bool uvm_va_space_peer_enabled(uvm_va_space_t *va_space, const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1);

// Returns the va_space this file points to. Returns NULL if this file
// does not point to a va_space.
@@ -575,8 +564,8 @@ static uvm_va_block_context_t *uvm_va_space_block_context(uvm_va_space_t *va_spa
    if (mm)
        uvm_assert_mmap_lock_locked(mm);

    uvm_va_block_context_init(&va_space->va_block_context, mm);
    return &va_space->va_block_context;
    uvm_va_block_context_init(va_space->va_block_context, mm);
    return va_space->va_block_context;
}

// Retains the GPU VA space memory object. destroy_gpu_va_space and

@@ -215,7 +215,13 @@ bool uvm_va_space_mm_enabled(uvm_va_space_t *va_space)

static struct mmu_notifier_ops uvm_mmu_notifier_ops_ats =
{
#if defined(NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE)
    .invalidate_range = uvm_mmu_notifier_invalidate_range_ats,
#elif defined(NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS)
    .arch_invalidate_secondary_tlbs = uvm_mmu_notifier_invalidate_range_ats,
#else
#error One of invalidate_range/arch_invalid_secondary must be present
#endif
};

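Both hooks selected above take the same callback shape, which is what allows one handler to be assigned to either field. A hedged, illustrative sketch of that shape; the body is not the driver's actual ATS invalidation logic.

#include <linux/mmu_notifier.h>

// Illustrative only: the signature shared by .invalidate_range (older kernels)
// and .arch_invalidate_secondary_tlbs (newer kernels), so one handler fits
// both fields.
static void example_invalidate_ats(struct mmu_notifier *mn,
                                   struct mm_struct *mm,
                                   unsigned long start,
                                   unsigned long end)
{
    // A real handler would flush the GPU's ATS translations for [start, end).
    pr_debug("invalidate secondary TLBs for [%lx, %lx)\n", start, end);
}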
static int uvm_mmu_notifier_register(uvm_va_space_mm_t *va_space_mm)
@@ -310,17 +316,6 @@ void uvm_va_space_mm_unregister(uvm_va_space_t *va_space)
    if (!va_space_mm->mm)
        return;

    // At this point the mm is still valid because uvm_mm_release()
    // hasn't yet called mmput(). uvm_hmm_va_space_destroy() will kill
    // all the va_blocks along with any associated gpu_chunks, so we
    // need to make sure these chunks are free. However freeing them
    // requires a valid mm so we can call migrate_vma_setup(), so we
    // do that here.
    // TODO: Bug 3902536: [UVM-HMM] add code to migrate GPU memory
    // without having a va_block
    if (uvm_hmm_is_enabled(va_space))
        uvm_hmm_evict_va_blocks(va_space);

    if (uvm_va_space_mm_enabled(va_space)) {
        if (UVM_ATS_IBM_SUPPORTED_IN_DRIVER() && g_uvm_global.ats.enabled)
            uvm_mmu_notifier_unregister(va_space_mm);

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2017-2023 NVIDIA Corporation
    Copyright (c) 2017-2021 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -145,10 +145,7 @@ static NvU64 small_half_pde_volta(uvm_mmu_page_table_alloc_t *phys_alloc)
    return pde_bits;
}

static void make_pde_volta(void *entry,
                           uvm_mmu_page_table_alloc_t **phys_allocs,
                           NvU32 depth,
                           uvm_page_directory_t *child_dir)
static void make_pde_volta(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
{
    NvU32 entry_count = entries_per_index_volta(depth);
    NvU64 *entry_bits = (NvU64 *)entry;
