535.43.09

russellcnv
2023-09-01 21:36:45 -07:00
parent 18b7303c54
commit 17546dbdda
122 changed files with 41587 additions and 34584 deletions

View File

@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.43.08\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.43.09\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

View File

@@ -566,8 +566,11 @@ typedef struct UvmPlatformInfo_tag
// Out: ATS (Address Translation Services) is supported
NvBool atsSupported;
// Out: AMD SEV (Secure Encrypted Virtualization) is enabled
NvBool sevEnabled;
// Out: True if HW trusted execution, such as AMD's SEV-SNP or Intel's TDX,
// is enabled in the VM, indicating that Confidential Computing must also
// be enabled in the GPU(s); these two security features are either both
// enabled or both disabled.
NvBool confComputingEnabled;
} UvmPlatformInfo;
typedef struct UvmGpuClientInfo_tag
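The both-enabled-or-both-disabled invariant described above is what lets UVM treat the single platform flag as authoritative. A minimal sketch of the kind of consistency check a consumer could perform (illustrative only; the check this commit actually adds is uvm_conf_computing_check_parent_gpu(), shown further down):

// Illustrative sketch, not part of this change: the platform flag and a GPU's
// Confidential Computing mode are expected to agree.
static void example_assert_cc_invariant(const UvmPlatformInfo *platform_info,
                                        NvBool gpu_conf_computing_enabled)
{
    // Both enabled or both disabled; a mismatch indicates a misconfigured stack.
    UVM_ASSERT(platform_info->confComputingEnabled == gpu_conf_computing_enabled);
}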

View File

@@ -6341,6 +6341,21 @@ compile_test() {
compile_check_conftest "$CODE" "NV_MEMPOLICY_HAS_HOME_NODE" "" "types"
;;
mmu_interval_notifier)
#
# Determine if mmu_interval_notifier struct is present or not
#
# Added by commit 99cb252f5 ("mm/mmu_notifier: add an interval tree
# notifier") in v5.10 (2019-11-12).
#
CODE="
#include <linux/mmu_notifier.h>
struct mmu_interval_notifier interval_notifier;
"
compile_check_conftest "$CODE" "NV_MMU_INTERVAL_NOTIFIER" "" "types"
;;
# When adding a new conftest entry, please use the correct format for
# specifying the relevant upstream Linux kernel commit.
#
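When the snippet above compiles, conftest emits a definition of NV_MMU_INTERVAL_NOTIFIER, and the UVM sources in this same commit gate their interval-notifier code on that macro (see uvm_ats.h and uvm_gpu.h below). A minimal sketch of the consuming pattern, with an illustrative helper name and no claim about the exact conftest plumbing:

#include <linux/mmu_notifier.h>

#if defined(NV_MMU_INTERVAL_NOTIFIER)
// Newer kernels: the interval-notifier API is available.
static int example_insert_notifier(struct mmu_interval_notifier *mni,
                                   struct mm_struct *mm,
                                   unsigned long start,
                                   unsigned long length,
                                   const struct mmu_interval_notifier_ops *ops)
{
    return mmu_interval_notifier_insert(mni, mm, start, length, ops);
}
#else
// Older kernels: the struct does not exist, so the dependent code is compiled out.
#endif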

View File

@@ -110,5 +110,6 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_mm_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg

View File

@@ -44,6 +44,8 @@ void uvm_ats_init(const UvmPlatformInfo *platform_info)
void uvm_ats_init_va_space(uvm_va_space_t *va_space)
{
uvm_init_rwsem(&va_space->ats.lock, UVM_LOCK_ORDER_LEAF);
if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_init_va_space(va_space);
}

View File

@@ -28,17 +28,32 @@
#include "uvm_forward_decl.h"
#include "uvm_ats_ibm.h"
#include "nv_uvm_types.h"
#include "uvm_lock.h"
#include "uvm_ats_sva.h"
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
// ATS prefetcher uses hmm_range_fault() to query residency information.
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
// of memory regions while hmm_range_fault() is being called, MMU interval
// notifiers are needed.
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
#define UVM_ATS_PREFETCH_SUPPORTED() 1
#else
#define UVM_ATS_PREFETCH_SUPPORTED() 0
#endif
typedef struct
{
// Mask of gpu_va_spaces which are registered for ATS access. The mask is
// indexed by gpu->id. This mask is protected by the VA space lock.
uvm_processor_mask_t registered_gpu_va_spaces;
// Protects racing invalidates in the VA space while hmm_range_fault() is
// being called in ats_compute_residency_mask().
uvm_rw_semaphore_t lock;
union
{
uvm_ibm_va_space_t ibm;

View File

@@ -20,60 +20,19 @@
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_api.h"
#include "uvm_tools.h"
#include "uvm_va_range.h"
#include "uvm_ats.h"
#include "uvm_ats_faults.h"
#include "uvm_migrate_pageable.h"
#include <linux/nodemask.h>
#include <linux/mempolicy.h>
#include <linux/mmu_notifier.h>
// TODO: Bug 2103669: Implement a real prefetching policy and remove or adapt
// these experimental parameters. These are intended to help guide that policy.
static unsigned int uvm_exp_perf_prefetch_ats_order_replayable = 0;
module_param(uvm_exp_perf_prefetch_ats_order_replayable, uint, 0644);
MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_replayable,
"Max order of pages (2^N) to prefetch on replayable ATS faults");
static unsigned int uvm_exp_perf_prefetch_ats_order_non_replayable = 0;
module_param(uvm_exp_perf_prefetch_ats_order_non_replayable, uint, 0644);
MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_non_replayable,
"Max order of pages (2^N) to prefetch on non-replayable ATS faults");
// Expand the fault region to the naturally-aligned region with order given by
// the module parameters, clamped to the vma containing fault_addr (if any).
// Note that this means the region contains fault_addr but may not begin at
// fault_addr.
static void expand_fault_region(struct vm_area_struct *vma,
NvU64 start,
size_t length,
uvm_fault_client_type_t client_type,
unsigned long *migrate_start,
unsigned long *migrate_length)
{
unsigned int order;
unsigned long outer, aligned_start, aligned_size;
*migrate_start = start;
*migrate_length = length;
if (client_type == UVM_FAULT_CLIENT_TYPE_HUB)
order = uvm_exp_perf_prefetch_ats_order_non_replayable;
else
order = uvm_exp_perf_prefetch_ats_order_replayable;
if (order == 0)
return;
UVM_ASSERT(vma);
UVM_ASSERT(order < BITS_PER_LONG - PAGE_SHIFT);
aligned_size = (1UL << order) * PAGE_SIZE;
aligned_start = start & ~(aligned_size - 1);
*migrate_start = max(vma->vm_start, aligned_start);
outer = min(vma->vm_end, aligned_start + aligned_size);
*migrate_length = outer - *migrate_start;
}
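As a worked example of the expansion above (hypothetical values): with order = 4 and 4 KiB pages, aligned_size = 16 * PAGE_SIZE = 64 KiB, so a fault at 0x7f1000012000 yields aligned_start = 0x7f1000012000 & ~0xffff = 0x7f1000010000; the range [aligned_start, aligned_start + 64 KiB) is then clamped to [vma->vm_start, vma->vm_end) before *migrate_start and *migrate_length are written back.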
#if UVM_ATS_PREFETCH_SUPPORTED()
#include <linux/hmm.h>
#endif
static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
@@ -122,6 +81,8 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
.mm = mm,
.dst_id = ats_context->residency_id,
.dst_node_id = ats_context->residency_node,
.start = start,
.length = length,
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
.touch = true,
.skip_mapped = true,
@@ -132,13 +93,6 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
expand_fault_region(vma,
start,
length,
ats_context->client_type,
&uvm_migrate_args.start,
&uvm_migrate_args.length);
// We are trying to use migrate_vma API in the kernel (if it exists) to
// populate and map the faulting region on the GPU. We want to do this only
// on the first touch. That is, pages which are not already mapped. So, we
@@ -184,6 +138,12 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
struct mempolicy *vma_policy = vma_policy(vma);
unsigned short mode;
ats_context->prefetch_state.has_preferred_location = false;
// It's safe to read vma_policy since the mmap_lock is held in at least read
// mode in this path.
uvm_assert_mmap_lock_locked(vma->vm_mm);
if (!vma_policy)
goto done;
@@ -212,6 +172,9 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
else
residency = first_node(vma_policy->nodes);
}
if (!nodes_empty(vma_policy->nodes))
ats_context->prefetch_state.has_preferred_location = true;
}
// Update gpu if residency is not the faulting gpu.
@@ -219,12 +182,253 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
gpu = uvm_va_space_find_gpu_with_memory_node_id(gpu_va_space->va_space, residency);
done:
#else
ats_context->prefetch_state.has_preferred_location = false;
#endif
ats_context->residency_id = gpu ? gpu->parent->id : UVM_ID_CPU;
ats_context->residency_node = residency;
}
static void get_range_in_vma(struct vm_area_struct *vma, NvU64 base, NvU64 *start, NvU64 *end)
{
*start = max(vma->vm_start, (unsigned long) base);
*end = min(vma->vm_end, (unsigned long) (base + UVM_VA_BLOCK_SIZE));
}
static uvm_page_index_t uvm_ats_cpu_page_index(NvU64 base, NvU64 addr)
{
UVM_ASSERT(addr >= base);
UVM_ASSERT(addr <= (base + UVM_VA_BLOCK_SIZE));
return (addr - base) / PAGE_SIZE;
}
// start and end must be aligned to PAGE_SIZE and must fall within
// [base, base + UVM_VA_BLOCK_SIZE]
static uvm_va_block_region_t uvm_ats_region_from_start_end(NvU64 start, NvU64 end)
{
// base can be greater than, less than or equal to the start of a VMA.
NvU64 base = UVM_VA_BLOCK_ALIGN_DOWN(start);
UVM_ASSERT(start < end);
UVM_ASSERT(PAGE_ALIGNED(start));
UVM_ASSERT(PAGE_ALIGNED(end));
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
return uvm_va_block_region(uvm_ats_cpu_page_index(base, start), uvm_ats_cpu_page_index(base, end));
}
static uvm_va_block_region_t uvm_ats_region_from_vma(struct vm_area_struct *vma, NvU64 base)
{
NvU64 start;
NvU64 end;
get_range_in_vma(vma, base, &start, &end);
return uvm_ats_region_from_start_end(start, end);
}
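A concrete (hypothetical) example, assuming the usual 2 MiB UVM_VA_BLOCK_SIZE and 4 KiB pages: with base = 0x7f4200200000 and a VMA spanning [0x7f4200280000, 0x7f4200500000), get_range_in_vma() clamps to start = 0x7f4200280000 and end = 0x7f4200400000; UVM_VA_BLOCK_ALIGN_DOWN(start) recovers base, so the resulting region covers page indices [128, 512) of the 512-page block.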
#if UVM_ATS_PREFETCH_SUPPORTED()
static bool uvm_ats_invalidate_notifier(struct mmu_interval_notifier *mni, unsigned long cur_seq)
{
uvm_ats_fault_context_t *ats_context = container_of(mni, uvm_ats_fault_context_t, prefetch_state.notifier);
uvm_va_space_t *va_space = ats_context->prefetch_state.va_space;
// The following write lock protects against concurrent invalidates while
// hmm_range_fault() is being called in ats_compute_residency_mask().
uvm_down_write(&va_space->ats.lock);
mmu_interval_set_seq(mni, cur_seq);
uvm_up_write(&va_space->ats.lock);
return true;
}
static bool uvm_ats_invalidate_notifier_entry(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
unsigned long cur_seq)
{
UVM_ENTRY_RET(uvm_ats_invalidate_notifier(mni, cur_seq));
}
static const struct mmu_interval_notifier_ops uvm_ats_notifier_ops =
{
.invalidate = uvm_ats_invalidate_notifier_entry,
};
#endif
static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_fault_context_t *ats_context)
{
NV_STATUS status = NV_OK;
#if UVM_ATS_PREFETCH_SUPPORTED()
int ret;
NvU64 start;
NvU64 end;
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
struct hmm_range range;
uvm_page_index_t page_index;
uvm_va_block_region_t vma_region;
uvm_va_space_t *va_space = gpu_va_space->va_space;
struct mm_struct *mm = va_space->va_space_mm.mm;
uvm_assert_rwsem_locked_read(&va_space->lock);
ats_context->prefetch_state.first_touch = true;
uvm_page_mask_zero(residency_mask);
get_range_in_vma(vma, base, &start, &end);
vma_region = uvm_ats_region_from_start_end(start, end);
range.notifier = &ats_context->prefetch_state.notifier;
range.start = start;
range.end = end;
range.hmm_pfns = ats_context->prefetch_state.pfns;
range.default_flags = 0;
range.pfn_flags_mask = 0;
range.dev_private_owner = NULL;
ats_context->prefetch_state.va_space = va_space;
// mmu_interval_notifier_insert() will try to acquire mmap_lock for write
// and will deadlock since mmap_lock is already held for read in this path.
// This is prevented by calling __mmu_notifier_register() during va_space
// creation. See the comment in uvm_mmu_notifier_register() for more
// details.
ret = mmu_interval_notifier_insert(range.notifier, mm, start, end, &uvm_ats_notifier_ops);
if (ret)
return errno_to_nv_status(ret);
while (true) {
range.notifier_seq = mmu_interval_read_begin(range.notifier);
ret = hmm_range_fault(&range);
if (ret == -EBUSY)
continue;
if (ret) {
status = errno_to_nv_status(ret);
UVM_ASSERT(status != NV_OK);
break;
}
uvm_down_read(&va_space->ats.lock);
// Pages may have been freed or re-allocated after hmm_range_fault() is
// called, so the PTE might point to a different page or to nothing. In the
// memory hot-unplug case it is not safe to call page_to_nid() on the
// page, as the struct page itself may have been freed. To protect
// against these cases, uvm_ats_invalidate_notifier_entry() blocks on the
// va_space ATS write lock for concurrent invalidates, since the va_space
// ATS lock is held for read in this path.
if (!mmu_interval_read_retry(range.notifier, range.notifier_seq))
break;
uvm_up_read(&va_space->ats.lock);
}
if (status == NV_OK) {
for_each_va_block_page_in_region(page_index, vma_region) {
unsigned long pfn = ats_context->prefetch_state.pfns[page_index - vma_region.first];
if (pfn & HMM_PFN_VALID) {
struct page *page = hmm_pfn_to_page(pfn);
if (page_to_nid(page) == ats_context->residency_node)
uvm_page_mask_set(residency_mask, page_index);
ats_context->prefetch_state.first_touch = false;
}
}
uvm_up_read(&va_space->ats.lock);
}
mmu_interval_notifier_remove(range.notifier);
#endif
return status;
}
static void ats_expand_fault_region(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
uvm_ats_fault_context_t *ats_context,
uvm_va_block_region_t max_prefetch_region,
uvm_page_mask_t *faulted_mask)
{
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree = &ats_context->prefetch_state.bitmap_tree;
if (uvm_page_mask_empty(faulted_mask))
return;
uvm_perf_prefetch_compute_ats(gpu_va_space->va_space,
faulted_mask,
uvm_va_block_region_from_mask(NULL, faulted_mask),
max_prefetch_region,
residency_mask,
bitmap_tree,
prefetch_mask);
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
if (vma->vm_flags & VM_WRITE)
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
}
static NV_STATUS ats_fault_prefetch(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_fault_context_t *ats_context)
{
NV_STATUS status = NV_OK;
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
return status;
if (uvm_page_mask_empty(faulted_mask))
return status;
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
if (status != NV_OK)
return status;
// Prefetch the entire region if none of the pages are resident on any node
// and if preferred_location is the faulting GPU.
if (ats_context->prefetch_state.has_preferred_location &&
ats_context->prefetch_state.first_touch &&
uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->parent->id)) {
uvm_page_mask_init_from_region(prefetch_mask, max_prefetch_region, NULL);
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
if (vma->vm_flags & VM_WRITE)
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
return status;
}
ats_expand_fault_region(gpu_va_space, vma, ats_context, max_prefetch_region, faulted_mask);
return status;
}
NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
@@ -267,6 +471,8 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
ats_batch_select_residency(gpu_va_space, vma, ats_context);
ats_fault_prefetch(gpu_va_space, vma, base, ats_context);
for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
NvU64 start = base + (subregion.first * PAGE_SIZE);
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Copyright (c) 2021-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -54,23 +54,26 @@ bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu)
return uvm_conf_computing_get_mode(gpu->parent) == UVM_GPU_CONF_COMPUTE_MODE_HCC;
}
NV_STATUS uvm_conf_computing_init_parent_gpu(const uvm_parent_gpu_t *parent)
void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent)
{
UvmGpuConfComputeMode cc, sys_cc;
uvm_gpu_t *first;
uvm_gpu_t *first_gpu;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
// The Confidential Computing state of the GPU should match that of the
// system.
UVM_ASSERT(uvm_conf_computing_mode_enabled_parent(parent) == g_uvm_global.conf_computing_enabled);
// TODO: Bug 2844714: since we have no routine to traverse parent GPUs,
// find first child GPU and get its parent.
first = uvm_global_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);
if (!first)
return NV_OK;
first_gpu = uvm_global_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);
if (first_gpu == NULL)
return;
sys_cc = uvm_conf_computing_get_mode(first->parent);
cc = uvm_conf_computing_get_mode(parent);
return cc == sys_cc ? NV_OK : NV_ERR_NOT_SUPPORTED;
// All GPUs derive Confidential Computing status from their parent. By
// current policy all parent GPUs have identical Confidential Computing
// status.
UVM_ASSERT(uvm_conf_computing_get_mode(parent) == uvm_conf_computing_get_mode(first_gpu->parent));
}
static void dma_buffer_destroy_locked(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,

View File

@@ -60,10 +60,8 @@
// UVM_METHOD_SIZE * 2 * 10 = 80.
#define UVM_CONF_COMPUTING_SIGN_BUF_MAX_SIZE 80
// All GPUs derive confidential computing status from their parent.
// By current policy all parent GPUs have identical confidential
// computing status.
NV_STATUS uvm_conf_computing_init_parent_gpu(const uvm_parent_gpu_t *parent);
void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent);
bool uvm_conf_computing_mode_enabled_parent(const uvm_parent_gpu_t *parent);
bool uvm_conf_computing_mode_enabled(const uvm_gpu_t *gpu);
bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu);

View File

@@ -71,11 +71,6 @@ static void uvm_unregister_callbacks(void)
}
}
static void sev_init(const UvmPlatformInfo *platform_info)
{
g_uvm_global.sev_enabled = platform_info->sevEnabled;
}
NV_STATUS uvm_global_init(void)
{
NV_STATUS status;
@@ -124,8 +119,7 @@ NV_STATUS uvm_global_init(void)
uvm_ats_init(&platform_info);
g_uvm_global.num_simulated_devices = 0;
sev_init(&platform_info);
g_uvm_global.conf_computing_enabled = platform_info.confComputingEnabled;
status = uvm_gpu_init();
if (status != NV_OK) {

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -143,11 +143,16 @@ struct uvm_global_struct
struct page *page;
} unload_state;
// AMD Secure Encrypted Virtualization (SEV) status. True if VM has SEV
// enabled. This field is set once during global initialization
// (uvm_global_init), and can be read afterwards without acquiring any
// locks.
bool sev_enabled;
// True if the VM has AMD's SEV, or equivalent HW security extensions such
// as Intel's TDX, enabled. The flag is always false on the host.
//
// This value moves in tandem with that of Confidential Computing in the
// GPU(s) in all supported configurations, so it is used as a proxy for the
// Confidential Computing state.
//
// This field is set once during global initialization (uvm_global_init),
// and can be read afterwards without acquiring any locks.
bool conf_computing_enabled;
};
// Initialize global uvm state

View File

@@ -1099,12 +1099,7 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
return status;
}
status = uvm_conf_computing_init_parent_gpu(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Confidential computing: %s, GPU %s\n",
nvstatusToString(status), parent_gpu->name);
return status;
}
uvm_conf_computing_check_parent_gpu(parent_gpu);
parent_gpu->pci_dev = gpu_platform_info->pci_dev;
parent_gpu->closest_cpu_numa_node = dev_to_node(&parent_gpu->pci_dev->dev);

View File

@@ -46,6 +46,7 @@
#include "uvm_rb_tree.h"
#include "uvm_perf_prefetch.h"
#include "nv-kthread-q.h"
#include <linux/mmu_notifier.h>
#include "uvm_conf_computing.h"
// Buffer length to store uvm gpu id, RM device name and gpu uuid.
@@ -192,9 +193,9 @@ typedef struct
// Mask of successfully serviced read faults on pages in write_fault_mask.
uvm_page_mask_t reads_serviced_mask;
// Temporary mask used for uvm_page_mask_or_equal. This is used since
// bitmap_or_equal() isn't present in all linux kernel versions.
uvm_page_mask_t tmp_mask;
// Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
// SAM VMA. This is used as input to the prefetcher.
uvm_page_mask_t faulted_mask;
// Client type of the service requestor.
uvm_fault_client_type_t client_type;
@@ -204,6 +205,40 @@ typedef struct
// New residency NUMA node ID of the faulting region.
int residency_node;
struct
{
// True if preferred_location was set on this faulting region.
// The UVM_VA_BLOCK_SIZE sized region in the faulting region bound by the
// VMA is prefetched if preferred_location was set and if first_touch
// is true.
bool has_preferred_location;
// True if the UVM_VA_BLOCK_SIZE sized region isn't resident on any
// node. False if any page in the region is resident somewhere.
bool first_touch;
// Mask of prefetched pages in a UVM_VA_BLOCK_SIZE aligned region of a
// SAM VMA.
uvm_page_mask_t prefetch_pages_mask;
// PFN info of the faulting region
unsigned long pfns[PAGES_PER_UVM_VA_BLOCK];
// Faulting/preferred processor residency mask of the faulting region.
uvm_page_mask_t residency_mask;
#if defined(NV_MMU_INTERVAL_NOTIFIER)
// MMU notifier used to compute residency of this faulting region.
struct mmu_interval_notifier notifier;
#endif
uvm_va_space_t *va_space;
// Prefetch temporary state.
uvm_perf_prefetch_bitmap_tree_t bitmap_tree;
} prefetch_state;
} uvm_ats_fault_context_t;
struct uvm_fault_service_batch_context_struct
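For scale (assuming the usual 2 MiB UVM_VA_BLOCK_SIZE and 4 KiB pages, so PAGES_PER_UVM_VA_BLOCK = 512): the new pfns[] array adds 512 * sizeof(unsigned long) = 4 KiB to each uvm_ats_fault_context_t on 64-bit kernels, and each uvm_page_mask_t added to prefetch_state is a 512-bit (64-byte) bitmap.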

View File

@@ -1009,6 +1009,7 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
NvU64 address = uvm_va_block_cpu_page_address(va_block, page_index);
bool read_duplicate = false;
uvm_processor_id_t new_residency;
const uvm_va_policy_t *policy;
// Ensure that the migratability iterator covers the current address
while (iter.end < address)
@@ -1035,21 +1036,23 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
// If the underlying VMA is gone, skip HMM migrations.
if (uvm_va_block_is_hmm(va_block)) {
status = uvm_hmm_find_vma(&service_context->block_context, address);
status = uvm_hmm_find_vma(service_context->block_context.mm,
&service_context->block_context.hmm.vma,
address);
if (status == NV_ERR_INVALID_ADDRESS)
continue;
UVM_ASSERT(status == NV_OK);
}
service_context->block_context.policy = uvm_va_policy_get(va_block, address);
policy = uvm_va_policy_get(va_block, address);
new_residency = uvm_va_block_select_residency(va_block,
&service_context->block_context,
page_index,
processor,
uvm_fault_access_type_mask_bit(UVM_FAULT_ACCESS_TYPE_PREFETCH),
service_context->block_context.policy,
policy,
&thrashing_hint,
UVM_SERVICE_OPERATION_ACCESS_COUNTERS,
&read_duplicate);
@@ -1094,12 +1097,17 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
if (!uvm_processor_mask_empty(&service_context->resident_processors)) {
while (first_page_index <= last_page_index) {
uvm_page_index_t outer = last_page_index + 1;
const uvm_va_policy_t *policy;
if (uvm_va_block_is_hmm(va_block)) {
status = uvm_hmm_find_policy_vma_and_outer(va_block,
&service_context->block_context,
first_page_index,
&outer);
status = NV_ERR_INVALID_ADDRESS;
if (service_context->block_context.mm) {
status = uvm_hmm_find_policy_vma_and_outer(va_block,
&service_context->block_context.hmm.vma,
first_page_index,
&policy,
&outer);
}
if (status != NV_OK)
break;
}

View File

@@ -343,6 +343,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
bool read_duplicate;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
const uvm_va_policy_t *policy;
UVM_ASSERT(!fault_entry->is_fatal);
@@ -352,7 +353,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
UVM_ASSERT(fault_entry->fault_address >= va_block->start);
UVM_ASSERT(fault_entry->fault_address <= va_block->end);
service_context->block_context.policy = uvm_va_policy_get(va_block, fault_entry->fault_address);
policy = uvm_va_policy_get(va_block, fault_entry->fault_address);
if (service_context->num_retries == 0) {
// notify event to tools/performance heuristics. For now we use a
@@ -361,7 +362,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
uvm_perf_event_notify_gpu_fault(&va_space->perf_events,
va_block,
gpu->id,
service_context->block_context.policy->preferred_location,
policy->preferred_location,
fault_entry,
++non_replayable_faults->batch_id,
false);
@@ -396,7 +397,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
page_index,
gpu->id,
fault_entry->access_type_mask,
service_context->block_context.policy,
policy,
&thrashing_hint,
UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS,
&read_duplicate);
@@ -678,10 +679,17 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
fault_entry->fault_source.channel_id = user_channel->hw_channel_id;
if (!fault_entry->is_fatal) {
status = uvm_va_block_find_create(fault_entry->va_space,
fault_entry->fault_address,
va_block_context,
&va_block);
if (mm) {
status = uvm_va_block_find_create(fault_entry->va_space,
fault_entry->fault_address,
&va_block_context->hmm.vma,
&va_block);
}
else {
status = uvm_va_block_find_create_managed(fault_entry->va_space,
fault_entry->fault_address,
&va_block);
}
if (status == NV_OK)
status = service_managed_fault_in_block(gpu_va_space->gpu, va_block, fault_entry);
else
@@ -734,8 +742,6 @@ void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu)
// Differently to replayable faults, we do not batch up and preprocess
// non-replayable faults since getting multiple faults on the same
// memory region is not very likely
//
// TODO: Bug 2103669: [UVM/ATS] Optimize ATS fault servicing
for (i = 0; i < cached_faults; ++i) {
status = service_fault(gpu, &gpu->parent->fault_buffer_info.non_replayable.fault_cache[i]);
if (status != NV_OK)

View File

@@ -1322,6 +1322,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
uvm_fault_buffer_entry_t **ordered_fault_cache = batch_context->ordered_fault_cache;
uvm_service_block_context_t *block_context = &replayable_faults->block_service_context;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
const uvm_va_policy_t *policy;
NvU64 end;
// Check that all uvm_fault_access_type_t values can fit into an NvU8
@@ -1347,13 +1348,13 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
UVM_ASSERT(ordered_fault_cache[first_fault_index]->fault_address <= va_block->end);
if (uvm_va_block_is_hmm(va_block)) {
uvm_hmm_find_policy_end(va_block,
&block_context->block_context,
ordered_fault_cache[first_fault_index]->fault_address,
&end);
policy = uvm_hmm_find_policy_end(va_block,
block_context->block_context.hmm.vma,
ordered_fault_cache[first_fault_index]->fault_address,
&end);
}
else {
block_context->block_context.policy = uvm_va_range_get_policy(va_block->va_range);
policy = uvm_va_range_get_policy(va_block->va_range);
end = va_block->end;
}
@@ -1393,7 +1394,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
update_batch_and_notify_fault(gpu,
batch_context,
va_block,
block_context->block_context.policy->preferred_location,
policy->preferred_location,
current_entry,
is_duplicate);
}
@@ -1473,7 +1474,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
page_index,
gpu->id,
service_access_type_mask,
block_context->block_context.policy,
policy,
&thrashing_hint,
UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS,
&read_duplicate);
@@ -1625,21 +1626,25 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
const uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
const uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
const uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
const uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
uvm_page_mask_t *tmp_mask = &ats_context->tmp_mask;
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
UVM_ASSERT(vma);
ats_context->client_type = UVM_FAULT_CLIENT_TYPE_GPC;
uvm_page_mask_or(tmp_mask, write_fault_mask, read_fault_mask);
uvm_page_mask_or(faulted_mask, write_fault_mask, read_fault_mask);
status = uvm_ats_service_faults(gpu_va_space, vma, base, &batch_context->ats_context);
UVM_ASSERT(uvm_page_mask_subset(faults_serviced_mask, tmp_mask));
// Remove prefetched pages from the serviced mask since fault servicing
// failures belonging to prefetch pages need to be ignored.
uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, faulted_mask);
if ((status != NV_OK) || uvm_page_mask_equal(faults_serviced_mask, tmp_mask)) {
UVM_ASSERT(uvm_page_mask_subset(faults_serviced_mask, faulted_mask));
if ((status != NV_OK) || uvm_page_mask_equal(faults_serviced_mask, faulted_mask)) {
(*block_faults) += (fault_index_end - fault_index_start);
return status;
}
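A worked example of the new mask bookkeeping (hypothetical page indices): if pages {2} read-faulted and {5} write-faulted, faulted_mask = {2, 5}; the prefetcher may then grow the read/write fault masks to cover, say, {0..7}. If servicing succeeds on {0, 1, 2, 5} but fails on the remaining prefetch-only pages, and-ing faults_serviced_mask with faulted_mask leaves {2, 5}, which equals faulted_mask, so the prefetch failures are ignored and the sub-VMA is accounted as fully serviced.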
@@ -1867,7 +1872,13 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
va_range_next = uvm_va_space_iter_next(va_range_next, ~0ULL);
}
status = uvm_va_block_find_create_in_range(va_space, va_range, fault_address, va_block_context, &va_block);
if (va_range)
status = uvm_va_block_find_create_in_range(va_space, va_range, fault_address, &va_block);
else if (mm)
status = uvm_hmm_va_block_find_create(va_space, fault_address, &va_block_context->hmm.vma, &va_block);
else
status = NV_ERR_INVALID_ADDRESS;
if (status == NV_OK) {
status = service_fault_batch_block(gpu, va_block, batch_context, fault_index, block_faults);
}

View File

@@ -110,7 +110,20 @@ typedef struct
bool uvm_hmm_is_enabled_system_wide(void)
{
return !uvm_disable_hmm && !g_uvm_global.ats.enabled && uvm_va_space_mm_enabled_system();
if (uvm_disable_hmm)
return false;
if (g_uvm_global.ats.enabled)
return false;
// Confidential Computing and HMM impose mutually exclusive constraints. In
// Confidential Computing the GPU can only access pages resident in vidmem,
// but in HMM pages may be required to be resident in sysmem: file backed
// VMAs, huge pages, etc.
if (g_uvm_global.conf_computing_enabled)
return false;
return uvm_va_space_mm_enabled_system();
}
bool uvm_hmm_is_enabled(uvm_va_space_t *va_space)
@@ -127,32 +140,17 @@ static uvm_va_block_t *hmm_va_block_from_node(uvm_range_tree_node_t *node)
return container_of(node, uvm_va_block_t, hmm.node);
}
NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
void uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
{
uvm_hmm_va_space_t *hmm_va_space = &va_space->hmm;
struct mm_struct *mm = va_space->va_space_mm.mm;
int ret;
if (!uvm_hmm_is_enabled(va_space))
return NV_OK;
uvm_assert_mmap_lock_locked_write(mm);
uvm_assert_rwsem_locked_write(&va_space->lock);
return;
uvm_range_tree_init(&hmm_va_space->blocks);
uvm_mutex_init(&hmm_va_space->blocks_lock, UVM_LOCK_ORDER_LEAF);
// Initialize MMU interval notifiers for this process.
// This allows mmu_interval_notifier_insert() to be called without holding
// the mmap_lock for write.
// Note: there is no __mmu_notifier_unregister(), this call just allocates
// memory which is attached to the mm_struct and freed when the mm_struct
// is freed.
ret = __mmu_notifier_register(NULL, mm);
if (ret)
return errno_to_nv_status(ret);
return NV_OK;
return;
}
void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space)
@@ -325,7 +323,6 @@ static bool hmm_invalidate(uvm_va_block_t *va_block,
region = uvm_va_block_region_from_start_end(va_block, start, end);
va_block_context->hmm.vma = NULL;
va_block_context->policy = NULL;
// We only need to unmap GPUs since Linux handles the CPUs.
for_each_gpu_id_in_mask(id, &va_block->mapped) {
@@ -444,11 +441,11 @@ static void hmm_va_block_init(uvm_va_block_t *va_block,
static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
bool allow_unreadable_vma,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct **vma_out,
uvm_va_block_t **va_block_ptr)
{
struct mm_struct *mm = va_space->va_space_mm.mm;
struct vm_area_struct *vma;
struct mm_struct *mm;
struct vm_area_struct *va_block_vma;
uvm_va_block_t *va_block;
NvU64 start, end;
NV_STATUS status;
@@ -457,15 +454,14 @@ static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
if (!uvm_hmm_is_enabled(va_space))
return NV_ERR_INVALID_ADDRESS;
UVM_ASSERT(mm);
UVM_ASSERT(!va_block_context || va_block_context->mm == mm);
mm = va_space->va_space_mm.mm;
uvm_assert_mmap_lock_locked(mm);
uvm_assert_rwsem_locked(&va_space->lock);
UVM_ASSERT(PAGE_ALIGNED(addr));
// Note that we have to allow PROT_NONE VMAs so that policies can be set.
vma = find_vma(mm, addr);
if (!uvm_hmm_vma_is_valid(vma, addr, allow_unreadable_vma))
va_block_vma = find_vma(mm, addr);
if (!uvm_hmm_vma_is_valid(va_block_vma, addr, allow_unreadable_vma))
return NV_ERR_INVALID_ADDRESS;
// Since we only hold the va_space read lock, there can be multiple
@@ -517,8 +513,8 @@ static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
done:
uvm_mutex_unlock(&va_space->hmm.blocks_lock);
if (va_block_context)
va_block_context->hmm.vma = vma;
if (vma_out)
*vma_out = va_block_vma;
*va_block_ptr = va_block;
return NV_OK;
@@ -532,43 +528,36 @@ err_unlock:
NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct **vma,
uvm_va_block_t **va_block_ptr)
{
return hmm_va_block_find_create(va_space, addr, false, va_block_context, va_block_ptr);
return hmm_va_block_find_create(va_space, addr, false, vma, va_block_ptr);
}
NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr)
NV_STATUS uvm_hmm_find_vma(struct mm_struct *mm, struct vm_area_struct **vma_out, NvU64 addr)
{
struct mm_struct *mm = va_block_context->mm;
struct vm_area_struct *vma;
if (!mm)
return NV_ERR_INVALID_ADDRESS;
uvm_assert_mmap_lock_locked(mm);
vma = find_vma(mm, addr);
if (!uvm_hmm_vma_is_valid(vma, addr, false))
*vma_out = find_vma(mm, addr);
if (!uvm_hmm_vma_is_valid(*vma_out, addr, false))
return NV_ERR_INVALID_ADDRESS;
va_block_context->hmm.vma = vma;
return NV_OK;
}
bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *vma,
uvm_va_block_region_t region)
{
uvm_assert_mutex_locked(&va_block->lock);
if (uvm_va_block_is_hmm(va_block)) {
struct vm_area_struct *vma = va_block_context->hmm.vma;
UVM_ASSERT(vma);
UVM_ASSERT(va_block_context->mm == vma->vm_mm);
uvm_assert_mmap_lock_locked(va_block_context->mm);
UVM_ASSERT(va_block->hmm.va_space->va_space_mm.mm == vma->vm_mm);
uvm_assert_mmap_lock_locked(va_block->hmm.va_space->va_space_mm.mm);
UVM_ASSERT(vma->vm_start <= uvm_va_block_region_start(va_block, region));
UVM_ASSERT(vma->vm_end > uvm_va_block_region_end(va_block, region));
}
@@ -619,8 +608,6 @@ static NV_STATUS hmm_migrate_range(uvm_va_block_t *va_block,
uvm_mutex_lock(&va_block->lock);
uvm_for_each_va_policy_in(policy, va_block, start, end, node, region) {
va_block_context->policy = policy;
// Even though UVM_VA_BLOCK_RETRY_LOCKED() may unlock and relock the
// va_block lock, the policy remains valid because we hold the mmap
// lock so munmap can't remove the policy, and the va_space lock so the
@@ -670,7 +657,6 @@ void uvm_hmm_evict_va_blocks(uvm_va_space_t *va_space)
continue;
block_context->hmm.vma = vma;
block_context->policy = &uvm_va_policy_default;
uvm_hmm_va_block_migrate_locked(va_block,
NULL,
block_context,
@@ -1046,11 +1032,7 @@ static NV_STATUS hmm_set_preferred_location_locked(uvm_va_block_t *va_block,
uvm_processor_mask_test(&old_policy->accessed_by, old_policy->preferred_location))
uvm_processor_mask_set(&set_accessed_by_processors, old_policy->preferred_location);
va_block_context->policy = uvm_va_policy_set_preferred_location(va_block,
region,
preferred_location,
old_policy);
if (!va_block_context->policy)
if (!uvm_va_policy_set_preferred_location(va_block, region, preferred_location, old_policy))
return NV_ERR_NO_MEMORY;
// Establish new remote mappings if the old preferred location had
@@ -1109,7 +1091,7 @@ NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
for (addr = base; addr < last_address; addr = va_block->end + 1) {
NvU64 end;
status = hmm_va_block_find_create(va_space, addr, true, va_block_context, &va_block);
status = hmm_va_block_find_create(va_space, addr, true, &va_block_context->hmm.vma, &va_block);
if (status != NV_OK)
break;
@@ -1151,7 +1133,6 @@ static NV_STATUS hmm_set_accessed_by_start_end_locked(uvm_va_block_t *va_block,
if (uvm_va_policy_is_read_duplicate(&node->policy, va_space))
continue;
va_block_context->policy = &node->policy;
region = uvm_va_block_region_from_start_end(va_block,
max(start, node->node.start),
min(end, node->node.end));
@@ -1196,7 +1177,7 @@ NV_STATUS uvm_hmm_set_accessed_by(uvm_va_space_t *va_space,
for (addr = base; addr < last_address; addr = va_block->end + 1) {
NvU64 end;
status = hmm_va_block_find_create(va_space, addr, true, va_block_context, &va_block);
status = hmm_va_block_find_create(va_space, addr, true, &va_block_context->hmm.vma, &va_block);
if (status != NV_OK)
break;
@@ -1249,8 +1230,6 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
uvm_mutex_lock(&va_block->lock);
uvm_for_each_va_policy_node_in(node, va_block, va_block->start, va_block->end) {
block_context->policy = &node->policy;
for_each_id_in_mask(id, &node->policy.accessed_by) {
status = hmm_set_accessed_by_start_end_locked(va_block,
block_context,
@@ -1309,13 +1288,13 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
}
}
void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
unsigned long addr,
NvU64 *endp)
const uvm_va_policy_t *uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
struct vm_area_struct *vma,
unsigned long addr,
NvU64 *endp)
{
struct vm_area_struct *vma = va_block_context->hmm.vma;
const uvm_va_policy_node_t *node;
const uvm_va_policy_t *policy;
NvU64 end = va_block->end;
uvm_assert_mmap_lock_locked(vma->vm_mm);
@@ -1326,40 +1305,45 @@ void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
node = uvm_va_policy_node_find(va_block, addr);
if (node) {
va_block_context->policy = &node->policy;
policy = &node->policy;
if (end > node->node.end)
end = node->node.end;
}
else {
va_block_context->policy = &uvm_va_policy_default;
policy = &uvm_va_policy_default;
}
*endp = end;
return policy;
}
NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct **vma_out,
uvm_page_index_t page_index,
const uvm_va_policy_t **policy,
uvm_page_index_t *outerp)
{
struct vm_area_struct *vma;
unsigned long addr;
NvU64 end;
uvm_page_index_t outer;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
struct mm_struct *mm = va_space->va_space_mm.mm;
if (!mm)
return NV_ERR_INVALID_ADDRESS;
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
uvm_assert_mmap_lock_locked(va_block_context->mm);
uvm_assert_mmap_lock_locked(mm);
uvm_assert_mutex_locked(&va_block->lock);
addr = uvm_va_block_cpu_page_address(va_block, page_index);
vma = vma_lookup(va_block_context->mm, addr);
if (!vma || !(vma->vm_flags & VM_READ))
*vma_out = vma_lookup(mm, addr);
if (!*vma_out || !((*vma_out)->vm_flags & VM_READ))
return NV_ERR_INVALID_ADDRESS;
va_block_context->hmm.vma = vma;
uvm_hmm_find_policy_end(va_block, va_block_context, addr, &end);
*policy = uvm_hmm_find_policy_end(va_block, *vma_out, addr, &end);
outer = uvm_va_block_cpu_page_index(va_block, end) + 1;
if (*outerp > outer)
@@ -1379,8 +1363,6 @@ static NV_STATUS hmm_clear_thrashing_policy(uvm_va_block_t *va_block,
uvm_mutex_lock(&va_block->lock);
uvm_for_each_va_policy_in(policy, va_block, va_block->start, va_block->end, node, region) {
block_context->policy = policy;
// Unmap may split PTEs and require a retry. Needs to be called
// before the pinned pages information is destroyed.
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
@@ -1424,11 +1406,10 @@ NV_STATUS uvm_hmm_clear_thrashing_policy(uvm_va_space_t *va_space)
}
uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *vma,
const uvm_va_policy_t *policy,
NvU64 address)
{
struct vm_area_struct *vma = va_block_context->hmm.vma;
const uvm_va_policy_t *policy = va_block_context->policy;
NvU64 start, end;
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
@@ -1457,13 +1438,11 @@ uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
}
uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *vma,
NvU64 addr)
{
struct vm_area_struct *vma = va_block_context->hmm.vma;
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
uvm_assert_mmap_lock_locked(va_block_context->mm);
uvm_assert_mmap_lock_locked(va_block->hmm.va_space->va_space_mm.mm);
UVM_ASSERT(vma && addr >= vma->vm_start && addr < vma->vm_end);
if (!(vma->vm_flags & VM_READ))
@@ -2907,8 +2886,6 @@ static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma,
if (status != NV_OK)
return status;
UVM_ASSERT(!uvm_va_policy_is_read_duplicate(va_block_context->policy, va_block->hmm.va_space));
status = uvm_va_block_make_resident_copy(va_block,
va_block_retry,
va_block_context,
@@ -3140,7 +3117,7 @@ NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
for (addr = base; addr < last_address; addr = end + 1) {
struct vm_area_struct *vma;
status = hmm_va_block_find_create(va_space, addr, false, va_block_context, &va_block);
status = hmm_va_block_find_create(va_space, addr, false, &va_block_context->hmm.vma, &va_block);
if (status != NV_OK)
return status;
@@ -3232,7 +3209,6 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
uvm_for_each_va_policy_in(policy, va_block, start, end, node, region) {
npages = uvm_va_block_region_num_pages(region);
va_block_context->policy = policy;
if (out_accessed_by_set && uvm_processor_mask_get_count(&policy->accessed_by) > 0)
*out_accessed_by_set = true;

View File

@@ -49,9 +49,7 @@ typedef struct
bool uvm_hmm_is_enabled_system_wide(void);
// Initialize HMM for the given the va_space.
// Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
// and the va_space lock must be held in write mode.
NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space);
void uvm_hmm_va_space_initialize(uvm_va_space_t *va_space);
// Destroy any HMM state for the given the va_space.
// Locking: va_space lock must be held in write mode.
@@ -90,31 +88,30 @@ typedef struct
// address 'addr' or the VMA does not have at least PROT_READ permission.
// The caller is also responsible for checking that there is no UVM
// va_range covering the given address before calling this function.
// If va_block_context is not NULL, the VMA is cached in
// va_block_context->hmm.vma.
// The VMA is returned in vma_out if it's not NULL.
// Locking: This function must be called with mm retained and locked for
// at least read and the va_space lock at least for read.
NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct **vma_out,
uvm_va_block_t **va_block_ptr);
// Find the VMA for the given address and set va_block_context->hmm.vma.
// Return NV_ERR_INVALID_ADDRESS if va_block_context->mm is NULL or there
// is no VMA associated with the address 'addr' or the VMA does not have at
// least PROT_READ permission.
// Find the VMA for the given address and return it in vma_out. Return
// NV_ERR_INVALID_ADDRESS if mm is NULL or there is no VMA associated with
// the address 'addr' or the VMA does not have at least PROT_READ
// permission.
// Locking: This function must be called with mm retained and locked for
// at least read or mm equal to NULL.
NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr);
NV_STATUS uvm_hmm_find_vma(struct mm_struct *mm, struct vm_area_struct **vma_out, NvU64 addr);
// If va_block is a HMM va_block, check that va_block_context->hmm.vma is
// not NULL and covers the given region. This always returns true and is
// intended to only be used with UVM_ASSERT().
// If va_block is a HMM va_block, check that vma is not NULL and covers the
// given region. This always returns true and is intended to only be used
// with UVM_ASSERT().
// Locking: This function must be called with the va_block lock held and if
// va_block is a HMM block, va_block_context->mm must be retained and
// locked for at least read.
// va_block is a HMM block, va_space->va_space_mm.mm->mmap_lock must be
// retained and locked for at least read.
bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *vma,
uvm_va_block_region_t region);
// Initialize the HMM portion of the service_context.
@@ -225,31 +222,29 @@ typedef struct
return NV_OK;
}
// This function assigns va_block_context->policy to the policy covering
// the given address 'addr' and assigns the ending address '*endp' to the
// minimum of va_block->end, va_block_context->hmm.vma->vm_end - 1, and the
// ending address of the policy range. Note that va_block_context->hmm.vma
// is expected to be initialized before calling this function.
// Locking: This function must be called with
// va_block_context->hmm.vma->vm_mm retained and locked for least read and
// the va_block lock held.
void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
unsigned long addr,
NvU64 *endp);
// This function returns the policy covering the given address 'addr' and
// assigns the ending address '*endp' to the minimum of va_block->end,
// vma->vm_end - 1, and the ending address of the policy range.
// Locking: This function must be called with vma->vm_mm retained and locked
// for at least read and the va_block and va_space lock held.
const uvm_va_policy_t *uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
struct vm_area_struct *vma,
unsigned long addr,
NvU64 *endp);
// This function finds the VMA for the page index 'page_index' and assigns
// it to va_block_context->vma, sets va_block_context->policy to the policy
// covering the given address, and sets the ending page range '*outerp'
// to the minimum of *outerp, va_block_context->hmm.vma->vm_end - 1, the
// ending address of the policy range, and va_block->end.
// Return NV_ERR_INVALID_ADDRESS if no VMA is found; otherwise, NV_OK.
// Locking: This function must be called with
// va_block_context->hmm.vma->vm_mm retained and locked for least read and
// the va_block lock held.
// This function finds the VMA for the page index 'page_index' and returns
// it in *vma, which must not be NULL. It also returns the policy covering
// the given address in *policy, and sets the ending page range '*outerp' to
// the minimum of *outerp, vma->vm_end - 1, the ending address of the policy
// range, and va_block->end.
// Return NV_ERR_INVALID_ADDRESS if no VMA is found; otherwise sets *vma and
// returns NV_OK.
// Locking: This function must be called with mm retained and locked for at
// least read and the va_block and va_space lock held.
NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct **vma,
uvm_page_index_t page_index,
const uvm_va_policy_t **policy,
uvm_page_index_t *outerp);
// Clear thrashing policy information from all HMM va_blocks.
@@ -258,24 +253,21 @@ typedef struct
// Return the expanded region around 'address' limited to the intersection
// of va_block start/end, vma start/end, and policy start/end.
// va_block_context must not be NULL, va_block_context->hmm.vma must be
// valid (this is usually set by uvm_hmm_va_block_find_create()), and
// va_block_context->policy must be valid.
// Locking: the caller must hold mm->mmap_lock in at least read mode, the
// va_space lock must be held in at least read mode, and the va_block lock
// held.
// Locking: the caller must hold va_space->va_space_mm.mm->mmap_lock in at
// least read mode, the va_space lock must be held in at least read mode,
// and the va_block lock held.
uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *vma,
const uvm_va_policy_t *policy,
NvU64 address);
// Return the logical protection allowed of a HMM va_block for the page at
// the given address.
// va_block_context must not be NULL and va_block_context->hmm.vma must be
// valid (this is usually set by uvm_hmm_va_block_find_create()).
// Locking: the caller must hold va_block_context->mm mmap_lock in at least
// read mode.
// the given address within the vma, which must be valid. This is usually
// obtained from uvm_hmm_va_block_find_create().
// Locking: the caller must hold va_space->va_space_mm.mm mmap_lock in at
// least read mode.
uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *vma,
NvU64 addr);
// This is called to service a GPU fault.
@@ -288,9 +280,9 @@ typedef struct
uvm_service_block_context_t *service_context);
// This is called to migrate a region within a HMM va_block.
// va_block_context must not be NULL and va_block_context->policy and
// va_block_context->hmm.vma must be valid.
// Locking: the va_block_context->mm must be retained, mmap_lock must be
// va_block_context must not be NULL and va_block_context->hmm.vma
// must be valid.
// Locking: the va_space->va_space_mm.mm must be retained, mmap_lock must be
// locked, and the va_block lock held.
NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
@@ -303,7 +295,7 @@ typedef struct
// UvmMigrate().
//
// va_block_context must not be NULL. The caller is not required to set
// va_block_context->policy or va_block_context->hmm.vma.
// va_block_context->hmm.vma.
//
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked and
// the va_space read lock must be held.
@@ -412,9 +404,8 @@ typedef struct
return false;
}
static NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
static void uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
{
return NV_OK;
}
static void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space)
@@ -440,19 +431,19 @@ typedef struct
static NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct **vma,
uvm_va_block_t **va_block_ptr)
{
return NV_ERR_INVALID_ADDRESS;
}
static NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr)
static NV_STATUS uvm_hmm_find_vma(struct mm_struct *mm, struct vm_area_struct **vma, NvU64 addr)
{
return NV_OK;
}
static bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *vma,
uvm_va_block_region_t region)
{
return true;
@@ -533,16 +524,19 @@ typedef struct
return NV_ERR_INVALID_ADDRESS;
}
static void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
unsigned long addr,
NvU64 *endp)
static const uvm_va_policy_t *uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
struct vm_area_struct *vma,
unsigned long addr,
NvU64 *endp)
{
UVM_ASSERT(0);
return NULL;
}
static NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct **vma,
uvm_page_index_t page_index,
const uvm_va_policy_t **policy,
uvm_page_index_t *outerp)
{
return NV_OK;
@@ -554,14 +548,15 @@ typedef struct
}
static uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *vma,
const uvm_va_policy_t *policy,
NvU64 address)
{
return (uvm_va_block_region_t){};
}
static uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *vma,
NvU64 addr)
{
return UVM_PROT_NONE;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2022 NVIDIA Corporation
Copyright (c) 2016-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -93,8 +93,9 @@ static bool sysmem_can_be_mapped_on_gpu(uvm_mem_t *sysmem)
{
UVM_ASSERT(uvm_mem_is_sysmem(sysmem));
// If SEV is enabled, only unprotected memory can be mapped
if (g_uvm_global.sev_enabled)
// In Confidential Computing, only unprotected memory can be mapped on the
// GPU
if (g_uvm_global.conf_computing_enabled)
return uvm_mem_is_sysmem_dma(sysmem);
return true;
@@ -737,7 +738,7 @@ static NV_STATUS mem_map_cpu_to_sysmem_kernel(uvm_mem_t *mem)
pages[page_index] = mem_cpu_page(mem, page_index * PAGE_SIZE);
}
if (g_uvm_global.sev_enabled && uvm_mem_is_sysmem_dma(mem))
if (g_uvm_global.conf_computing_enabled && uvm_mem_is_sysmem_dma(mem))
prot = uvm_pgprot_decrypted(PAGE_KERNEL_NOENC);
mem->kernel.cpu_addr = vmap(pages, num_pages, VM_MAP, prot);

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Copyright (c) 2016-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -44,10 +44,10 @@ static NvU32 first_page_size(NvU32 page_sizes)
static inline NV_STATUS __alloc_map_sysmem(NvU64 size, uvm_gpu_t *gpu, uvm_mem_t **sys_mem)
{
if (g_uvm_global.sev_enabled)
if (g_uvm_global.conf_computing_enabled)
return uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(size, gpu, current->mm, sys_mem);
else
return uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, sys_mem);
return uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, sys_mem);
}
static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)
@@ -335,9 +335,6 @@ error:
static bool should_test_page_size(size_t alloc_size, NvU32 page_size)
{
if (g_uvm_global.sev_enabled)
return false;
if (g_uvm_global.num_simulated_devices == 0)
return true;

View File

@@ -223,7 +223,7 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
NV_STATUS status, tracker_status = NV_OK;
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, va_block_context, region));
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, va_block_context->hmm.vma, region));
if (uvm_va_block_is_hmm(va_block)) {
status = uvm_hmm_va_block_migrate_locked(va_block,
@@ -234,9 +234,9 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE);
}
else {
va_block_context->policy = uvm_va_range_get_policy(va_block->va_range);
uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
if (uvm_va_policy_is_read_duplicate(va_block_context->policy, va_space)) {
if (uvm_va_policy_is_read_duplicate(policy, va_space)) {
status = uvm_va_block_make_resident_read_duplicate(va_block,
va_block_retry,
va_block_context,
@@ -371,8 +371,6 @@ static bool va_block_should_do_cpu_preunmap(uvm_va_block_t *va_block,
if (!va_block)
return true;
UVM_ASSERT(va_range_should_do_cpu_preunmap(va_block_context->policy, uvm_va_block_get_va_space(va_block)));
region = uvm_va_block_region_from_start_end(va_block, max(start, va_block->start), min(end, va_block->end));
uvm_mutex_lock(&va_block->lock);
@@ -496,11 +494,9 @@ static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
uvm_tracker_t *out_tracker)
{
NvU64 preunmap_range_start = start;
uvm_va_policy_t *policy = uvm_va_range_get_policy(va_range);
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_range));
should_do_cpu_preunmap = should_do_cpu_preunmap && va_range_should_do_cpu_preunmap(va_block_context->policy,
va_range->va_space);
should_do_cpu_preunmap = should_do_cpu_preunmap && va_range_should_do_cpu_preunmap(policy, va_range->va_space);
// Divide migrations into groups of contiguous VA blocks. This is to trigger
// CPU unmaps for that region before the migration starts.
@@ -577,8 +573,6 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
break;
}
va_block_context->policy = uvm_va_range_get_policy(va_range);
// For UVM-Lite GPUs, the CUDA driver may suballocate a single va_range
// into many range groups. For this reason, we iterate over each va_range first
// then through the range groups within.
@@ -653,6 +647,8 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
if (mm)
uvm_assert_mmap_lock_locked(mm);
else if (!first_va_range)
return NV_ERR_INVALID_ADDRESS;
va_block_context = uvm_va_block_context_alloc(mm);
if (!va_block_context)

View File

@@ -672,6 +672,14 @@ static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *s
.finalize_and_map = uvm_migrate_vma_finalize_and_map_helper,
};
// WAR for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
// invalidates on read-only to read-write upgrades
//
// This code path isn't used on GH180, but we need to maintain consistent
// behaviour on systems that do use it.
if (!vma_is_anonymous(args->vma))
return NV_WARN_NOTHING_TO_DO;
ret = migrate_vma(&uvm_migrate_vma_ops, args->vma, args->start, args->end, args->src, args->dst, state);
if (ret < 0)
return errno_to_nv_status(ret);
@@ -685,6 +693,24 @@ static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *s
if (ret < 0)
return errno_to_nv_status(ret);
// TODO: Bug 2419180: support file-backed pages in migrate_vma, when
// support for it is added to the Linux kernel
//
// A side-effect of migrate_vma_setup() is it calls mmu notifiers even if a
// page can't be migrated (e.g. because it's a non-anonymous mapping). We
// need this side-effect for SMMU on GH180 to ensure any cached read-only
// entries are flushed from SMMU on permission upgrade.
//
// TODO: Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
// invalidates on read-only to read-write upgrades
//
// The above WAR doesn't work for HugeTLBfs mappings because
// migrate_vma_setup() will fail in that case.
if (!vma_is_anonymous(args->vma)) {
migrate_vma_finalize(args);
return NV_WARN_NOTHING_TO_DO;
}
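A minimal sketch of the ordering described in the comments above, assuming the modern migrate_vma_setup()/migrate_vma_pages()/migrate_vma_finalize() kernel API; it is not this driver's code, and the helper name is made up. The point is that migrate_vma_setup() runs, and fires the MMU notifiers, before unsupported VMAs are filtered out.

#include <linux/migrate.h>
#include <linux/mm.h>

/* Hypothetical caller: returns 0 on success or when there is nothing to do. */
static int sketch_migrate_anon_range(struct migrate_vma *args)
{
    int ret = migrate_vma_setup(args); /* Fires MMU notifiers even when no page
                                        * turns out to be migratable, flushing
                                        * stale read-only SMMU entries. */
    if (ret)
        return ret;

    if (!vma_is_anonymous(args->vma)) {
        /* Filter unsupported VMAs only after the setup side effect happened. */
        migrate_vma_finalize(args);
        return 0;
    }

    /* ... allocate destination pages and copy the source pages here ... */
    migrate_vma_pages(args);
    migrate_vma_finalize(args);
    return 0;
}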
uvm_migrate_vma_alloc_and_copy(args, state);
if (state->status == NV_OK) {
migrate_vma_pages(args);
@@ -858,9 +884,13 @@ static NV_STATUS migrate_pageable_vma(struct vm_area_struct *vma,
start = max(start, vma->vm_start);
outer = min(outer, vma->vm_end);
// TODO: Bug 2419180: support file-backed pages in migrate_vma, when
// support for it is added to the Linux kernel
if (!vma_is_anonymous(vma))
// migrate_vma only supports anonymous VMAs. We check for those after
// calling migrate_vma_setup() to work around Bug 4130089. We need to check
// for HugeTLB VMAs here because migrate_vma_setup() will return a fatal
// error for those.
// TODO: Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
// invalidates on read-only to read-write upgrades
if (is_vm_hugetlb_page(vma))
return NV_WARN_NOTHING_TO_DO;
if (uvm_processor_mask_empty(&va_space->registered_gpus))

View File

@@ -34,8 +34,8 @@ typedef struct
{
uvm_va_space_t *va_space;
struct mm_struct *mm;
unsigned long start;
unsigned long length;
const unsigned long start;
const unsigned long length;
uvm_processor_id_t dst_id;
// dst_node_id may be clobbered by uvm_migrate_pageable().

View File

@@ -906,11 +906,10 @@ error:
// --------------|-------------------------||----------------|----------------
// vidmem | - || vidmem | false
// sysmem | - || sysmem | false
// default | <not set> || vidmem | true (1)
// default | <not set> || vidmem | true
// default | vidmem || vidmem | false
// default | sysmem || sysmem | false
//
// (1) When SEV mode is enabled, the fallback path is disabled.
//
// In SR-IOV heavy, the page tree must be in vidmem, to prevent guest drivers
// from updating GPU page tables without hypervisor knowledge.
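Illustrative, standalone restatement of the table above (columns: requested location, page_table_aperture module parameter, resulting tree->location, tree->location_sys_fallback), mirroring the selection logic in page_tree_set_location() below. The enum and function names are made up for the sketch.

#include <stdbool.h>

typedef enum { APERTURE_DEFAULT, APERTURE_VID, APERTURE_SYS } aperture_t;

/* Hypothetical mirror of the location selection shown in this hunk. */
static void resolve_page_tree_location(aperture_t requested,
                                       aperture_t module_param,
                                       aperture_t *location,
                                       bool *sys_fallback)
{
    *sys_fallback = false;

    if (requested != APERTURE_DEFAULT) {
        /* "vidmem" and "sysmem" rows: honor the explicit request. */
        *location = requested;
    }
    else if (module_param == APERTURE_DEFAULT) {
        /* "default | <not set>" row: prefer vidmem, allow sysmem fallback. */
        *location = APERTURE_VID;
        *sys_fallback = true;
    }
    else {
        /* "default | vidmem" and "default | sysmem" rows: follow the param. */
        *location = module_param;
    }
}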
@@ -926,28 +925,27 @@ error:
//
static void page_tree_set_location(uvm_page_tree_t *tree, uvm_aperture_t location)
{
bool should_location_be_vidmem;
UVM_ASSERT(tree->gpu != NULL);
UVM_ASSERT_MSG((location == UVM_APERTURE_VID) ||
(location == UVM_APERTURE_SYS) ||
(location == UVM_APERTURE_DEFAULT),
"Invalid location %s (%d)\n", uvm_aperture_string(location), (int)location);
should_location_be_vidmem = uvm_gpu_is_virt_mode_sriov_heavy(tree->gpu)
|| uvm_conf_computing_mode_enabled(tree->gpu);
// The page tree of a "fake" GPU used during page tree testing can be in
// sysmem even if should_location_be_vidmem is true. A fake GPU can be
// identified by having no channel manager.
if ((tree->gpu->channel_manager != NULL) && should_location_be_vidmem)
UVM_ASSERT(location == UVM_APERTURE_VID);
// sysmem in scenarios where a "real" GPU must be in vidmem. Fake GPUs can
// be identified by having no channel manager.
if (tree->gpu->channel_manager != NULL) {
if (uvm_gpu_is_virt_mode_sriov_heavy(tree->gpu))
UVM_ASSERT(location == UVM_APERTURE_VID);
else if (uvm_conf_computing_mode_enabled(tree->gpu))
UVM_ASSERT(location == UVM_APERTURE_VID);
}
if (location == UVM_APERTURE_DEFAULT) {
if (page_table_aperture == UVM_APERTURE_DEFAULT) {
tree->location = UVM_APERTURE_VID;
// See the comment (1) above.
tree->location_sys_fallback = !g_uvm_global.sev_enabled;
tree->location_sys_fallback = true;
}
else {
tree->location = page_table_aperture;

View File

@@ -218,57 +218,11 @@ static void grow_fault_granularity(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
}
}
// Within a block we only allow prefetching to a single processor. Therefore,
// if two processors are accessing non-overlapping regions within the same
// block they won't benefit from prefetching.
//
// TODO: Bug 1778034: [uvm] Explore prefetching to different processors within
// a VA block.
static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_page_mask_t *prefetch_pages,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree)
static void init_bitmap_tree_from_region(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_va_block_region_t max_prefetch_region,
const uvm_page_mask_t *resident_mask,
const uvm_page_mask_t *faulted_pages)
{
uvm_page_index_t page_index;
const uvm_page_mask_t *resident_mask = NULL;
const uvm_page_mask_t *thrashing_pages = NULL;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
const uvm_va_policy_t *policy = va_block_context->policy;
uvm_va_block_region_t max_prefetch_region;
NvU32 big_page_size;
uvm_va_block_region_t big_pages_region;
if (!uvm_id_equal(va_block->prefetch_info.last_migration_proc_id, new_residency)) {
va_block->prefetch_info.last_migration_proc_id = new_residency;
va_block->prefetch_info.fault_migrations_to_last_proc = 0;
}
// Compute the expanded region that prefetching is allowed from.
if (uvm_va_block_is_hmm(va_block)) {
max_prefetch_region = uvm_hmm_get_prefetch_region(va_block,
va_block_context,
uvm_va_block_region_start(va_block, faulted_region));
}
else {
max_prefetch_region = uvm_va_block_region_from_block(va_block);
}
uvm_page_mask_zero(prefetch_pages);
if (UVM_ID_IS_CPU(new_residency) || va_block->gpus[uvm_id_gpu_index(new_residency)] != NULL)
resident_mask = uvm_va_block_resident_mask_get(va_block, new_residency);
// If this is a first-touch fault and the destination processor is the
// preferred location, populate the whole max_prefetch_region.
if (uvm_processor_mask_empty(&va_block->resident) &&
uvm_id_equal(new_residency, policy->preferred_location)) {
uvm_page_mask_region_fill(prefetch_pages, max_prefetch_region);
goto done;
}
if (resident_mask)
uvm_page_mask_or(&bitmap_tree->pages, resident_mask, faulted_pages);
else
@@ -277,6 +231,29 @@ static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_blo
// If we are using a subregion of the va_block, align bitmap_tree
uvm_page_mask_shift_right(&bitmap_tree->pages, &bitmap_tree->pages, max_prefetch_region.first);
bitmap_tree->offset = 0;
bitmap_tree->leaf_count = uvm_va_block_region_num_pages(max_prefetch_region);
bitmap_tree->level_count = ilog2(roundup_pow_of_two(bitmap_tree->leaf_count)) + 1;
}
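A standalone illustration (not driver code) of the level_count formula used just above, with open-coded stand-ins for the kernel's ilog2() and roundup_pow_of_two(): a full 512-page (2MB) block yields a 10-level tree, and a 5-page region rounds up to 8 leaves and 4 levels.

#include <stdio.h>

static unsigned int roundup_pow_of_two_sketch(unsigned int x)
{
    unsigned int p = 1;

    while (p < x)
        p <<= 1;
    return p;
}

static unsigned int ilog2_sketch(unsigned int x)
{
    unsigned int l = 0;

    while (x >>= 1)
        l++;
    return l;
}

int main(void)
{
    unsigned int leaf_counts[] = { 1, 5, 512 };
    unsigned int i;

    for (i = 0; i < 3; i++) {
        unsigned int leaves = leaf_counts[i];
        unsigned int levels = ilog2_sketch(roundup_pow_of_two_sketch(leaves)) + 1;

        /* leaf_count=1 -> 1 level, 5 -> 4 levels, 512 -> 10 levels */
        printf("leaf_count=%u -> level_count=%u\n", leaves, levels);
    }
    return 0;
}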
static void update_bitmap_tree_from_va_block(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t max_prefetch_region)
{
NvU32 big_page_size;
uvm_va_block_region_t big_pages_region;
uvm_va_space_t *va_space;
const uvm_page_mask_t *thrashing_pages;
UVM_ASSERT(va_block);
UVM_ASSERT(va_block_context);
va_space = uvm_va_block_get_va_space(va_block);
// Get the big page size for the new residency.
// Assume 64K size if the new residency is the CPU or no GPU va space is
// registered in the current process for this GPU.
@@ -302,13 +279,9 @@ static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_blo
UVM_ASSERT(bitmap_tree->leaf_count <= PAGES_PER_UVM_VA_BLOCK);
uvm_page_mask_shift_left(&bitmap_tree->pages, &bitmap_tree->pages, bitmap_tree->offset);
}
else {
bitmap_tree->offset = 0;
bitmap_tree->leaf_count = uvm_va_block_region_num_pages(max_prefetch_region);
}
bitmap_tree->level_count = ilog2(roundup_pow_of_two(bitmap_tree->leaf_count)) + 1;
bitmap_tree->level_count = ilog2(roundup_pow_of_two(bitmap_tree->leaf_count)) + 1;
}
thrashing_pages = uvm_perf_thrashing_get_thrashing_pages(va_block);
@@ -320,25 +293,99 @@ static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_blo
max_prefetch_region,
faulted_pages,
thrashing_pages);
}
// Do not compute prefetch regions with faults on pages that are thrashing
if (thrashing_pages)
uvm_page_mask_andnot(&va_block_context->scratch_page_mask, faulted_pages, thrashing_pages);
else
uvm_page_mask_copy(&va_block_context->scratch_page_mask, faulted_pages);
static void compute_prefetch_mask(uvm_va_block_region_t faulted_region,
uvm_va_block_region_t max_prefetch_region,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
const uvm_page_mask_t *faulted_pages,
uvm_page_mask_t *out_prefetch_mask)
{
uvm_page_index_t page_index;
// Update the tree using the scratch mask to compute the pages to prefetch
for_each_va_block_page_in_region_mask(page_index, &va_block_context->scratch_page_mask, faulted_region) {
uvm_page_mask_zero(out_prefetch_mask);
// Update the tree using the faulted mask to compute the pages to prefetch.
for_each_va_block_page_in_region_mask(page_index, faulted_pages, faulted_region) {
uvm_va_block_region_t region = compute_prefetch_region(page_index, bitmap_tree, max_prefetch_region);
uvm_page_mask_region_fill(prefetch_pages, region);
uvm_page_mask_region_fill(out_prefetch_mask, region);
// Early out if we have already prefetched until the end of the VA block
if (region.outer == max_prefetch_region.outer)
break;
}
}
// Within a block we only allow prefetching to a single processor. Therefore,
// if two processors are accessing non-overlapping regions within the same
// block they won't benefit from prefetching.
//
// TODO: Bug 1778034: [uvm] Explore prefetching to different processors within
// a VA block.
static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_page_mask_t *prefetch_pages,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree)
{
const uvm_page_mask_t *resident_mask = NULL;
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, faulted_region);
uvm_va_block_region_t max_prefetch_region;
const uvm_page_mask_t *thrashing_pages = uvm_perf_thrashing_get_thrashing_pages(va_block);
if (!uvm_id_equal(va_block->prefetch_info.last_migration_proc_id, new_residency)) {
va_block->prefetch_info.last_migration_proc_id = new_residency;
va_block->prefetch_info.fault_migrations_to_last_proc = 0;
}
// Compute the expanded region that prefetching is allowed from.
if (uvm_va_block_is_hmm(va_block)) {
max_prefetch_region = uvm_hmm_get_prefetch_region(va_block,
va_block_context->hmm.vma,
policy,
uvm_va_block_region_start(va_block, faulted_region));
}
else {
max_prefetch_region = uvm_va_block_region_from_block(va_block);
}
uvm_page_mask_zero(prefetch_pages);
if (UVM_ID_IS_CPU(new_residency) || va_block->gpus[uvm_id_gpu_index(new_residency)] != NULL)
resident_mask = uvm_va_block_resident_mask_get(va_block, new_residency);
// If this is a first-touch fault and the destination processor is the
// preferred location, populate the whole max_prefetch_region.
if (uvm_processor_mask_empty(&va_block->resident) &&
uvm_id_equal(new_residency, policy->preferred_location)) {
uvm_page_mask_region_fill(prefetch_pages, max_prefetch_region);
}
else {
init_bitmap_tree_from_region(bitmap_tree, max_prefetch_region, resident_mask, faulted_pages);
update_bitmap_tree_from_va_block(bitmap_tree,
va_block,
va_block_context,
new_residency,
faulted_pages,
max_prefetch_region);
// Do not compute prefetch regions with faults on pages that are thrashing
if (thrashing_pages)
uvm_page_mask_andnot(&va_block_context->scratch_page_mask, faulted_pages, thrashing_pages);
else
uvm_page_mask_copy(&va_block_context->scratch_page_mask, faulted_pages);
compute_prefetch_mask(faulted_region,
max_prefetch_region,
bitmap_tree,
&va_block_context->scratch_page_mask,
prefetch_pages);
}
done:
// Do not prefetch pages that are going to be migrated/populated due to a
// fault
uvm_page_mask_andnot(prefetch_pages, prefetch_pages, faulted_pages);
@@ -364,31 +411,58 @@ done:
return uvm_page_mask_weight(prefetch_pages);
}
void uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_perf_prefetch_hint_t *out_hint)
bool uvm_perf_prefetch_enabled(uvm_va_space_t *va_space)
{
if (!g_uvm_perf_prefetch_enable)
return false;
UVM_ASSERT(va_space);
return va_space->test.page_prefetch_enabled;
}
void uvm_perf_prefetch_compute_ats(uvm_va_space_t *va_space,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_va_block_region_t max_prefetch_region,
const uvm_page_mask_t *residency_mask,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_page_mask_t *out_prefetch_mask)
{
UVM_ASSERT(faulted_pages);
UVM_ASSERT(bitmap_tree);
UVM_ASSERT(out_prefetch_mask);
uvm_page_mask_zero(out_prefetch_mask);
if (!uvm_perf_prefetch_enabled(va_space))
return;
init_bitmap_tree_from_region(bitmap_tree, max_prefetch_region, residency_mask, faulted_pages);
compute_prefetch_mask(faulted_region, max_prefetch_region, bitmap_tree, faulted_pages, out_prefetch_mask);
}
void uvm_perf_prefetch_get_hint_va_block(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_perf_prefetch_hint_t *out_hint)
{
const uvm_va_policy_t *policy = va_block_context->policy;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_page_mask_t *prefetch_pages = &out_hint->prefetch_pages_mask;
NvU32 pending_prefetch_pages;
uvm_assert_rwsem_locked(&va_space->lock);
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, faulted_region));
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, va_block_context, faulted_region));
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, va_block_context->hmm.vma, faulted_region));
out_hint->residency = UVM_ID_INVALID;
uvm_page_mask_zero(prefetch_pages);
if (!g_uvm_perf_prefetch_enable)
return;
if (!va_space->test.page_prefetch_enabled)
if (!uvm_perf_prefetch_enabled(va_space))
return;
pending_prefetch_pages = uvm_perf_prefetch_prenotify_fault_migrations(va_block,

View File

@@ -61,21 +61,41 @@ typedef struct
// Global initialization function (no clean up needed).
NV_STATUS uvm_perf_prefetch_init(void);
// Returns whether prefetching is enabled in the VA space.
// va_space cannot be NULL.
bool uvm_perf_prefetch_enabled(uvm_va_space_t *va_space);
// Return the prefetch mask with the pages that may be prefetched in an ATS
// block. An ATS block is a system-allocated memory block with base aligned to
// UVM_VA_BLOCK_SIZE and a maximum size of UVM_VA_BLOCK_SIZE. The faulted_pages
// mask and faulted_region are the pages being faulted on the given residency.
//
// Only residency_mask can be NULL.
//
// Locking: The caller must hold the va_space lock.
void uvm_perf_prefetch_compute_ats(uvm_va_space_t *va_space,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_va_block_region_t max_prefetch_region,
const uvm_page_mask_t *residency_mask,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_page_mask_t *out_prefetch_mask);
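Standalone illustration of the "ATS block" bounds described above, assuming UVM_VA_BLOCK_SIZE is 2MB; the macro name and the example address are made up and not taken from this change.

#include <stdint.h>
#include <stdio.h>

#define SKETCH_ATS_BLOCK_SIZE (2ull * 1024 * 1024) /* assumed UVM_VA_BLOCK_SIZE */

int main(void)
{
    uint64_t fault_addr = 0x7f1234567890ull;          /* made-up faulting address */
    uint64_t block_base = fault_addr & ~(SKETCH_ATS_BLOCK_SIZE - 1);
    uint64_t block_end  = block_base + SKETCH_ATS_BLOCK_SIZE;

    /* Prefetching for this fault is bounded to the containing 2MB block. */
    printf("fault %#llx -> prefetch window [%#llx, %#llx)\n",
           (unsigned long long)fault_addr,
           (unsigned long long)block_base,
           (unsigned long long)block_end);
    return 0;
}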
// Return a hint with the pages that may be prefetched in the block.
// The faulted_pages mask and faulted_region are the pages being migrated to
// the given residency.
// va_block_context must not be NULL, va_block_context->policy must be valid,
// and if the va_block is a HMM block, va_block_context->hmm.vma must be valid
// which also means the va_block_context->mm is not NULL, retained, and locked
// for at least read.
// va_block_context must not be NULL, and if the va_block is a HMM
// block, va_block_context->hmm.vma must be valid which also means the
// va_block_context->mm is not NULL, retained, and locked for at least
// read.
// Locking: The caller must hold the va_space lock and va_block lock.
void uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_perf_prefetch_hint_t *out_hint);
void uvm_perf_prefetch_get_hint_va_block(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_perf_prefetch_hint_t *out_hint);
void uvm_perf_prefetch_bitmap_tree_iter_init(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_page_index_t page_index,

View File

@@ -1095,7 +1095,7 @@ static NV_STATUS unmap_remote_pinned_pages(uvm_va_block_t *va_block,
NV_STATUS tracker_status;
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
uvm_processor_id_t processor_id;
const uvm_va_policy_t *policy = va_block_context->policy;
const uvm_va_policy_t *policy = uvm_va_policy_get(va_block, uvm_va_block_region_start(va_block, region));
uvm_assert_mutex_locked(&va_block->lock);
@@ -1141,10 +1141,9 @@ NV_STATUS uvm_perf_thrashing_unmap_remote_pinned_pages_all(uvm_va_block_t *va_bl
{
block_thrashing_info_t *block_thrashing;
uvm_processor_mask_t unmap_processors;
const uvm_va_policy_t *policy = va_block_context->policy;
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, region));
block_thrashing = thrashing_info_get(va_block);
if (!block_thrashing || !block_thrashing->pages)
@@ -1867,8 +1866,6 @@ static void thrashing_unpin_pages(struct work_struct *work)
UVM_ASSERT(uvm_page_mask_test(&block_thrashing->pinned_pages.mask, page_index));
uvm_va_block_context_init(va_block_context, NULL);
va_block_context->policy =
uvm_va_policy_get(va_block, uvm_va_block_cpu_page_address(va_block, page_index));
uvm_perf_thrashing_unmap_remote_pinned_pages_all(va_block,
va_block_context,
@@ -2123,8 +2120,6 @@ NV_STATUS uvm_test_set_page_thrashing_policy(UVM_TEST_SET_PAGE_THRASHING_POLICY_
uvm_va_block_region_t va_block_region = uvm_va_block_region_from_block(va_block);
uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, NULL);
block_context->policy = uvm_va_range_get_policy(va_range);
uvm_mutex_lock(&va_block->lock);
// Unmap may split PTEs and require a retry. Needs to be called

View File

@@ -103,11 +103,11 @@ void uvm_perf_thrashing_unload(uvm_va_space_t *va_space);
// Destroy the thrashing detection struct for the given block.
void uvm_perf_thrashing_info_destroy(uvm_va_block_t *va_block);
// Unmap remote mappings from all processors on the pinned pages
// described by region and block_thrashing->pinned pages.
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid() in uvm_va_block.h.
// Locking: the va_block lock must be held.
// Unmap remote mappings from all processors on the pinned pages described by
// region and block_thrashing->pinned pages. va_block_context must not be NULL
// and policy for the region must match. See the comments for
// uvm_va_block_check_policy_is_valid() in uvm_va_block.h. Locking: the
// va_block lock must be held.
NV_STATUS uvm_perf_thrashing_unmap_remote_pinned_pages_all(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_va_block_region_t region);

View File

@@ -3820,18 +3820,11 @@ NV_STATUS uvm_test_evict_chunk(UVM_TEST_EVICT_CHUNK_PARAMS *params, struct file
// For virtual mode, look up and retain the block first so that eviction can
// be started without the VA space lock held.
if (params->eviction_mode == UvmTestEvictModeVirtual) {
uvm_va_block_context_t *block_context;
if (mm)
status = uvm_va_block_find_create(va_space, params->address, NULL, &block);
else
status = uvm_va_block_find_create_managed(va_space, params->address, &block);
block_context = uvm_va_block_context_alloc(mm);
if (!block_context) {
status = NV_ERR_NO_MEMORY;
uvm_va_space_up_read(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);
goto out;
}
status = uvm_va_block_find_create(va_space, params->address, block_context, &block);
uvm_va_block_context_free(block_context);
if (status != NV_OK) {
uvm_va_space_up_read(va_space);
uvm_va_space_mm_or_current_release_unlock(va_space, mm);

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -324,7 +324,7 @@ static NV_STATUS gpu_mem_check(uvm_gpu_t *gpu,
// TODO: Bug 3839176: [UVM][HCC][uvm_test] Update tests that assume GPU
// engines can directly access sysmem
// Skip this test for now. To enable this test under SEV,
// Skip this test for now. To enable this test in Confidential Computing,
// the GPU->CPU CE copy needs to be updated so it uses encryption when
// CC is enabled.
if (uvm_conf_computing_mode_enabled(gpu))
@@ -1223,8 +1223,6 @@ static NV_STATUS test_indirect_peers(uvm_gpu_t *owning_gpu, uvm_gpu_t *accessing
if (!chunks)
return NV_ERR_NO_MEMORY;
UVM_ASSERT(!g_uvm_global.sev_enabled);
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(UVM_CHUNK_SIZE_MAX, current->mm, &verif_mem), out);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(verif_mem, owning_gpu), out);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(verif_mem, accessing_gpu), out);

View File

@@ -160,7 +160,7 @@ static NV_STATUS preferred_location_unmap_remote_pages(uvm_va_block_t *va_block,
NV_STATUS status = NV_OK;
NV_STATUS tracker_status;
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
const uvm_va_policy_t *policy = va_block_context->policy;
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);
uvm_processor_id_t preferred_location = policy->preferred_location;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
const uvm_page_mask_t *mapped_mask;
@@ -279,6 +279,9 @@ static NV_STATUS preferred_location_set(uvm_va_space_t *va_space,
return NV_OK;
}
if (!mm)
return NV_ERR_INVALID_ADDRESS;
return uvm_hmm_set_preferred_location(va_space, preferred_location, base, last_address, out_tracker);
}
@@ -445,7 +448,6 @@ NV_STATUS uvm_va_block_set_accessed_by_locked(uvm_va_block_t *va_block,
NV_STATUS tracker_status;
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
status = uvm_va_block_add_mappings(va_block,
va_block_context,
@@ -467,13 +469,13 @@ NV_STATUS uvm_va_block_set_accessed_by(uvm_va_block_t *va_block,
uvm_va_block_region_t region = uvm_va_block_region_from_block(va_block);
NV_STATUS status;
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
// Read duplication takes precedence over SetAccessedBy. Do not add mappings
// if read duplication is enabled.
if (uvm_va_policy_is_read_duplicate(va_block_context->policy, va_space))
if (uvm_va_policy_is_read_duplicate(policy, va_space))
return NV_OK;
status = UVM_VA_BLOCK_LOCK_RETRY(va_block,
@@ -592,8 +594,15 @@ static NV_STATUS accessed_by_set(uvm_va_space_t *va_space,
UVM_ASSERT(va_range_last->node.end >= last_address);
}
else {
// NULL mm case already filtered by uvm_api_range_type_check()
UVM_ASSERT(mm);
UVM_ASSERT(type == UVM_API_RANGE_TYPE_HMM);
status = uvm_hmm_set_accessed_by(va_space, processor_id, set_bit, base, last_address, &local_tracker);
status = uvm_hmm_set_accessed_by(va_space,
processor_id,
set_bit,
base,
last_address,
&local_tracker);
}
done:
@@ -656,7 +665,6 @@ NV_STATUS uvm_va_block_set_read_duplication(uvm_va_block_t *va_block,
// TODO: Bug 3660922: need to implement HMM read duplication support.
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
status = UVM_VA_BLOCK_LOCK_RETRY(va_block, &va_block_retry,
va_block_set_read_duplication_locked(va_block,
@@ -675,7 +683,7 @@ static NV_STATUS va_block_unset_read_duplication_locked(uvm_va_block_t *va_block
uvm_processor_id_t processor_id;
uvm_va_block_region_t block_region = uvm_va_block_region_from_block(va_block);
uvm_page_mask_t *break_read_duplication_pages = &va_block_context->caller_page_mask;
const uvm_va_policy_t *policy = va_block_context->policy;
const uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
uvm_processor_id_t preferred_location = policy->preferred_location;
uvm_processor_mask_t accessed_by = policy->accessed_by;
@@ -757,7 +765,6 @@ NV_STATUS uvm_va_block_unset_read_duplication(uvm_va_block_t *va_block,
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
// Restore all SetAccessedBy mappings
status = UVM_VA_BLOCK_LOCK_RETRY(va_block, &va_block_retry,
@@ -915,7 +922,6 @@ static NV_STATUS system_wide_atomics_set(uvm_va_space_t *va_space, const NvProce
if (va_range->type != UVM_VA_RANGE_TYPE_MANAGED)
continue;
va_block_context->policy = uvm_va_range_get_policy(va_range);
for_each_va_block_in_va_range(va_range, va_block) {
uvm_page_mask_t *non_resident_pages = &va_block_context->caller_page_mask;

View File

@@ -264,7 +264,6 @@ NV_STATUS uvm_range_group_va_range_migrate(uvm_va_range_t *va_range,
return NV_ERR_NO_MEMORY;
uvm_assert_rwsem_locked(&va_range->va_space->lock);
va_block_context->policy = uvm_va_range_get_policy(va_range);
// Iterate over blocks, populating them if necessary
for (i = uvm_va_range_block_index(va_range, start); i <= uvm_va_range_block_index(va_range, end); ++i) {

View File

@@ -2069,7 +2069,11 @@ static NV_STATUS tools_access_process_memory(uvm_va_space_t *va_space,
// The RM flavor of the lock is needed to perform ECC checks.
uvm_va_space_down_read_rm(va_space);
status = uvm_va_block_find_create(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), block_context, &block);
if (mm)
status = uvm_va_block_find_create(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), &block_context->hmm.vma, &block);
else
status = uvm_va_block_find_create_managed(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), &block);
if (status != NV_OK)
goto unlock_and_exit;

View File

@@ -106,36 +106,6 @@ uvm_va_space_t *uvm_va_block_get_va_space(uvm_va_block_t *va_block)
return va_space;
}
bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
const uvm_va_policy_t *policy,
uvm_va_block_region_t region)
{
uvm_assert_mutex_locked(&va_block->lock);
if (uvm_va_block_is_hmm(va_block)) {
const uvm_va_policy_node_t *node;
if (uvm_va_policy_is_default(policy)) {
// There should only be the default policy within the region.
node = uvm_va_policy_node_iter_first(va_block,
uvm_va_block_region_start(va_block, region),
uvm_va_block_region_end(va_block, region));
UVM_ASSERT(!node);
}
else {
// The policy node should cover the region.
node = uvm_va_policy_node_from_policy(policy);
UVM_ASSERT(node->node.start <= uvm_va_block_region_start(va_block, region));
UVM_ASSERT(node->node.end >= uvm_va_block_region_end(va_block, region));
}
}
else {
UVM_ASSERT(policy == uvm_va_range_get_policy(va_block->va_range));
}
return true;
}
static NvU64 block_gpu_pte_flag_cacheable(uvm_va_block_t *block, uvm_gpu_t *gpu, uvm_processor_id_t resident_id)
{
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
@@ -3697,7 +3667,6 @@ NV_STATUS uvm_va_block_make_resident_copy(uvm_va_block_t *va_block,
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_is_hmm(va_block) || va_block->va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
resident_mask = block_resident_mask_get_alloc(va_block, dest_id);
if (!resident_mask)
@@ -3944,7 +3913,6 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
// TODO: Bug 3660922: need to implement HMM read duplication support.
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
va_block_context->make_resident.dest_id = dest_id;
va_block_context->make_resident.cause = cause;
@@ -4742,7 +4710,7 @@ static void block_unmap_cpu(uvm_va_block_t *block, uvm_va_block_region_t region,
// Given a mask of mapped pages, returns true if any of the pages in the mask
// are mapped remotely by the given GPU.
static bool block_has_remote_mapping_gpu(uvm_va_block_t *block,
uvm_va_block_context_t *block_context,
uvm_page_mask_t *scratch_page_mask,
uvm_gpu_id_t gpu_id,
const uvm_page_mask_t *mapped_pages)
{
@@ -4764,7 +4732,7 @@ static bool block_has_remote_mapping_gpu(uvm_va_block_t *block,
}
// Remote pages are pages which are mapped but not resident locally
return uvm_page_mask_andnot(&block_context->scratch_page_mask, mapped_pages, &gpu_state->resident);
return uvm_page_mask_andnot(scratch_page_mask, mapped_pages, &gpu_state->resident);
}
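Standalone illustration (not driver code) of the "mapped but not locally resident" test above, with a plain 64-bit word standing in for uvm_page_mask_t.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same idea as uvm_page_mask_andnot(): writes "mapped AND NOT resident" to
 * *out and reports whether any such (remote) page exists. */
static bool sketch_remote_pages(uint64_t mapped, uint64_t resident, uint64_t *out)
{
    *out = mapped & ~resident;
    return *out != 0;
}

int main(void)
{
    uint64_t remote;
    /* Pages 0-3 mapped, pages 0-1 resident locally: pages 2-3 are remote. */
    bool any = sketch_remote_pages(0xf, 0x3, &remote);

    printf("any remote: %d, mask: %#llx\n", any, (unsigned long long)remote);
    return 0;
}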
// Writes pte_clear_val to the 4k PTEs covered by clear_page_mask. If
@@ -6659,7 +6627,7 @@ static NV_STATUS block_unmap_gpu(uvm_va_block_t *block,
if (status != NV_OK)
return status;
only_local_mappings = !block_has_remote_mapping_gpu(block, block_context, gpu->id, pages_to_unmap);
only_local_mappings = !block_has_remote_mapping_gpu(block, &block_context->scratch_page_mask, gpu->id, pages_to_unmap);
tlb_membar = uvm_hal_downgrade_membar_type(gpu, only_local_mappings);
status = uvm_push_begin_acquire(gpu->channel_manager,
@@ -6794,16 +6762,15 @@ static NV_STATUS uvm_cpu_insert_page(struct vm_area_struct *vma,
}
static uvm_prot_t compute_logical_prot(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *hmm_vma,
uvm_page_index_t page_index)
{
struct vm_area_struct *vma;
uvm_prot_t logical_prot;
if (uvm_va_block_is_hmm(va_block)) {
NvU64 addr = uvm_va_block_cpu_page_address(va_block, page_index);
logical_prot = uvm_hmm_compute_logical_prot(va_block, va_block_context, addr);
logical_prot = uvm_hmm_compute_logical_prot(va_block, hmm_vma, addr);
}
else {
uvm_va_range_t *va_range = va_block->va_range;
@@ -6815,6 +6782,8 @@ static uvm_prot_t compute_logical_prot(uvm_va_block_t *va_block,
logical_prot = UVM_PROT_NONE;
}
else {
struct vm_area_struct *vma;
vma = uvm_va_range_vma(va_range);
if (!(vma->vm_flags & VM_READ))
@@ -6864,13 +6833,15 @@ static struct page *block_page_get(uvm_va_block_t *block, block_phys_page_t bloc
// with new_prot permissions
// - Guarantee that vm_insert_page is safe to use (vma->vm_mm has a reference
// and mmap_lock is held in at least read mode)
// - For HMM blocks, guarantee that the vma is valid and safe to use, that
// vma->vm_mm has a reference, and that mmap_lock is held in at least read
// mode
// - Ensure that the struct page corresponding to the physical memory being
// mapped exists
// - Manage the block's residency bitmap
// - Ensure that the block hasn't been killed (block->va_range is present)
// - Update the pte/mapping tracking state on success
static NV_STATUS block_map_cpu_page_to(uvm_va_block_t *block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *hmm_vma,
uvm_processor_id_t resident_id,
uvm_page_index_t page_index,
uvm_prot_t new_prot)
@@ -6883,7 +6854,7 @@ static NV_STATUS block_map_cpu_page_to(uvm_va_block_t *block,
NvU64 addr;
struct page *page;
UVM_ASSERT(uvm_va_block_is_hmm(block) || va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
UVM_ASSERT((uvm_va_block_is_hmm(block) && hmm_vma) || va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
UVM_ASSERT(new_prot != UVM_PROT_NONE);
UVM_ASSERT(new_prot < UVM_PROT_MAX);
UVM_ASSERT(uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(resident_id)], UVM_ID_CPU));
@@ -6904,7 +6875,7 @@ static NV_STATUS block_map_cpu_page_to(uvm_va_block_t *block,
// Check for existing VMA permissions. They could have been modified after
// the initial mmap by mprotect.
if (new_prot > compute_logical_prot(block, va_block_context, page_index))
if (new_prot > compute_logical_prot(block, hmm_vma, page_index))
return NV_ERR_INVALID_ACCESS_TYPE;
if (uvm_va_block_is_hmm(block)) {
@@ -7001,7 +6972,7 @@ static NV_STATUS block_map_cpu_to(uvm_va_block_t *block,
for_each_va_block_page_in_region_mask(page_index, pages_to_map, region) {
status = block_map_cpu_page_to(block,
block_context,
block_context->hmm.vma,
resident_id,
page_index,
new_prot);
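A hedged kernel-style sketch (hypothetical helper, not this driver's code) of the vm_insert_page() safety requirement listed in the comment above block_map_cpu_page_to(): the mm must stay referenced and mmap_lock must be held in at least read mode around the insert.

#include <linux/mm.h>

/* Hypothetical: the caller guarantees 'mm' stays referenced, e.g. via
 * mmget_not_zero(), for the duration of the call. */
static int sketch_map_one_page(struct mm_struct *mm,
                               unsigned long addr,
                               struct page *page)
{
    struct vm_area_struct *vma;
    int ret = -EINVAL;

    mmap_read_lock(mm);                /* vm_insert_page() requires mmap_lock */
    vma = find_vma(mm, addr);
    if (vma && addr >= vma->vm_start)  /* find_vma() may return a later vma */
        ret = vm_insert_page(vma, addr, page);
    mmap_read_unlock(mm);

    return ret;
}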
@@ -7234,13 +7205,13 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
const uvm_page_mask_t *pte_mask;
uvm_page_mask_t *running_page_mask = &va_block_context->mapping.map_running_page_mask;
NV_STATUS status;
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);
va_block_context->mapping.cause = cause;
UVM_ASSERT(new_prot != UVM_PROT_NONE);
UVM_ASSERT(new_prot < UVM_PROT_MAX);
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
// Mapping is not supported on the eviction path that doesn't hold the VA
// space lock.
@@ -7282,7 +7253,7 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
// Map per resident location so we can more easily detect physically-
// contiguous mappings.
map_get_allowed_destinations(va_block, va_block_context, va_block_context->policy, id, &allowed_destinations);
map_get_allowed_destinations(va_block, va_block_context, policy, id, &allowed_destinations);
for_each_closest_id(resident_id, &allowed_destinations, id, va_space) {
if (UVM_ID_IS_CPU(id)) {
@@ -7588,8 +7559,6 @@ NV_STATUS uvm_va_block_map_mask(uvm_va_block_t *va_block,
NV_STATUS tracker_status;
uvm_processor_id_t id;
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
for_each_id_in_mask(id, map_processor_mask) {
status = uvm_va_block_map(va_block,
va_block_context,
@@ -9573,7 +9542,7 @@ static bool block_region_might_read_duplicate(uvm_va_block_t *va_block,
// could be changed in the future to optimize multiple faults/counters on
// contiguous pages.
static uvm_prot_t compute_new_permission(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct *hmm_vma,
uvm_page_index_t page_index,
uvm_processor_id_t fault_processor_id,
uvm_processor_id_t new_residency,
@@ -9586,7 +9555,7 @@ static uvm_prot_t compute_new_permission(uvm_va_block_t *va_block,
// query_promote: upgrade access privileges to avoid future faults IF
// they don't trigger further revocations.
new_prot = uvm_fault_access_type_to_prot(access_type);
logical_prot = compute_logical_prot(va_block, va_block_context, page_index);
logical_prot = compute_logical_prot(va_block, hmm_vma, page_index);
UVM_ASSERT(logical_prot >= new_prot);
@@ -9729,11 +9698,10 @@ NV_STATUS uvm_va_block_add_mappings_after_migration(uvm_va_block_t *va_block,
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
const uvm_page_mask_t *final_page_mask = map_page_mask;
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
const uvm_va_policy_t *policy = va_block_context->policy;
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);
uvm_processor_id_t preferred_location;
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, region));
// Read duplication takes precedence over SetAccessedBy.
//
@@ -9959,8 +9927,6 @@ NV_STATUS uvm_va_block_add_mappings(uvm_va_block_t *va_block,
uvm_range_group_range_iter_t iter;
uvm_prot_t prot_to_map;
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
if (UVM_ID_IS_CPU(processor_id) && !uvm_va_block_is_hmm(va_block)) {
if (!uvm_va_range_vma_check(va_range, va_block_context->mm))
return NV_OK;
@@ -10207,11 +10173,8 @@ uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
{
uvm_processor_id_t id;
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block,
va_block_context->policy,
uvm_va_block_region_for_page(page_index)));
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block,
va_block_context,
va_block_context->hmm.vma,
uvm_va_block_region_for_page(page_index)));
id = block_select_residency(va_block,
@@ -10255,6 +10218,7 @@ static bool check_access_counters_dont_revoke(uvm_va_block_t *block,
// Update service_context->prefetch_hint, service_context->per_processor_masks,
// and service_context->region.
static void uvm_va_block_get_prefetch_hint(uvm_va_block_t *va_block,
const uvm_va_policy_t *policy,
uvm_service_block_context_t *service_context)
{
uvm_processor_id_t new_residency;
@@ -10265,20 +10229,19 @@ static void uvm_va_block_get_prefetch_hint(uvm_va_block_t *va_block,
if (uvm_processor_mask_get_count(&service_context->resident_processors) == 1) {
uvm_page_index_t page_index;
uvm_page_mask_t *new_residency_mask;
const uvm_va_policy_t *policy = service_context->block_context.policy;
new_residency = uvm_processor_mask_find_first_id(&service_context->resident_processors);
new_residency_mask = &service_context->per_processor_masks[uvm_id_value(new_residency)].new_residency;
// Update prefetch tracking structure with the pages that will migrate
// due to faults
uvm_perf_prefetch_get_hint(va_block,
&service_context->block_context,
new_residency,
new_residency_mask,
service_context->region,
&service_context->prefetch_bitmap_tree,
&service_context->prefetch_hint);
uvm_perf_prefetch_get_hint_va_block(va_block,
&service_context->block_context,
new_residency,
new_residency_mask,
service_context->region,
&service_context->prefetch_bitmap_tree,
&service_context->prefetch_hint);
// Obtain the prefetch hint and give a fake fault access type to the
// prefetched pages
@@ -10463,7 +10426,7 @@ NV_STATUS uvm_va_block_service_finish(uvm_processor_id_t processor_id,
for_each_va_block_page_in_region_mask(page_index, new_residency_mask, service_context->region) {
new_prot = compute_new_permission(va_block,
&service_context->block_context,
service_context->block_context.hmm.vma,
page_index,
processor_id,
new_residency,
@@ -10706,11 +10669,8 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
NV_STATUS status = NV_OK;
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block,
service_context->block_context.policy,
service_context->region));
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block,
&service_context->block_context,
service_context->block_context.hmm.vma,
service_context->region));
// GPU fault servicing must be done under the VA space read lock. GPU fault
@@ -10724,7 +10684,9 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
else
uvm_assert_rwsem_locked_read(&va_space->lock);
uvm_va_block_get_prefetch_hint(va_block, service_context);
uvm_va_block_get_prefetch_hint(va_block,
uvm_va_policy_get_region(va_block, service_context->region),
service_context);
for_each_id_in_mask(new_residency, &service_context->resident_processors) {
if (uvm_va_block_is_hmm(va_block)) {
@@ -10757,11 +10719,8 @@ NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
uvm_va_range_t *va_range = va_block->va_range;
uvm_prot_t access_prot = uvm_fault_access_type_to_prot(access_type);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block,
va_block_context->policy,
uvm_va_block_region_for_page(page_index)));
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block,
va_block_context,
va_block_context->hmm.vma,
uvm_va_block_region_for_page(page_index)));
// CPU permissions are checked later by block_map_cpu_page.
@@ -10779,8 +10738,8 @@ NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
// vm_flags at any moment (for example on mprotect) and here we are not
// guaranteed to have vma->vm_mm->mmap_lock. During tests we ensure that
// this scenario does not happen.
if ((va_block_context->mm || uvm_enable_builtin_tests) &&
(access_prot > compute_logical_prot(va_block, va_block_context, page_index)))
if (((va_block->hmm.va_space && va_block->hmm.va_space->va_space_mm.mm) || uvm_enable_builtin_tests) &&
(access_prot > compute_logical_prot(va_block, va_block_context->hmm.vma, page_index)))
return NV_ERR_INVALID_ACCESS_TYPE;
}
@@ -10866,6 +10825,7 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
uvm_perf_thrashing_hint_t thrashing_hint;
uvm_processor_id_t new_residency;
bool read_duplicate;
const uvm_va_policy_t *policy;
uvm_assert_rwsem_locked(&va_space->lock);
@@ -10874,13 +10834,13 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
uvm_assert_mmap_lock_locked(service_context->block_context.mm);
service_context->block_context.policy = uvm_va_policy_get(va_block, fault_addr);
policy = uvm_va_policy_get(va_block, fault_addr);
if (service_context->num_retries == 0) {
// notify event to tools/performance heuristics
uvm_perf_event_notify_cpu_fault(&va_space->perf_events,
va_block,
service_context->block_context.policy->preferred_location,
policy->preferred_location,
fault_addr,
fault_access_type > UVM_FAULT_ACCESS_TYPE_READ,
KSTK_EIP(current));
@@ -10925,7 +10885,7 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
page_index,
UVM_ID_CPU,
uvm_fault_access_type_mask_bit(fault_access_type),
service_context->block_context.policy,
policy,
&thrashing_hint,
UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS,
&read_duplicate);
@@ -11025,7 +10985,6 @@ NV_STATUS uvm_va_block_find(uvm_va_space_t *va_space, NvU64 addr, uvm_va_block_t
NV_STATUS uvm_va_block_find_create_in_range(uvm_va_space_t *va_space,
uvm_va_range_t *va_range,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **out_block)
{
size_t index;
@@ -11033,12 +10992,7 @@ NV_STATUS uvm_va_block_find_create_in_range(uvm_va_space_t *va_space,
if (uvm_enable_builtin_tests && atomic_dec_if_positive(&va_space->test.va_block_allocation_fail_nth) == 0)
return NV_ERR_NO_MEMORY;
if (!va_range) {
if (!va_block_context || !va_block_context->mm)
return NV_ERR_INVALID_ADDRESS;
return uvm_hmm_va_block_find_create(va_space, addr, va_block_context, out_block);
}
UVM_ASSERT(va_range);
UVM_ASSERT(addr >= va_range->node.start);
UVM_ASSERT(addr <= va_range->node.end);
@@ -11052,14 +11006,32 @@ NV_STATUS uvm_va_block_find_create_in_range(uvm_va_space_t *va_space,
return uvm_va_range_block_create(va_range, index, out_block);
}
NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
NV_STATUS uvm_va_block_find_create_managed(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **out_block)
{
uvm_va_range_t *va_range = uvm_va_range_find(va_space, addr);
return uvm_va_block_find_create_in_range(va_space, va_range, addr, va_block_context, out_block);
if (va_range)
return uvm_va_block_find_create_in_range(va_space, va_range, addr, out_block);
else
return NV_ERR_INVALID_ADDRESS;
}
NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
struct vm_area_struct **hmm_vma,
uvm_va_block_t **out_block)
{
uvm_va_range_t *va_range = uvm_va_range_find(va_space, addr);
if (hmm_vma)
*hmm_vma = NULL;
if (va_range)
return uvm_va_block_find_create_in_range(va_space, va_range, addr, out_block);
else
return uvm_hmm_va_block_find_create(va_space, addr, hmm_vma, out_block);
}
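The call sites updated elsewhere in this change follow the same dispatch pattern; a condensed sketch with a hypothetical wrapper name:

static NV_STATUS sketch_lookup_block(uvm_va_space_t *va_space,
                                     NvU64 addr,
                                     struct mm_struct *mm,
                                     uvm_va_block_context_t *block_context,
                                     uvm_va_block_t **out_block)
{
    // With an mm, HMM blocks are eligible and the covering vma is reported.
    if (mm)
        return uvm_va_block_find_create(va_space, addr, &block_context->hmm.vma, out_block);

    // Without an mm, only managed VA ranges can back the address.
    return uvm_va_block_find_create_managed(va_space, addr, out_block);
}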
// Launch a synchronous, encrypted copy between GPU and CPU.
@@ -11236,8 +11208,6 @@ NV_STATUS uvm_va_block_write_from_cpu(uvm_va_block_t *va_block,
if (UVM_ID_IS_INVALID(proc))
proc = UVM_ID_CPU;
block_context->policy = uvm_va_policy_get(va_block, dst);
// Use make_resident() in all cases to break read-duplication, but
// block_retry can be NULL as if the page is not resident yet we will make
// it resident on the CPU.
@@ -11406,7 +11376,6 @@ static void block_add_eviction_mappings(void *args)
uvm_va_range_t *va_range = va_block->va_range;
NV_STATUS status = NV_OK;
block_context->policy = uvm_va_range_get_policy(va_range);
for_each_id_in_mask(id, &uvm_va_range_get_policy(va_range)->accessed_by) {
status = uvm_va_block_set_accessed_by(va_block, block_context, id);
if (status != NV_OK)
@@ -11557,8 +11526,8 @@ NV_STATUS uvm_va_block_evict_chunks(uvm_va_block_t *va_block,
&accessed_by_set);
}
else {
block_context->policy = uvm_va_range_get_policy(va_block->va_range);
accessed_by_set = uvm_processor_mask_get_count(&block_context->policy->accessed_by) > 0;
const uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
accessed_by_set = uvm_processor_mask_get_count(&policy->accessed_by) > 0;
// TODO: Bug 1765193: make_resident() breaks read-duplication, but it's
// not necessary to do so for eviction. Add a version that unmaps only
@@ -11749,19 +11718,16 @@ NV_STATUS uvm_test_va_block_inject_error(UVM_TEST_VA_BLOCK_INJECT_ERROR_PARAMS *
struct mm_struct *mm;
uvm_va_block_t *va_block;
uvm_va_block_test_t *va_block_test;
uvm_va_block_context_t *block_context = NULL;
NV_STATUS status = NV_OK;
mm = uvm_va_space_mm_or_current_retain_lock(va_space);
uvm_va_space_down_read(va_space);
block_context = uvm_va_block_context_alloc(mm);
if (!block_context) {
status = NV_ERR_NO_MEMORY;
goto out;
}
if (mm)
status = uvm_va_block_find_create(va_space, params->lookup_address, NULL, &va_block);
else
status = uvm_va_block_find_create_managed(va_space, params->lookup_address, &va_block);
status = uvm_va_block_find_create(va_space, params->lookup_address, block_context, &va_block);
if (status != NV_OK)
goto out;
@@ -11801,7 +11767,6 @@ block_unlock:
out:
uvm_va_space_up_read(va_space);
uvm_va_space_mm_or_current_release_unlock(va_space, mm);
uvm_va_block_context_free(block_context);
return status;
}
@@ -11872,7 +11837,11 @@ NV_STATUS uvm_test_change_pte_mapping(UVM_TEST_CHANGE_PTE_MAPPING_PARAMS *params
goto out;
}
status = uvm_va_block_find_create(va_space, params->va, block_context, &block);
if (mm)
status = uvm_va_block_find_create(va_space, params->va, &block_context->hmm.vma, &block);
else
status = uvm_va_block_find_create_managed(va_space, params->va, &block);
if (status != NV_OK)
goto out;
@@ -11899,8 +11868,6 @@ NV_STATUS uvm_test_change_pte_mapping(UVM_TEST_CHANGE_PTE_MAPPING_PARAMS *params
goto out_block;
}
block_context->policy = uvm_va_policy_get(block, params->va);
if (new_prot == UVM_PROT_NONE) {
status = uvm_va_block_unmap(block, block_context, id, region, NULL, &block->tracker);
}

View File

@@ -453,11 +453,12 @@ struct uvm_va_block_struct
NvU16 fault_migrations_to_last_proc;
} prefetch_info;
#if UVM_IS_CONFIG_HMM()
struct
{
#if UVM_IS_CONFIG_HMM()
// The MMU notifier is registered per va_block.
struct mmu_interval_notifier notifier;
#endif
// This is used to serialize migrations between CPU and GPU while
// allowing the va_block lock to be dropped.
@@ -487,7 +488,6 @@ struct uvm_va_block_struct
// Storage node for range tree of va_blocks.
uvm_range_tree_node_t node;
} hmm;
#endif
};
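For context, a minimal kernel-style sketch (assumed, not this driver's code) of how a struct mmu_interval_notifier like the conditional field above is registered over a block's VA range, and how its invalidate callback publishes the new sequence so racing readers retry:

#include <linux/mmu_notifier.h>

static bool sketch_invalidate(struct mmu_interval_notifier *mni,
                              const struct mmu_notifier_range *range,
                              unsigned long cur_seq)
{
    /* Publish the new sequence so concurrent hmm_range_fault() users retry. */
    mmu_interval_set_seq(mni, cur_seq);
    return true;
}

static const struct mmu_interval_notifier_ops sketch_notifier_ops = {
    .invalidate = sketch_invalidate,
};

/* Hypothetical: register 'mni' over [start, start + length) within 'mm'. */
static int sketch_register_notifier(struct mmu_interval_notifier *mni,
                                    struct mm_struct *mm,
                                    unsigned long start,
                                    unsigned long length)
{
    return mmu_interval_notifier_insert(mni, mm, start, length,
                                        &sketch_notifier_ops);
}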
// We define additional per-VA Block fields for testing. When
@@ -678,18 +678,8 @@ static void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context,
memset(va_block_context, 0xff, sizeof(*va_block_context));
va_block_context->mm = mm;
#if UVM_IS_CONFIG_HMM()
va_block_context->hmm.vma = NULL;
#endif
}
// Check that a single policy covers the given region for the given va_block.
// This always returns true and is intended to only be used with UVM_ASSERT().
// Locking: the va_block lock must be held.
bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
const uvm_va_policy_t *policy,
uvm_va_block_region_t region);
// TODO: Bug 1766480: Using only page masks instead of a combination of regions
// and page masks could simplify the below APIs and their implementations
// at the cost of having to scan the whole mask for small regions.
@@ -734,15 +724,15 @@ bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
// user memory is guaranteed not to happen. Allocation-retry of GPU page tables
// can still occur though.
//
// va_block_context must not be NULL. This function will set a bit in
// va_block_context->make_resident.pages_changed_residency for each page that
// changed residency (due to a migration or first population) as a result of the
// operation and va_block_context->make_resident.all_involved_processors for
// each processor involved in the copy. This function only sets bits in those
// masks. It is the caller's responsibility to zero the masks or not first.
//
// va_block_context->policy must also be set by the caller for the given region.
// See the comments for uvm_va_block_check_policy_is_valid().
// va_block_context must not be NULL and policy for the region must
// match. This function will set a bit in
// va_block_context->make_resident.pages_changed_residency for each
// page that changed residency (due to a migration or first
// population) as a result of the operation and
// va_block_context->make_resident.all_involved_processors for each
// processor involved in the copy. This function only sets bits in
// those masks. It is the caller's responsibility to zero the masks or
// not first.
//
// Notably any status other than NV_OK indicates that the block's lock might
// have been unlocked and relocked.
@@ -839,7 +829,7 @@ void uvm_va_block_make_resident_finish(uvm_va_block_t *va_block,
// pages because the earlier operation can cause a PTE split or merge which is
// assumed by the later operation.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// va_block_context must not be NULL and policy for the region must match.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// If allocation-retry was required as part of the operation and was successful,
@@ -896,7 +886,7 @@ NV_STATUS uvm_va_block_map_mask(uvm_va_block_t *va_block,
// pages because the earlier operation can cause a PTE split or merge which is
// assumed by the later operation.
//
// va_block_context must not be NULL. The va_block_context->policy is unused.
// va_block_context must not be NULL.
//
// If allocation-retry was required as part of the operation and was successful,
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the entries in the
@@ -929,7 +919,7 @@ NV_STATUS uvm_va_block_unmap_mask(uvm_va_block_t *va_block,
// - Unmap the preferred location's processor from any pages in this region
// which are not resident on the preferred location.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// va_block_context must not be NULL and policy for the region must match.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: The caller must hold the VA block lock.
@@ -941,7 +931,7 @@ NV_STATUS uvm_va_block_set_preferred_location_locked(uvm_va_block_t *va_block,
// location and policy. Waits for the operation to complete before returning.
// This function should only be called with managed va_blocks.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// va_block_context must not be NULL and policy for the region must match.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: This takes and releases the VA block lock. If va_block_context->mm
@@ -956,7 +946,7 @@ NV_STATUS uvm_va_block_set_accessed_by(uvm_va_block_t *va_block,
// the tracker after all mappings have been started.
// This function can be called with HMM and managed va_blocks.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// va_block_context must not be NULL and policy for the region must match.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: The caller must hold the va_block lock and
@@ -970,7 +960,7 @@ NV_STATUS uvm_va_block_set_accessed_by_locked(uvm_va_block_t *va_block,
// Breaks SetAccessedBy and remote mappings
// This function should only be called with managed va_blocks.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// va_block_context must not be NULL and policy for the region must match.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: This takes and releases the VA block lock. If va_block_context->mm
@@ -982,7 +972,7 @@ NV_STATUS uvm_va_block_set_read_duplication(uvm_va_block_t *va_block,
// Restores SetAccessedBy mappings
// This function should only be called with managed va_blocks.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// va_block_context must not be NULL and policy for the region must match.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: This takes and releases the VA block lock. If va_block_context->mm
@@ -1002,10 +992,9 @@ NV_STATUS uvm_va_block_unset_read_duplication(uvm_va_block_t *va_block,
// NV_ERR_INVALID_OPERATION The access would violate the policies specified
// by UvmPreventMigrationRangeGroups.
//
// va_block_context must not be NULL, va_block_context->policy must be valid,
// and if the va_block is a HMM block, va_block_context->hmm.vma must be valid
// which also means the va_block_context->mm is not NULL, retained, and locked
// for at least read.
// va_block_context must not be NULL, policy must match, and if the va_block is
// a HMM block, va_block_context->hmm.vma must be valid which also means the
// va_block_context->mm is not NULL, retained, and locked for at least read.
// Locking: the va_block lock must be held.
NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
@@ -1041,7 +1030,7 @@ NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
// different pages because the earlier operation can cause a PTE split or merge
// which is assumed by the later operation.
//
// va_block_context must not be NULL. The va_block_context->policy is unused.
// va_block_context must not be NULL.
//
// If allocation-retry was required as part of the operation and was successful,
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the entries in the
@@ -1081,7 +1070,7 @@ NV_STATUS uvm_va_block_revoke_prot_mask(uvm_va_block_t *va_block,
// processor_id, which triggered the migration and should have already been
// mapped).
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// va_block_context must not be NULL and policy for the region must match.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// This function acquires/waits for the va_block tracker and updates that
@@ -1112,7 +1101,7 @@ NV_STATUS uvm_va_block_add_mappings_after_migration(uvm_va_block_t *va_block,
// Note that this can return NV_ERR_MORE_PROCESSING_REQUIRED just like
// uvm_va_block_map() indicating that the operation needs to be retried.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// va_block_context must not be NULL and policy for the region must match.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: The caller must hold the va block lock. If va_block_context->mm !=
@@ -1134,7 +1123,7 @@ NV_STATUS uvm_va_block_add_gpu_va_space(uvm_va_block_t *va_block, uvm_gpu_va_spa
// If mm != NULL, that mm is used for any CPU mappings which may be created as
// a result of this call. See uvm_va_block_context_t::mm for details.
//
// va_block_context must not be NULL. The va_block_context->policy is unused.
// va_block_context must not be NULL.
//
// LOCKING: The caller must hold the va_block lock. If block_context->mm is not
// NULL, the caller must hold mm->mmap_lock in at least read mode.
@@ -1225,7 +1214,6 @@ NV_STATUS uvm_va_block_split_locked(uvm_va_block_t *existing_va_block,
// - va_space lock must be held in at least read mode
//
// service_context->block_context.mm is ignored and vma->vm_mm is used instead.
// service_context->block_context.policy is set by this function.
//
// Returns NV_ERR_INVALID_ACCESS_TYPE if a CPU mapping to fault_addr cannot be
// accessed, for example because it's within a range group which is non-
@@ -1239,10 +1227,10 @@ NV_STATUS uvm_va_block_cpu_fault(uvm_va_block_t *va_block,
// (migrations, cache invalidates, etc.) in response to the given service block
// context.
//
// service_context must not be NULL and service_context->block_context.policy
// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context must not be NULL and policy for service_context->region must
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context->prefetch_hint is set by this function.
//
// Locking:
@@ -1267,10 +1255,10 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
// Performs population of the destination pages, unmapping and copying source
// pages to new_residency.
//
// service_context must not be NULL and service_context->block_context.policy
// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context must not be NULL and policy for service_context->region must
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context->prefetch_hint should be set before calling this function.
//
// Locking:
@@ -1296,10 +1284,10 @@ NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
// This updates the va_block residency state and maps the faulting processor_id
// to the new residency (which may be remote).
//
// service_context must not be NULL and service_context->block_context.policy
// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context must not be NULL and policy for service_context->region must
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context must be initialized by calling uvm_va_block_service_copy()
// before calling this function.
//
@@ -1428,40 +1416,34 @@ const uvm_page_mask_t *uvm_va_block_map_mask_get(uvm_va_block_t *block, uvm_proc
NV_STATUS uvm_va_block_find(uvm_va_space_t *va_space, NvU64 addr, uvm_va_block_t **out_block);
// Same as uvm_va_block_find except that the block is created if not found.
// If addr is covered by a UVM_VA_RANGE_TYPE_MANAGED va_range, a managed block
// will be created. Otherwise, if addr is not covered by any va_range, HMM is
// enabled in the va_space, and va_block_context and va_block_context->mm are
// non-NULL, then a HMM block will be created and va_block_context->hmm.vma is
// set to the VMA covering 'addr'. The va_block_context->policy field is left
// unchanged.
// In either case, if va_block_context->mm is non-NULL, it must be retained and
// locked in at least read mode. Return values:
// If addr is covered by a UVM_VA_RANGE_TYPE_MANAGED va_range, a managed block
// will be created. If addr is not covered by any va_range and HMM is enabled
// in the va_space, then a HMM block will be created and hmm_vma is set to the
// VMA covering 'addr'. The va_space_mm must be retained and locked.
// Otherwise, hmm_vma is set to NULL.
// Return values:
// NV_ERR_INVALID_ADDRESS addr is not a UVM_VA_RANGE_TYPE_MANAGED va_range nor
// a HMM enabled VMA.
// NV_ERR_NO_MEMORY memory could not be allocated.
NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
struct vm_area_struct **hmm_vma,
uvm_va_block_t **out_block);
// Same as uvm_va_block_find_create except that va_range lookup was already done
// by the caller. If the supplied va_range is NULL, this function behaves just
// like when the va_range lookup in uvm_va_block_find_create is NULL.
// Same as uvm_va_block_find_create except that only managed va_blocks are
// created if not already present in the VA range. Does not require va_space_mm
// to be locked or retained.
NV_STATUS uvm_va_block_find_create_managed(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_t **out_block);
// Same as uvm_va_block_find_create_managed except that va_range lookup was
// already done by the caller. The supplied va_range must not be NULL.
NV_STATUS uvm_va_block_find_create_in_range(uvm_va_space_t *va_space,
uvm_va_range_t *va_range,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **out_block);
// Same as uvm_va_block_find_create except that only managed va_blocks are
// created if not already present in the VA range.
static NV_STATUS uvm_va_block_find_create_managed(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_t **out_block)
{
return uvm_va_block_find_create(va_space, addr, NULL, out_block);
}
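For illustration only, a hypothetical call site for the reworked lookup helpers might look like the sketch below; va_space and addr are assumed to be in scope, the locking described above is assumed to be held, and error handling is elided:

// Sketch of a hypothetical caller, not part of this change.
NV_STATUS status;
uvm_va_block_t *block;
struct vm_area_struct *hmm_vma;

// General lookup: may create a managed or a HMM block. For HMM blocks,
// hmm_vma is set to the covering VMA; otherwise it is set to NULL.
status = uvm_va_block_find_create(va_space, addr, &hmm_vma, &block);

// Managed-only lookup: never creates HMM blocks and does not require the
// va_space_mm to be retained or locked.
status = uvm_va_block_find_create_managed(va_space, addr, &block);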
// Look up a chunk backing a specific address within the VA block.
// Returns NULL if none.
uvm_gpu_chunk_t *uvm_va_block_lookup_gpu_chunk(uvm_va_block_t *va_block, uvm_gpu_t *gpu, NvU64 address);
@@ -1476,10 +1458,10 @@ uvm_gpu_chunk_t *uvm_va_block_lookup_gpu_chunk(uvm_va_block_t *va_block, uvm_gpu
// The caller needs to handle allocation-retry. va_block_retry can be NULL if
// the destination is the CPU.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// va_block_context must not be NULL and policy for the region must match. See
// the comments for uvm_va_block_check_policy_is_valid(). If va_block is a HMM
// block, va_block_context->hmm.vma must be valid. See the comments for
// uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
//
// LOCKING: The caller must hold the va_block lock. If va_block_context->mm !=
// NULL, va_block_context->mm->mmap_lock must be held in at least
@@ -1497,7 +1479,7 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
// The [dst, dst + size) range has to fit within a single PAGE_SIZE page.
//
// va_block_context must not be NULL. The caller is not required to set
// va_block_context->policy or va_block_context->hmm.vma.
// va_block_context->hmm.vma.
//
// The caller needs to support allocation-retry of page tables.
//
@@ -1569,7 +1551,7 @@ void uvm_va_block_mark_cpu_dirty(uvm_va_block_t *va_block);
// successful, NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case the
// block's lock was unlocked and relocked.
//
// va_block_context must not be NULL. The va_block_context->policy is unused.
// va_block_context must not be NULL.
//
// LOCKING: The caller must hold the va_block lock.
NV_STATUS uvm_va_block_set_cancel(uvm_va_block_t *va_block, uvm_va_block_context_t *block_context, uvm_gpu_t *gpu);
@@ -1650,12 +1632,18 @@ static uvm_va_block_region_t uvm_va_block_region_from_block(uvm_va_block_t *va_b
return uvm_va_block_region(0, uvm_va_block_num_cpu_pages(va_block));
}
// Create a block region from a va block and page mask. Note that the region
// Create a block region from a va block and page mask. If va_block is NULL, the
// region is assumed to cover the maximum va_block size. Note that the region
// covers the first through the last set bit and may have unset bits in between.
static uvm_va_block_region_t uvm_va_block_region_from_mask(uvm_va_block_t *va_block, const uvm_page_mask_t *page_mask)
{
uvm_va_block_region_t region;
uvm_page_index_t outer = uvm_va_block_num_cpu_pages(va_block);
uvm_page_index_t outer;
if (va_block)
outer = uvm_va_block_num_cpu_pages(va_block);
else
outer = PAGES_PER_UVM_VA_BLOCK;
region.first = find_first_bit(page_mask->bitmap, outer);
if (region.first >= outer) {
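A one-line sketch of the NULL va_block case described above; page_mask is a hypothetical, already-initialized uvm_page_mask_t:

// With a NULL va_block, the search width defaults to PAGES_PER_UVM_VA_BLOCK
// instead of the block's CPU page count.
uvm_va_block_region_t region = uvm_va_block_region_from_mask(NULL, &page_mask);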
@@ -2140,15 +2128,14 @@ uvm_va_block_region_t uvm_va_block_big_page_region_subset(uvm_va_block_t *va_blo
// MAX_BIG_PAGES_PER_UVM_VA_BLOCK is returned.
size_t uvm_va_block_big_page_index(uvm_va_block_t *va_block, uvm_page_index_t page_index, NvU32 big_page_size);
// Returns the new residency for a page that faulted or triggered access
// counter notifications. The read_duplicate output parameter indicates if the
// page meets the requirements to be read-duplicated
// va_block_context must not be NULL, va_block_context->policy must be valid,
// and if the va_block is a HMM block, va_block_context->hmm.vma must be valid
// which also means the va_block_context->mm is not NULL, retained, and locked
// for at least read. See the comments for uvm_va_block_check_policy_is_valid()
// and uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// Locking: the va_block lock must be held.
// Returns the new residency for a page that faulted or triggered access counter
// notifications. The read_duplicate output parameter indicates if the page
// meets the requirements to be read-duplicated. va_block_context must not be
// NULL, and if the va_block is a HMM block, va_block_context->hmm.vma must be
// valid, which also means the va_block_context->mm is not NULL, retained, and
// locked for at least read. See the comments for
// uvm_va_block_check_policy_is_valid() and uvm_hmm_check_context_vma_is_valid()
// in uvm_hmm.h. Locking: the va_block lock must be held.
uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,

View File

@@ -29,9 +29,7 @@
#include "uvm_tlb_batch.h"
#include "uvm_forward_decl.h"
#if UVM_IS_CONFIG_HMM()
#include <linux/migrate.h>
#endif
// UVM_VA_BLOCK_BITS is 21, meaning the maximum block size is 2MB. Rationale:
// - 2MB matches the largest Pascal GPU page size so it's a natural fit
@@ -234,9 +232,6 @@ typedef struct
// the mm, such as creating CPU mappings.
struct mm_struct *mm;
const uvm_va_policy_t *policy;
#if UVM_IS_CONFIG_HMM()
struct
{
// These are used for migrate_vma_*(), hmm_range_fault(), and
@@ -257,10 +252,11 @@ typedef struct
// Cached VMA pointer. This is only valid while holding the mmap_lock.
struct vm_area_struct *vma;
#if UVM_IS_CONFIG_HMM()
// Used for migrate_vma_*() to migrate pages to/from GPU/CPU.
struct migrate_vma migrate_vma_args;
} hmm;
#endif
} hmm;
// Convenience buffer for page mask prints
char page_mask_string_buffer[UVM_PAGE_MASK_PRINT_MIN_BUFFER_SIZE];

View File

@@ -54,6 +54,52 @@ const uvm_va_policy_t *uvm_va_policy_get(uvm_va_block_t *va_block, NvU64 addr)
}
}
// HMM va_blocks can have different policies for different regions within the
// va_block. This function checks that the given region is covered by the given
// policy and asserts if the region is covered by different policies.
// This always returns true and is intended to only be used with UVM_ASSERT() to
// avoid calling it on release builds.
// Locking: the va_block lock must be held.
static bool uvm_hmm_va_block_assert_policy_is_valid(uvm_va_block_t *va_block,
const uvm_va_policy_t *policy,
uvm_va_block_region_t region)
{
const uvm_va_policy_node_t *node;
if (uvm_va_policy_is_default(policy)) {
// There should only be the default policy within the region.
node = uvm_va_policy_node_iter_first(va_block,
uvm_va_block_region_start(va_block, region),
uvm_va_block_region_end(va_block, region));
UVM_ASSERT(!node);
}
else {
// The policy node should cover the region.
node = uvm_va_policy_node_from_policy(policy);
UVM_ASSERT(node->node.start <= uvm_va_block_region_start(va_block, region));
UVM_ASSERT(node->node.end >= uvm_va_block_region_end(va_block, region));
}
return true;
}
const uvm_va_policy_t *uvm_va_policy_get_region(uvm_va_block_t *va_block, uvm_va_block_region_t region)
{
uvm_assert_mutex_locked(&va_block->lock);
if (uvm_va_block_is_hmm(va_block)) {
const uvm_va_policy_t *policy;
const uvm_va_policy_node_t *node = uvm_va_policy_node_find(va_block, uvm_va_block_region_start(va_block, region));
policy = node ? &node->policy : &uvm_va_policy_default;
UVM_ASSERT(uvm_hmm_va_block_assert_policy_is_valid(va_block, policy, region));
return policy;
}
else {
return uvm_va_range_get_policy(va_block->va_range);
}
}
#if UVM_IS_CONFIG_HMM()
static struct kmem_cache *g_uvm_va_policy_node_cache __read_mostly;
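As an illustrative sketch (not part of this change), a caller holding the va_block lock could use the new helper as follows; va_space, va_block and region are assumed to be in scope:

// Fetch the single policy covering 'region'; on HMM blocks the helper asserts
// that the region is not split across multiple policies.
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);

if (uvm_va_policy_is_read_duplicate(policy, va_space)) {
    // ... read-duplicate the pages in 'region' ...
}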

View File

@@ -100,6 +100,9 @@ bool uvm_va_policy_is_read_duplicate(const uvm_va_policy_t *policy, uvm_va_space
// Locking: The va_block lock must be held.
const uvm_va_policy_t *uvm_va_policy_get(uvm_va_block_t *va_block, NvU64 addr);
// Same as above, but asserts that the policy covers the whole region.
const uvm_va_policy_t *uvm_va_policy_get_region(uvm_va_block_t *va_block, uvm_va_block_region_t region);
// Return a uvm_va_policy_node_t given a uvm_va_policy_t pointer.
static const uvm_va_policy_node_t *uvm_va_policy_node_from_policy(const uvm_va_policy_t *policy)
{

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -376,7 +376,7 @@ NV_STATUS uvm_va_range_create_semaphore_pool(uvm_va_space_t *va_space,
if (status != NV_OK)
goto error;
if (i == 0 && g_uvm_global.sev_enabled)
if (i == 0 && g_uvm_global.conf_computing_enabled)
mem_alloc_params.dma_owner = gpu;
if (attrs.is_cacheable) {
@@ -608,7 +608,6 @@ static NV_STATUS va_range_add_gpu_va_space_managed(uvm_va_range_t *va_range,
uvm_va_block_t *va_block;
uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, mm);
va_block_context->policy = uvm_va_range_get_policy(va_range);
// TODO: Bug 2090378. Consolidate all per-VA block operations within
// uvm_va_block_add_gpu_va_space so we only need to take the VA block
@@ -687,7 +686,6 @@ static void va_range_remove_gpu_va_space_managed(uvm_va_range_t *va_range,
bool should_enable_read_duplicate;
uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, mm);
va_block_context->policy = uvm_va_range_get_policy(va_range);
should_enable_read_duplicate =
uvm_va_range_get_policy(va_range)->read_duplication == UVM_READ_DUPLICATION_ENABLED &&
uvm_va_space_can_read_duplicate(va_space, NULL) != uvm_va_space_can_read_duplicate(va_space, gpu_va_space->gpu);
@@ -769,7 +767,6 @@ static NV_STATUS uvm_va_range_enable_peer_managed(uvm_va_range_t *va_range, uvm_
uvm_va_space_t *va_space = va_range->va_space;
uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, NULL);
va_block_context->policy = uvm_va_range_get_policy(va_range);
for_each_va_block_in_va_range(va_range, va_block) {
// TODO: Bug 1767224: Refactor the uvm_va_block_set_accessed_by logic
@@ -1322,7 +1319,6 @@ static NV_STATUS range_unmap_mask(uvm_va_range_t *va_range,
if (uvm_processor_mask_empty(mask))
return NV_OK;
block_context->policy = uvm_va_range_get_policy(va_range);
for_each_va_block_in_va_range(va_range, block) {
NV_STATUS status;
@@ -1364,7 +1360,6 @@ static NV_STATUS range_map_uvm_lite_gpus(uvm_va_range_t *va_range, uvm_tracker_t
if (uvm_processor_mask_empty(&va_range->uvm_lite_gpus))
return NV_OK;
va_block_context->policy = uvm_va_range_get_policy(va_range);
for_each_va_block_in_va_range(va_range, va_block) {
// UVM-Lite GPUs always map with RWA
@@ -1528,7 +1523,6 @@ NV_STATUS uvm_va_range_set_preferred_location(uvm_va_range_t *va_range,
uvm_processor_mask_copy(&va_range->uvm_lite_gpus, &new_uvm_lite_gpus);
va_block_context = uvm_va_space_block_context(va_space, mm);
va_block_context->policy = uvm_va_range_get_policy(va_range);
for_each_va_block_in_va_range(va_range, va_block) {
uvm_processor_id_t id;
@@ -1610,7 +1604,6 @@ NV_STATUS uvm_va_range_set_accessed_by(uvm_va_range_t *va_range,
uvm_processor_mask_copy(&va_range->uvm_lite_gpus, &new_uvm_lite_gpus);
va_block_context = uvm_va_space_block_context(va_space, mm);
va_block_context->policy = policy;
for_each_va_block_in_va_range(va_range, va_block) {
status = uvm_va_block_set_accessed_by(va_block, va_block_context, processor_id);
@@ -1657,7 +1650,6 @@ NV_STATUS uvm_va_range_set_read_duplication(uvm_va_range_t *va_range, struct mm_
return NV_OK;
va_block_context = uvm_va_space_block_context(va_range->va_space, mm);
va_block_context->policy = uvm_va_range_get_policy(va_range);
for_each_va_block_in_va_range(va_range, va_block) {
NV_STATUS status = uvm_va_block_set_read_duplication(va_block, va_block_context);
@@ -1679,7 +1671,6 @@ NV_STATUS uvm_va_range_unset_read_duplication(uvm_va_range_t *va_range, struct m
return NV_OK;
va_block_context = uvm_va_space_block_context(va_range->va_space, mm);
va_block_context->policy = uvm_va_range_get_policy(va_range);
for_each_va_block_in_va_range(va_range, va_block) {
status = uvm_va_block_unset_read_duplication(va_block, va_block_context);
@@ -1816,7 +1807,7 @@ NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params,
if (params->gpuAttributesCount > UVM_MAX_GPUS)
return NV_ERR_INVALID_ARGUMENT;
if (g_uvm_global.sev_enabled && params->gpuAttributesCount == 0)
if (g_uvm_global.conf_computing_enabled && params->gpuAttributesCount == 0)
return NV_ERR_INVALID_ARGUMENT;
// The mm needs to be locked in order to remove stale HMM va_blocks.

View File

@@ -242,9 +242,7 @@ NV_STATUS uvm_va_space_create(struct address_space *mapping, uvm_va_space_t **va
if (status != NV_OK)
goto fail;
status = uvm_hmm_va_space_initialize(va_space);
if (status != NV_OK)
goto fail;
uvm_hmm_va_space_initialize(va_space);
uvm_va_space_up_write(va_space);
uvm_up_write_mmap_lock(current->mm);
@@ -2226,11 +2224,12 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,
// address with mremap() so create a new va_block if needed.
status = uvm_hmm_va_block_find_create(va_space,
fault_addr,
&service_context->block_context,
&service_context->block_context.hmm.vma,
&va_block);
if (status != NV_OK)
break;
UVM_ASSERT(service_context->block_context.hmm.vma == vma);
status = uvm_hmm_migrate_begin(va_block);
if (status != NV_OK)
break;

View File

@@ -274,6 +274,22 @@ NV_STATUS uvm_va_space_mm_register(uvm_va_space_t *va_space)
}
}
if ((UVM_IS_CONFIG_HMM() || UVM_ATS_PREFETCH_SUPPORTED()) && uvm_va_space_pageable_mem_access_supported(va_space)) {
#if UVM_CAN_USE_MMU_NOTIFIERS()
// Initialize MMU interval notifiers for this process. This allows
// mmu_interval_notifier_insert() to be called without holding the
// mmap_lock for write.
// Note: there is no __mmu_notifier_unregister(); this call just
// allocates memory which is attached to the mm_struct and freed
// when the mm_struct is freed.
ret = __mmu_notifier_register(NULL, current->mm);
if (ret)
return errno_to_nv_status(ret);
#else
UVM_ASSERT(0);
#endif
}
return NV_OK;
}

View File

@@ -0,0 +1,33 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __DETECT_SELF_HOSTED_H__
#define __DETECT_SELF_HOSTED_H__
// PCI devIds 0x2340-0x237f are for Self-Hosted Hopper
static inline int pci_devid_is_self_hosted(unsigned short devid)
{
return devid >= 0x2340 && devid <= 0x237f;
}
#endif
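A hypothetical call site (not part of this change) would pass the helper the PCI device ID, for example the one stored in struct pci_dev; the wrapper name nv_pci_dev_is_self_hosted below is made up for illustration:

#include <linux/pci.h>
#include "detect-self-hosted.h"

// Returns nonzero when 'dev' is a Self-Hosted Hopper part.
static inline int nv_pci_dev_is_self_hosted(const struct pci_dev *dev)
{
    return pci_devid_is_self_hosted(dev->device);
}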

View File

@@ -96,6 +96,8 @@
#include "conftest/patches.h"
#include "detect-self-hosted.h"
#define RM_THRESHOLD_TOTAL_IRQ_COUNT 100000
#define RM_THRESHOLD_UNAHNDLED_IRQ_COUNT 99900
#define RM_UNHANDLED_TIMEOUT_US 100000

View File

@@ -209,7 +209,7 @@ NV_STATUS nvUvmInterfaceSessionCreate(uvmGpuSessionHandle *session,
memset(platformInfo, 0, sizeof(*platformInfo));
platformInfo->atsSupported = nv_ats_supported;
platformInfo->sevEnabled = os_cc_enabled;
platformInfo->confComputingEnabled = os_cc_enabled;
status = rm_gpu_ops_create_session(sp, (gpuSessionHandle *)session);