mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2026-01-27 11:39:46 +00:00
535.43.09
This commit is contained in:
@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
|
||||
EXTRA_CFLAGS += -I$(src)
|
||||
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
|
||||
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.43.08\"
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.43.09\"
|
||||
|
||||
ifneq ($(SYSSRCHOST1X),)
|
||||
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
|
||||
|
||||
@@ -566,8 +566,11 @@ typedef struct UvmPlatformInfo_tag
|
||||
// Out: ATS (Address Translation Services) is supported
|
||||
NvBool atsSupported;
|
||||
|
||||
// Out: AMD SEV (Secure Encrypted Virtualization) is enabled
|
||||
NvBool sevEnabled;
|
||||
// Out: True if HW trusted execution, such as AMD's SEV-SNP or Intel's TDX,
|
||||
// is enabled in the VM, indicating that Confidential Computing must also
|
||||
// be enabled in the GPU(s); these two security features are either both
|
||||
// enabled, or both disabled.
|
||||
NvBool confComputingEnabled;
|
||||
} UvmPlatformInfo;
|
||||
|
||||
typedef struct UvmGpuClientInfo_tag
|
||||
|
||||
@@ -6341,6 +6341,21 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_MEMPOLICY_HAS_HOME_NODE" "" "types"
|
||||
;;
|
||||
|
||||
mmu_interval_notifier)
|
||||
#
|
||||
# Determine if mmu_interval_notifier struct is present or not
|
||||
#
|
||||
# Added by commit 99cb252f5 ("mm/mmu_notifier: add an interval tree
|
||||
# notifier") in v5.5 (2019-11-12).
|
||||
#
|
||||
CODE="
|
||||
#include <linux/mmu_notifier.h>
|
||||
struct mmu_interval_notifier interval_notifier;
|
||||
"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_MMU_INTERVAL_NOTIFIER" "" "types"
|
||||
;;
|
||||
|
||||
# When adding a new conftest entry, please use the correct format for
|
||||
# specifying the relevant upstream Linux kernel commit.
|
||||
#
|
||||
|
||||
@@ -110,5 +110,6 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_mm_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
|
||||
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
|
||||
|
||||
@@ -44,6 +44,8 @@ void uvm_ats_init(const UvmPlatformInfo *platform_info)
|
||||
|
||||
void uvm_ats_init_va_space(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_init_rwsem(&va_space->ats.lock, UVM_LOCK_ORDER_LEAF);
|
||||
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
uvm_ats_ibm_init_va_space(va_space);
|
||||
}
|
||||
|
||||
@@ -28,17 +28,32 @@
|
||||
#include "uvm_forward_decl.h"
|
||||
#include "uvm_ats_ibm.h"
|
||||
#include "nv_uvm_types.h"
|
||||
#include "uvm_lock.h"
|
||||
|
||||
#include "uvm_ats_sva.h"
|
||||
|
||||
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
|
||||
|
||||
// ATS prefetcher uses hmm_range_fault() to query residency information.
|
||||
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
|
||||
// of memory regions while hmm_range_fault() is being called, MMU interval
|
||||
// notifiers are needed.
|
||||
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
|
||||
#define UVM_ATS_PREFETCH_SUPPORTED() 1
|
||||
#else
|
||||
#define UVM_ATS_PREFETCH_SUPPORTED() 0
|
||||
#endif
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// Mask of gpu_va_spaces which are registered for ATS access. The mask is
|
||||
// indexed by gpu->id. This mask is protected by the VA space lock.
|
||||
uvm_processor_mask_t registered_gpu_va_spaces;
|
||||
|
||||
// Protects against racing invalidates in the VA space while hmm_range_fault() is
|
||||
// being called in ats_compute_residency_mask().
|
||||
uvm_rw_semaphore_t lock;
|
||||
|
||||
union
|
||||
{
|
||||
uvm_ibm_va_space_t ibm;
|
||||
|
||||
@@ -20,60 +20,19 @@
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_api.h"
|
||||
#include "uvm_tools.h"
|
||||
#include "uvm_va_range.h"
|
||||
#include "uvm_ats.h"
|
||||
#include "uvm_ats_faults.h"
|
||||
#include "uvm_migrate_pageable.h"
|
||||
#include <linux/nodemask.h>
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
|
||||
// TODO: Bug 2103669: Implement a real prefetching policy and remove or adapt
|
||||
// these experimental parameters. These are intended to help guide that policy.
|
||||
static unsigned int uvm_exp_perf_prefetch_ats_order_replayable = 0;
|
||||
module_param(uvm_exp_perf_prefetch_ats_order_replayable, uint, 0644);
|
||||
MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_replayable,
|
||||
"Max order of pages (2^N) to prefetch on replayable ATS faults");
|
||||
|
||||
static unsigned int uvm_exp_perf_prefetch_ats_order_non_replayable = 0;
|
||||
module_param(uvm_exp_perf_prefetch_ats_order_non_replayable, uint, 0644);
|
||||
MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_non_replayable,
|
||||
"Max order of pages (2^N) to prefetch on non-replayable ATS faults");
|
||||
|
||||
// Expand the fault region to the naturally-aligned region with order given by
|
||||
// the module parameters, clamped to the vma containing fault_addr (if any).
|
||||
// Note that this means the region contains fault_addr but may not begin at
|
||||
// fault_addr.
|
||||
static void expand_fault_region(struct vm_area_struct *vma,
|
||||
NvU64 start,
|
||||
size_t length,
|
||||
uvm_fault_client_type_t client_type,
|
||||
unsigned long *migrate_start,
|
||||
unsigned long *migrate_length)
|
||||
{
|
||||
unsigned int order;
|
||||
unsigned long outer, aligned_start, aligned_size;
|
||||
|
||||
*migrate_start = start;
|
||||
*migrate_length = length;
|
||||
|
||||
if (client_type == UVM_FAULT_CLIENT_TYPE_HUB)
|
||||
order = uvm_exp_perf_prefetch_ats_order_non_replayable;
|
||||
else
|
||||
order = uvm_exp_perf_prefetch_ats_order_replayable;
|
||||
|
||||
if (order == 0)
|
||||
return;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
UVM_ASSERT(order < BITS_PER_LONG - PAGE_SHIFT);
|
||||
|
||||
aligned_size = (1UL << order) * PAGE_SIZE;
|
||||
|
||||
aligned_start = start & ~(aligned_size - 1);
|
||||
|
||||
*migrate_start = max(vma->vm_start, aligned_start);
|
||||
outer = min(vma->vm_end, aligned_start + aligned_size);
|
||||
*migrate_length = outer - *migrate_start;
|
||||
}
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
#include <linux/hmm.h>
|
||||
#endif
|
||||
|
||||
static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
@@ -122,6 +81,8 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
.mm = mm,
|
||||
.dst_id = ats_context->residency_id,
|
||||
.dst_node_id = ats_context->residency_node,
|
||||
.start = start,
|
||||
.length = length,
|
||||
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
|
||||
.touch = true,
|
||||
.skip_mapped = true,
|
||||
@@ -132,13 +93,6 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
|
||||
|
||||
expand_fault_region(vma,
|
||||
start,
|
||||
length,
|
||||
ats_context->client_type,
|
||||
&uvm_migrate_args.start,
|
||||
&uvm_migrate_args.length);
|
||||
|
||||
// We are trying to use migrate_vma API in the kernel (if it exists) to
|
||||
// populate and map the faulting region on the GPU. We want to do this only
|
||||
// on the first touch. That is, pages which are not already mapped. So, we
|
||||
@@ -184,6 +138,12 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mempolicy *vma_policy = vma_policy(vma);
|
||||
unsigned short mode;
|
||||
|
||||
ats_context->prefetch_state.has_preferred_location = false;
|
||||
|
||||
// It's safe to read vma_policy since the mmap_lock is held in at least read
|
||||
// mode in this path.
|
||||
uvm_assert_mmap_lock_locked(vma->vm_mm);
|
||||
|
||||
if (!vma_policy)
|
||||
goto done;
|
||||
|
||||
@@ -212,6 +172,9 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
|
||||
else
|
||||
residency = first_node(vma_policy->nodes);
|
||||
}
|
||||
|
||||
if (!nodes_empty(vma_policy->nodes))
|
||||
ats_context->prefetch_state.has_preferred_location = true;
|
||||
}
|
||||
|
||||
// Update gpu if residency is not the faulting gpu.
|
||||
@@ -219,12 +182,253 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
|
||||
gpu = uvm_va_space_find_gpu_with_memory_node_id(gpu_va_space->va_space, residency);
|
||||
|
||||
done:
|
||||
#else
|
||||
ats_context->prefetch_state.has_preferred_location = false;
|
||||
#endif
|
||||
|
||||
ats_context->residency_id = gpu ? gpu->parent->id : UVM_ID_CPU;
|
||||
ats_context->residency_node = residency;
|
||||
}
|
||||
|
||||
static void get_range_in_vma(struct vm_area_struct *vma, NvU64 base, NvU64 *start, NvU64 *end)
|
||||
{
|
||||
*start = max(vma->vm_start, (unsigned long) base);
|
||||
*end = min(vma->vm_end, (unsigned long) (base + UVM_VA_BLOCK_SIZE));
|
||||
}
|
||||
|
||||
static uvm_page_index_t uvm_ats_cpu_page_index(NvU64 base, NvU64 addr)
|
||||
{
|
||||
UVM_ASSERT(addr >= base);
|
||||
UVM_ASSERT(addr <= (base + UVM_VA_BLOCK_SIZE));
|
||||
|
||||
return (addr - base) / PAGE_SIZE;
|
||||
}
|
||||
|
||||
// start and end must be aligned to PAGE_SIZE and must fall within
|
||||
// [base, base + UVM_VA_BLOCK_SIZE]
|
||||
static uvm_va_block_region_t uvm_ats_region_from_start_end(NvU64 start, NvU64 end)
|
||||
{
|
||||
// base can be greater than, less than or equal to the start of a VMA.
|
||||
NvU64 base = UVM_VA_BLOCK_ALIGN_DOWN(start);
|
||||
|
||||
UVM_ASSERT(start < end);
|
||||
UVM_ASSERT(PAGE_ALIGNED(start));
|
||||
UVM_ASSERT(PAGE_ALIGNED(end));
|
||||
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
|
||||
|
||||
return uvm_va_block_region(uvm_ats_cpu_page_index(base, start), uvm_ats_cpu_page_index(base, end));
|
||||
}
|
||||
|
||||
static uvm_va_block_region_t uvm_ats_region_from_vma(struct vm_area_struct *vma, NvU64 base)
|
||||
{
|
||||
NvU64 start;
|
||||
NvU64 end;
|
||||
|
||||
get_range_in_vma(vma, base, &start, &end);
|
||||
|
||||
return uvm_ats_region_from_start_end(start, end);
|
||||
}
|
||||
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
|
||||
static bool uvm_ats_invalidate_notifier(struct mmu_interval_notifier *mni, unsigned long cur_seq)
|
||||
{
|
||||
uvm_ats_fault_context_t *ats_context = container_of(mni, uvm_ats_fault_context_t, prefetch_state.notifier);
|
||||
uvm_va_space_t *va_space = ats_context->prefetch_state.va_space;
|
||||
|
||||
// The following write lock protects against concurrent invalidates while
|
||||
// hmm_range_fault() is being called in ats_compute_residency_mask().
|
||||
uvm_down_write(&va_space->ats.lock);
|
||||
|
||||
mmu_interval_set_seq(mni, cur_seq);
|
||||
|
||||
uvm_up_write(&va_space->ats.lock);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool uvm_ats_invalidate_notifier_entry(struct mmu_interval_notifier *mni,
|
||||
const struct mmu_notifier_range *range,
|
||||
unsigned long cur_seq)
|
||||
{
|
||||
UVM_ENTRY_RET(uvm_ats_invalidate_notifier(mni, cur_seq));
|
||||
}
|
||||
|
||||
static const struct mmu_interval_notifier_ops uvm_ats_notifier_ops =
|
||||
{
|
||||
.invalidate = uvm_ats_invalidate_notifier_entry,
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
int ret;
|
||||
NvU64 start;
|
||||
NvU64 end;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
struct hmm_range range;
|
||||
uvm_page_index_t page_index;
|
||||
uvm_va_block_region_t vma_region;
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
struct mm_struct *mm = va_space->va_space_mm.mm;
|
||||
|
||||
uvm_assert_rwsem_locked_read(&va_space->lock);
|
||||
|
||||
ats_context->prefetch_state.first_touch = true;
|
||||
|
||||
uvm_page_mask_zero(residency_mask);
|
||||
|
||||
get_range_in_vma(vma, base, &start, &end);
|
||||
|
||||
vma_region = uvm_ats_region_from_start_end(start, end);
|
||||
|
||||
range.notifier = &ats_context->prefetch_state.notifier;
|
||||
range.start = start;
|
||||
range.end = end;
|
||||
range.hmm_pfns = ats_context->prefetch_state.pfns;
|
||||
range.default_flags = 0;
|
||||
range.pfn_flags_mask = 0;
|
||||
range.dev_private_owner = NULL;
|
||||
|
||||
ats_context->prefetch_state.va_space = va_space;
|
||||
|
||||
// mmu_interval_notifier_insert() will try to acquire mmap_lock for write
|
||||
// and will deadlock since mmap_lock is already held for read in this path.
|
||||
// This is prevented by calling __mmu_notifier_register() during va_space
|
||||
// creation. See the comment in uvm_mmu_notifier_register() for more
|
||||
// details.
|
||||
ret = mmu_interval_notifier_insert(range.notifier, mm, start, end, &uvm_ats_notifier_ops);
|
||||
if (ret)
|
||||
return errno_to_nv_status(ret);
|
||||
|
||||
while (true) {
|
||||
range.notifier_seq = mmu_interval_read_begin(range.notifier);
|
||||
ret = hmm_range_fault(&range);
|
||||
if (ret == -EBUSY)
|
||||
continue;
|
||||
if (ret) {
|
||||
status = errno_to_nv_status(ret);
|
||||
UVM_ASSERT(status != NV_OK);
|
||||
break;
|
||||
}
|
||||
|
||||
uvm_down_read(&va_space->ats.lock);
|
||||
|
||||
// Pages may have been freed or re-allocated after hmm_range_fault() is
|
||||
// called. So the PTE might point to a different page or nothing. In the
|
||||
// memory hot-unplug case it is not safe to call page_to_nid() on the
|
||||
// page as the struct page itself may have been freed. To protect
|
||||
// against these cases, uvm_ats_invalidate_notifier() blocks on va_space
|
||||
// ATS write lock for concurrent invalidates since va_space ATS lock is
|
||||
// held for read in this path.
|
||||
if (!mmu_interval_read_retry(range.notifier, range.notifier_seq))
|
||||
break;
|
||||
|
||||
uvm_up_read(&va_space->ats.lock);
|
||||
}
|
||||
|
||||
if (status == NV_OK) {
|
||||
for_each_va_block_page_in_region(page_index, vma_region) {
|
||||
unsigned long pfn = ats_context->prefetch_state.pfns[page_index - vma_region.first];
|
||||
|
||||
if (pfn & HMM_PFN_VALID) {
|
||||
struct page *page = hmm_pfn_to_page(pfn);
|
||||
|
||||
if (page_to_nid(page) == ats_context->residency_node)
|
||||
uvm_page_mask_set(residency_mask, page_index);
|
||||
|
||||
ats_context->prefetch_state.first_touch = false;
|
||||
}
|
||||
}
|
||||
|
||||
uvm_up_read(&va_space->ats.lock);
|
||||
}
|
||||
|
||||
mmu_interval_notifier_remove(range.notifier);
|
||||
|
||||
#endif
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static void ats_expand_fault_region(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
uvm_ats_fault_context_t *ats_context,
|
||||
uvm_va_block_region_t max_prefetch_region,
|
||||
uvm_page_mask_t *faulted_mask)
|
||||
{
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree = &ats_context->prefetch_state.bitmap_tree;
|
||||
|
||||
if (uvm_page_mask_empty(faulted_mask))
|
||||
return;
|
||||
|
||||
uvm_perf_prefetch_compute_ats(gpu_va_space->va_space,
|
||||
faulted_mask,
|
||||
uvm_va_block_region_from_mask(NULL, faulted_mask),
|
||||
max_prefetch_region,
|
||||
residency_mask,
|
||||
bitmap_tree,
|
||||
prefetch_mask);
|
||||
|
||||
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
|
||||
|
||||
if (vma->vm_flags & VM_WRITE)
|
||||
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
|
||||
}
|
||||
|
||||
static NV_STATUS ats_fault_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
|
||||
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
|
||||
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
|
||||
|
||||
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
|
||||
return status;
|
||||
|
||||
if (uvm_page_mask_empty(faulted_mask))
|
||||
return status;
|
||||
|
||||
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// Prefetch the entire region if none of the pages are resident on any node
|
||||
// and if preferred_location is the faulting GPU.
|
||||
if (ats_context->prefetch_state.has_preferred_location &&
|
||||
ats_context->prefetch_state.first_touch &&
|
||||
uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->parent->id)) {
|
||||
|
||||
uvm_page_mask_init_from_region(prefetch_mask, max_prefetch_region, NULL);
|
||||
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
|
||||
|
||||
if (vma->vm_flags & VM_WRITE)
|
||||
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
ats_expand_fault_region(gpu_va_space, vma, ats_context, max_prefetch_region, faulted_mask);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
@@ -267,6 +471,8 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
ats_batch_select_residency(gpu_va_space, vma, ats_context);
|
||||
|
||||
ats_fault_prefetch(gpu_va_space, vma, base, ats_context);
|
||||
|
||||
for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
|
||||
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
Copyright (c) 2021-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -54,23 +54,26 @@ bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu)
|
||||
return uvm_conf_computing_get_mode(gpu->parent) == UVM_GPU_CONF_COMPUTE_MODE_HCC;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_conf_computing_init_parent_gpu(const uvm_parent_gpu_t *parent)
|
||||
void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent)
|
||||
{
|
||||
UvmGpuConfComputeMode cc, sys_cc;
|
||||
uvm_gpu_t *first;
|
||||
uvm_gpu_t *first_gpu;
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
// The Confidential Computing state of the GPU should match that of the
|
||||
// system.
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled_parent(parent) == g_uvm_global.conf_computing_enabled);
|
||||
|
||||
// TODO: Bug 2844714: since we have no routine to traverse parent GPUs,
|
||||
// find first child GPU and get its parent.
|
||||
first = uvm_global_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);
|
||||
if (!first)
|
||||
return NV_OK;
|
||||
first_gpu = uvm_global_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);
|
||||
if (first_gpu == NULL)
|
||||
return;
|
||||
|
||||
sys_cc = uvm_conf_computing_get_mode(first->parent);
|
||||
cc = uvm_conf_computing_get_mode(parent);
|
||||
|
||||
return cc == sys_cc ? NV_OK : NV_ERR_NOT_SUPPORTED;
|
||||
// All GPUs derive Confidential Computing status from their parent. By
|
||||
// current policy all parent GPUs have identical Confidential Computing
|
||||
// status.
|
||||
UVM_ASSERT(uvm_conf_computing_get_mode(parent) == uvm_conf_computing_get_mode(first_gpu->parent));
|
||||
}
|
||||
|
||||
static void dma_buffer_destroy_locked(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
|
||||
|
||||
@@ -60,10 +60,8 @@
|
||||
// UVM_METHOD_SIZE * 2 * 10 = 80.
|
||||
#define UVM_CONF_COMPUTING_SIGN_BUF_MAX_SIZE 80
|
||||
|
||||
// All GPUs derive confidential computing status from their parent.
|
||||
// By current policy all parent GPUs have identical confidential
|
||||
// computing status.
|
||||
NV_STATUS uvm_conf_computing_init_parent_gpu(const uvm_parent_gpu_t *parent);
|
||||
void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent);
|
||||
|
||||
bool uvm_conf_computing_mode_enabled_parent(const uvm_parent_gpu_t *parent);
|
||||
bool uvm_conf_computing_mode_enabled(const uvm_gpu_t *gpu);
|
||||
bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu);
|
||||
|
||||
@@ -71,11 +71,6 @@ static void uvm_unregister_callbacks(void)
|
||||
}
|
||||
}
|
||||
|
||||
static void sev_init(const UvmPlatformInfo *platform_info)
|
||||
{
|
||||
g_uvm_global.sev_enabled = platform_info->sevEnabled;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_global_init(void)
|
||||
{
|
||||
NV_STATUS status;
|
||||
@@ -124,8 +119,7 @@ NV_STATUS uvm_global_init(void)
|
||||
|
||||
uvm_ats_init(&platform_info);
|
||||
g_uvm_global.num_simulated_devices = 0;
|
||||
|
||||
sev_init(&platform_info);
|
||||
g_uvm_global.conf_computing_enabled = platform_info.confComputingEnabled;
|
||||
|
||||
status = uvm_gpu_init();
|
||||
if (status != NV_OK) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -143,11 +143,16 @@ struct uvm_global_struct
|
||||
struct page *page;
|
||||
} unload_state;
|
||||
|
||||
// AMD Secure Encrypted Virtualization (SEV) status. True if VM has SEV
|
||||
// enabled. This field is set once during global initialization
|
||||
// (uvm_global_init), and can be read afterwards without acquiring any
|
||||
// locks.
|
||||
bool sev_enabled;
|
||||
// True if the VM has AMD's SEV, or equivalent HW security extensions such
|
||||
// as Intel's TDX, enabled. The flag is always false on the host.
|
||||
//
|
||||
// This value moves in tandem with that of Confidential Computing in the
|
||||
// GPU(s) in all supported configurations, so it is used as a proxy for the
|
||||
// Confidential Computing state.
|
||||
//
|
||||
// This field is set once during global initialization (uvm_global_init),
|
||||
// and can be read afterwards without acquiring any locks.
|
||||
bool conf_computing_enabled;
|
||||
};
|
||||
|
||||
// Initialize global uvm state
|
||||
|
||||
@@ -1099,12 +1099,7 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
|
||||
return status;
|
||||
}
|
||||
|
||||
status = uvm_conf_computing_init_parent_gpu(parent_gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Confidential computing: %s, GPU %s\n",
|
||||
nvstatusToString(status), parent_gpu->name);
|
||||
return status;
|
||||
}
|
||||
uvm_conf_computing_check_parent_gpu(parent_gpu);
|
||||
|
||||
parent_gpu->pci_dev = gpu_platform_info->pci_dev;
|
||||
parent_gpu->closest_cpu_numa_node = dev_to_node(&parent_gpu->pci_dev->dev);
|
||||
|
||||
@@ -46,6 +46,7 @@
|
||||
#include "uvm_rb_tree.h"
|
||||
#include "uvm_perf_prefetch.h"
|
||||
#include "nv-kthread-q.h"
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include "uvm_conf_computing.h"
|
||||
|
||||
// Buffer length to store uvm gpu id, RM device name and gpu uuid.
|
||||
@@ -192,9 +193,9 @@ typedef struct
|
||||
// Mask of successfully serviced read faults on pages in write_fault_mask.
|
||||
uvm_page_mask_t reads_serviced_mask;
|
||||
|
||||
// Temporary mask used for uvm_page_mask_or_equal. This is used since
|
||||
// bitmap_or_equal() isn't present in all linux kernel versions.
|
||||
uvm_page_mask_t tmp_mask;
|
||||
// Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
|
||||
// SAM VMA. This is used as input to the prefetcher.
|
||||
uvm_page_mask_t faulted_mask;
|
||||
|
||||
// Client type of the service requestor.
|
||||
uvm_fault_client_type_t client_type;
|
||||
@@ -204,6 +205,40 @@ typedef struct
|
||||
|
||||
// New residency NUMA node ID of the faulting region.
|
||||
int residency_node;
|
||||
|
||||
struct
|
||||
{
|
||||
// True if preferred_location was set on this faulting region.
|
||||
// UVM_VA_BLOCK_SIZE sized region in the faulting region bound by the
|
||||
// VMA is prefetched if preferred_location was set and if first_touch
|
||||
// is true.
|
||||
bool has_preferred_location;
|
||||
|
||||
// True if the UVM_VA_BLOCK_SIZE sized region isn't resident on any
|
||||
// node. False if any page in the region is resident somewhere.
|
||||
bool first_touch;
|
||||
|
||||
// Mask of prefetched pages in a UVM_VA_BLOCK_SIZE aligned region of a
|
||||
// SAM VMA.
|
||||
uvm_page_mask_t prefetch_pages_mask;
|
||||
|
||||
// PFN info of the faulting region
|
||||
unsigned long pfns[PAGES_PER_UVM_VA_BLOCK];
|
||||
|
||||
// Faulting/preferred processor residency mask of the faulting region.
|
||||
uvm_page_mask_t residency_mask;
|
||||
|
||||
#if defined(NV_MMU_INTERVAL_NOTIFIER)
|
||||
// MMU notifier used to compute residency of this faulting region.
|
||||
struct mmu_interval_notifier notifier;
|
||||
#endif
|
||||
|
||||
uvm_va_space_t *va_space;
|
||||
|
||||
// Prefetch temporary state.
|
||||
uvm_perf_prefetch_bitmap_tree_t bitmap_tree;
|
||||
} prefetch_state;
|
||||
|
||||
} uvm_ats_fault_context_t;
|
||||
|
||||
struct uvm_fault_service_batch_context_struct
|
||||
|
||||
@@ -1009,6 +1009,7 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
|
||||
NvU64 address = uvm_va_block_cpu_page_address(va_block, page_index);
|
||||
bool read_duplicate = false;
|
||||
uvm_processor_id_t new_residency;
|
||||
const uvm_va_policy_t *policy;
|
||||
|
||||
// Ensure that the migratability iterator covers the current address
|
||||
while (iter.end < address)
|
||||
@@ -1035,21 +1036,23 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
|
||||
|
||||
// If the underlying VMA is gone, skip HMM migrations.
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
status = uvm_hmm_find_vma(&service_context->block_context, address);
|
||||
status = uvm_hmm_find_vma(service_context->block_context.mm,
|
||||
&service_context->block_context.hmm.vma,
|
||||
address);
|
||||
if (status == NV_ERR_INVALID_ADDRESS)
|
||||
continue;
|
||||
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
}
|
||||
|
||||
service_context->block_context.policy = uvm_va_policy_get(va_block, address);
|
||||
policy = uvm_va_policy_get(va_block, address);
|
||||
|
||||
new_residency = uvm_va_block_select_residency(va_block,
|
||||
&service_context->block_context,
|
||||
page_index,
|
||||
processor,
|
||||
uvm_fault_access_type_mask_bit(UVM_FAULT_ACCESS_TYPE_PREFETCH),
|
||||
service_context->block_context.policy,
|
||||
policy,
|
||||
&thrashing_hint,
|
||||
UVM_SERVICE_OPERATION_ACCESS_COUNTERS,
|
||||
&read_duplicate);
|
||||
@@ -1094,12 +1097,17 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
|
||||
if (!uvm_processor_mask_empty(&service_context->resident_processors)) {
|
||||
while (first_page_index <= last_page_index) {
|
||||
uvm_page_index_t outer = last_page_index + 1;
|
||||
const uvm_va_policy_t *policy;
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
status = uvm_hmm_find_policy_vma_and_outer(va_block,
|
||||
&service_context->block_context,
|
||||
first_page_index,
|
||||
&outer);
|
||||
status = NV_ERR_INVALID_ADDRESS;
|
||||
if (service_context->block_context.mm) {
|
||||
status = uvm_hmm_find_policy_vma_and_outer(va_block,
|
||||
&service_context->block_context.hmm.vma,
|
||||
first_page_index,
|
||||
&policy,
|
||||
&outer);
|
||||
}
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -343,6 +343,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
bool read_duplicate;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||||
const uvm_va_policy_t *policy;
|
||||
|
||||
UVM_ASSERT(!fault_entry->is_fatal);
|
||||
|
||||
@@ -352,7 +353,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
UVM_ASSERT(fault_entry->fault_address >= va_block->start);
|
||||
UVM_ASSERT(fault_entry->fault_address <= va_block->end);
|
||||
|
||||
service_context->block_context.policy = uvm_va_policy_get(va_block, fault_entry->fault_address);
|
||||
policy = uvm_va_policy_get(va_block, fault_entry->fault_address);
|
||||
|
||||
if (service_context->num_retries == 0) {
|
||||
// notify event to tools/performance heuristics. For now we use a
|
||||
@@ -361,7 +362,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
uvm_perf_event_notify_gpu_fault(&va_space->perf_events,
|
||||
va_block,
|
||||
gpu->id,
|
||||
service_context->block_context.policy->preferred_location,
|
||||
policy->preferred_location,
|
||||
fault_entry,
|
||||
++non_replayable_faults->batch_id,
|
||||
false);
|
||||
@@ -396,7 +397,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
page_index,
|
||||
gpu->id,
|
||||
fault_entry->access_type_mask,
|
||||
service_context->block_context.policy,
|
||||
policy,
|
||||
&thrashing_hint,
|
||||
UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS,
|
||||
&read_duplicate);
|
||||
@@ -678,10 +679,17 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
|
||||
fault_entry->fault_source.channel_id = user_channel->hw_channel_id;
|
||||
|
||||
if (!fault_entry->is_fatal) {
|
||||
status = uvm_va_block_find_create(fault_entry->va_space,
|
||||
fault_entry->fault_address,
|
||||
va_block_context,
|
||||
&va_block);
|
||||
if (mm) {
|
||||
status = uvm_va_block_find_create(fault_entry->va_space,
|
||||
fault_entry->fault_address,
|
||||
&va_block_context->hmm.vma,
|
||||
&va_block);
|
||||
}
|
||||
else {
|
||||
status = uvm_va_block_find_create_managed(fault_entry->va_space,
|
||||
fault_entry->fault_address,
|
||||
&va_block);
|
||||
}
|
||||
if (status == NV_OK)
|
||||
status = service_managed_fault_in_block(gpu_va_space->gpu, va_block, fault_entry);
|
||||
else
|
||||
@@ -734,8 +742,6 @@ void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu)
|
||||
// Unlike replayable faults, we do not batch up and preprocess
|
||||
// non-replayable faults since getting multiple faults on the same
|
||||
// memory region is not very likely
|
||||
//
|
||||
// TODO: Bug 2103669: [UVM/ATS] Optimize ATS fault servicing
|
||||
for (i = 0; i < cached_faults; ++i) {
|
||||
status = service_fault(gpu, &gpu->parent->fault_buffer_info.non_replayable.fault_cache[i]);
|
||||
if (status != NV_OK)
|
||||
|
||||
@@ -1322,6 +1322,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
|
||||
uvm_fault_buffer_entry_t **ordered_fault_cache = batch_context->ordered_fault_cache;
|
||||
uvm_service_block_context_t *block_context = &replayable_faults->block_service_context;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
const uvm_va_policy_t *policy;
|
||||
NvU64 end;
|
||||
|
||||
// Check that all uvm_fault_access_type_t values can fit into an NvU8
|
||||
@@ -1347,13 +1348,13 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
|
||||
UVM_ASSERT(ordered_fault_cache[first_fault_index]->fault_address <= va_block->end);
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
uvm_hmm_find_policy_end(va_block,
|
||||
&block_context->block_context,
|
||||
ordered_fault_cache[first_fault_index]->fault_address,
|
||||
&end);
|
||||
policy = uvm_hmm_find_policy_end(va_block,
|
||||
block_context->block_context.hmm.vma,
|
||||
ordered_fault_cache[first_fault_index]->fault_address,
|
||||
&end);
|
||||
}
|
||||
else {
|
||||
block_context->block_context.policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
end = va_block->end;
|
||||
}
|
||||
|
||||
@@ -1393,7 +1394,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
|
||||
update_batch_and_notify_fault(gpu,
|
||||
batch_context,
|
||||
va_block,
|
||||
block_context->block_context.policy->preferred_location,
|
||||
policy->preferred_location,
|
||||
current_entry,
|
||||
is_duplicate);
|
||||
}
|
||||
@@ -1473,7 +1474,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
|
||||
page_index,
|
||||
gpu->id,
|
||||
service_access_type_mask,
|
||||
block_context->block_context.policy,
|
||||
policy,
|
||||
&thrashing_hint,
|
||||
UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS,
|
||||
&read_duplicate);
|
||||
@@ -1625,21 +1626,25 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
|
||||
uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
|
||||
const uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
const uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
const uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
|
||||
const uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
|
||||
uvm_page_mask_t *tmp_mask = &ats_context->tmp_mask;
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
|
||||
uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
|
||||
ats_context->client_type = UVM_FAULT_CLIENT_TYPE_GPC;
|
||||
|
||||
uvm_page_mask_or(tmp_mask, write_fault_mask, read_fault_mask);
|
||||
uvm_page_mask_or(faulted_mask, write_fault_mask, read_fault_mask);
|
||||
|
||||
status = uvm_ats_service_faults(gpu_va_space, vma, base, &batch_context->ats_context);
|
||||
|
||||
UVM_ASSERT(uvm_page_mask_subset(faults_serviced_mask, tmp_mask));
|
||||
// Remove prefetched pages from the serviced mask since fault servicing
|
||||
// failures belonging to prefetch pages need to be ignored.
|
||||
uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, faulted_mask);
|
||||
|
||||
if ((status != NV_OK) || uvm_page_mask_equal(faults_serviced_mask, tmp_mask)) {
|
||||
UVM_ASSERT(uvm_page_mask_subset(faults_serviced_mask, faulted_mask));
|
||||
|
||||
if ((status != NV_OK) || uvm_page_mask_equal(faults_serviced_mask, faulted_mask)) {
|
||||
(*block_faults) += (fault_index_end - fault_index_start);
|
||||
return status;
|
||||
}
|
||||
@@ -1867,7 +1872,13 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
|
||||
va_range_next = uvm_va_space_iter_next(va_range_next, ~0ULL);
|
||||
}
|
||||
|
||||
status = uvm_va_block_find_create_in_range(va_space, va_range, fault_address, va_block_context, &va_block);
|
||||
if (va_range)
|
||||
status = uvm_va_block_find_create_in_range(va_space, va_range, fault_address, &va_block);
|
||||
else if (mm)
|
||||
status = uvm_hmm_va_block_find_create(va_space, fault_address, &va_block_context->hmm.vma, &va_block);
|
||||
else
|
||||
status = NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
if (status == NV_OK) {
|
||||
status = service_fault_batch_block(gpu, va_block, batch_context, fault_index, block_faults);
|
||||
}
|
||||
|
||||
@@ -110,7 +110,20 @@ typedef struct
|
||||
|
||||
bool uvm_hmm_is_enabled_system_wide(void)
|
||||
{
|
||||
return !uvm_disable_hmm && !g_uvm_global.ats.enabled && uvm_va_space_mm_enabled_system();
|
||||
if (uvm_disable_hmm)
|
||||
return false;
|
||||
|
||||
if (g_uvm_global.ats.enabled)
|
||||
return false;
|
||||
|
||||
// Confidential Computing and HMM impose mutually exclusive constraints. In
|
||||
// Confidential Computing the GPU can only access pages resident in vidmem,
|
||||
// but in HMM pages may be required to be resident in sysmem: file backed
|
||||
// VMAs, huge pages, etc.
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return false;
|
||||
|
||||
return uvm_va_space_mm_enabled_system();
|
||||
}
|
||||
|
||||
bool uvm_hmm_is_enabled(uvm_va_space_t *va_space)
|
||||
@@ -127,32 +140,17 @@ static uvm_va_block_t *hmm_va_block_from_node(uvm_range_tree_node_t *node)
|
||||
return container_of(node, uvm_va_block_t, hmm.node);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
|
||||
void uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_hmm_va_space_t *hmm_va_space = &va_space->hmm;
|
||||
struct mm_struct *mm = va_space->va_space_mm.mm;
|
||||
int ret;
|
||||
|
||||
if (!uvm_hmm_is_enabled(va_space))
|
||||
return NV_OK;
|
||||
|
||||
uvm_assert_mmap_lock_locked_write(mm);
|
||||
uvm_assert_rwsem_locked_write(&va_space->lock);
|
||||
return;
|
||||
|
||||
uvm_range_tree_init(&hmm_va_space->blocks);
|
||||
uvm_mutex_init(&hmm_va_space->blocks_lock, UVM_LOCK_ORDER_LEAF);
|
||||
|
||||
// Initialize MMU interval notifiers for this process.
|
||||
// This allows mmu_interval_notifier_insert() to be called without holding
|
||||
// the mmap_lock for write.
|
||||
// Note: there is no __mmu_notifier_unregister(), this call just allocates
|
||||
// memory which is attached to the mm_struct and freed when the mm_struct
|
||||
// is freed.
|
||||
ret = __mmu_notifier_register(NULL, mm);
|
||||
if (ret)
|
||||
return errno_to_nv_status(ret);
|
||||
|
||||
return NV_OK;
|
||||
return;
|
||||
}
|
||||
|
||||
void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space)
|
||||
@@ -325,7 +323,6 @@ static bool hmm_invalidate(uvm_va_block_t *va_block,
|
||||
region = uvm_va_block_region_from_start_end(va_block, start, end);
|
||||
|
||||
va_block_context->hmm.vma = NULL;
|
||||
va_block_context->policy = NULL;
|
||||
|
||||
// We only need to unmap GPUs since Linux handles the CPUs.
|
||||
for_each_gpu_id_in_mask(id, &va_block->mapped) {
|
||||
@@ -444,11 +441,11 @@ static void hmm_va_block_init(uvm_va_block_t *va_block,
|
||||
static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
NvU64 addr,
|
||||
bool allow_unreadable_vma,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct **vma_out,
|
||||
uvm_va_block_t **va_block_ptr)
|
||||
{
|
||||
struct mm_struct *mm = va_space->va_space_mm.mm;
|
||||
struct vm_area_struct *vma;
|
||||
struct mm_struct *mm;
|
||||
struct vm_area_struct *va_block_vma;
|
||||
uvm_va_block_t *va_block;
|
||||
NvU64 start, end;
|
||||
NV_STATUS status;
|
||||
@@ -457,15 +454,14 @@ static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
if (!uvm_hmm_is_enabled(va_space))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
UVM_ASSERT(mm);
|
||||
UVM_ASSERT(!va_block_context || va_block_context->mm == mm);
|
||||
mm = va_space->va_space_mm.mm;
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
UVM_ASSERT(PAGE_ALIGNED(addr));
|
||||
|
||||
// Note that we have to allow PROT_NONE VMAs so that policies can be set.
|
||||
vma = find_vma(mm, addr);
|
||||
if (!uvm_hmm_vma_is_valid(vma, addr, allow_unreadable_vma))
|
||||
va_block_vma = find_vma(mm, addr);
|
||||
if (!uvm_hmm_vma_is_valid(va_block_vma, addr, allow_unreadable_vma))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
// Since we only hold the va_space read lock, there can be multiple
|
||||
@@ -517,8 +513,8 @@ static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
|
||||
done:
|
||||
uvm_mutex_unlock(&va_space->hmm.blocks_lock);
|
||||
if (va_block_context)
|
||||
va_block_context->hmm.vma = vma;
|
||||
if (vma_out)
|
||||
*vma_out = va_block_vma;
|
||||
*va_block_ptr = va_block;
|
||||
return NV_OK;
|
||||
|
||||
@@ -532,43 +528,36 @@ err_unlock:
|
||||
|
||||
NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
NvU64 addr,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct **vma,
|
||||
uvm_va_block_t **va_block_ptr)
|
||||
{
|
||||
return hmm_va_block_find_create(va_space, addr, false, va_block_context, va_block_ptr);
|
||||
return hmm_va_block_find_create(va_space, addr, false, vma, va_block_ptr);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr)
|
||||
NV_STATUS uvm_hmm_find_vma(struct mm_struct *mm, struct vm_area_struct **vma_out, NvU64 addr)
|
||||
{
|
||||
struct mm_struct *mm = va_block_context->mm;
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
if (!mm)
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
|
||||
vma = find_vma(mm, addr);
|
||||
if (!uvm_hmm_vma_is_valid(vma, addr, false))
|
||||
*vma_out = find_vma(mm, addr);
|
||||
if (!uvm_hmm_vma_is_valid(*vma_out, addr, false))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
va_block_context->hmm.vma = vma;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct *vma,
|
||||
uvm_va_block_region_t region)
|
||||
{
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
struct vm_area_struct *vma = va_block_context->hmm.vma;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
UVM_ASSERT(va_block_context->mm == vma->vm_mm);
|
||||
uvm_assert_mmap_lock_locked(va_block_context->mm);
|
||||
UVM_ASSERT(va_block->hmm.va_space->va_space_mm.mm == vma->vm_mm);
|
||||
uvm_assert_mmap_lock_locked(va_block->hmm.va_space->va_space_mm.mm);
|
||||
UVM_ASSERT(vma->vm_start <= uvm_va_block_region_start(va_block, region));
|
||||
UVM_ASSERT(vma->vm_end > uvm_va_block_region_end(va_block, region));
|
||||
}
|
||||
@@ -619,8 +608,6 @@ static NV_STATUS hmm_migrate_range(uvm_va_block_t *va_block,
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
uvm_for_each_va_policy_in(policy, va_block, start, end, node, region) {
|
||||
va_block_context->policy = policy;
|
||||
|
||||
// Even though UVM_VA_BLOCK_RETRY_LOCKED() may unlock and relock the
|
||||
// va_block lock, the policy remains valid because we hold the mmap
|
||||
// lock so munmap can't remove the policy, and the va_space lock so the
|
||||
@@ -670,7 +657,6 @@ void uvm_hmm_evict_va_blocks(uvm_va_space_t *va_space)
|
||||
continue;
|
||||
|
||||
block_context->hmm.vma = vma;
|
||||
block_context->policy = &uvm_va_policy_default;
|
||||
uvm_hmm_va_block_migrate_locked(va_block,
|
||||
NULL,
|
||||
block_context,
|
||||
@@ -1046,11 +1032,7 @@ static NV_STATUS hmm_set_preferred_location_locked(uvm_va_block_t *va_block,
|
||||
uvm_processor_mask_test(&old_policy->accessed_by, old_policy->preferred_location))
|
||||
uvm_processor_mask_set(&set_accessed_by_processors, old_policy->preferred_location);
|
||||
|
||||
va_block_context->policy = uvm_va_policy_set_preferred_location(va_block,
|
||||
region,
|
||||
preferred_location,
|
||||
old_policy);
|
||||
if (!va_block_context->policy)
|
||||
if (!uvm_va_policy_set_preferred_location(va_block, region, preferred_location, old_policy))
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
// Establish new remote mappings if the old preferred location had
|
||||
@@ -1109,7 +1091,7 @@ NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
|
||||
for (addr = base; addr < last_address; addr = va_block->end + 1) {
|
||||
NvU64 end;
|
||||
|
||||
status = hmm_va_block_find_create(va_space, addr, true, va_block_context, &va_block);
|
||||
status = hmm_va_block_find_create(va_space, addr, true, &va_block_context->hmm.vma, &va_block);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
|
||||
@@ -1151,7 +1133,6 @@ static NV_STATUS hmm_set_accessed_by_start_end_locked(uvm_va_block_t *va_block,
|
||||
if (uvm_va_policy_is_read_duplicate(&node->policy, va_space))
|
||||
continue;
|
||||
|
||||
va_block_context->policy = &node->policy;
|
||||
region = uvm_va_block_region_from_start_end(va_block,
|
||||
max(start, node->node.start),
|
||||
min(end, node->node.end));
|
||||
@@ -1196,7 +1177,7 @@ NV_STATUS uvm_hmm_set_accessed_by(uvm_va_space_t *va_space,
|
||||
for (addr = base; addr < last_address; addr = va_block->end + 1) {
|
||||
NvU64 end;
|
||||
|
||||
status = hmm_va_block_find_create(va_space, addr, true, va_block_context, &va_block);
|
||||
status = hmm_va_block_find_create(va_space, addr, true, &va_block_context->hmm.vma, &va_block);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
|
||||
@@ -1249,8 +1230,6 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
uvm_for_each_va_policy_node_in(node, va_block, va_block->start, va_block->end) {
|
||||
block_context->policy = &node->policy;
|
||||
|
||||
for_each_id_in_mask(id, &node->policy.accessed_by) {
|
||||
status = hmm_set_accessed_by_start_end_locked(va_block,
|
||||
block_context,
|
||||
@@ -1309,13 +1288,13 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
|
||||
}
|
||||
}
|
||||
|
||||
void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
unsigned long addr,
|
||||
NvU64 *endp)
|
||||
const uvm_va_policy_t *uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
|
||||
struct vm_area_struct *vma,
|
||||
unsigned long addr,
|
||||
NvU64 *endp)
|
||||
{
|
||||
struct vm_area_struct *vma = va_block_context->hmm.vma;
|
||||
const uvm_va_policy_node_t *node;
|
||||
const uvm_va_policy_t *policy;
|
||||
NvU64 end = va_block->end;
|
||||
|
||||
uvm_assert_mmap_lock_locked(vma->vm_mm);
|
||||
@@ -1326,40 +1305,45 @@ void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
|
||||
|
||||
node = uvm_va_policy_node_find(va_block, addr);
|
||||
if (node) {
|
||||
va_block_context->policy = &node->policy;
|
||||
policy = &node->policy;
|
||||
if (end > node->node.end)
|
||||
end = node->node.end;
|
||||
}
|
||||
else {
|
||||
va_block_context->policy = &uvm_va_policy_default;
|
||||
policy = &uvm_va_policy_default;
|
||||
}
|
||||
|
||||
*endp = end;
|
||||
|
||||
return policy;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct **vma_out,
|
||||
uvm_page_index_t page_index,
|
||||
const uvm_va_policy_t **policy,
|
||||
uvm_page_index_t *outerp)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long addr;
|
||||
NvU64 end;
|
||||
uvm_page_index_t outer;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
struct mm_struct *mm = va_space->va_space_mm.mm;
|
||||
|
||||
if (!mm)
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
|
||||
uvm_assert_mmap_lock_locked(va_block_context->mm);
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
addr = uvm_va_block_cpu_page_address(va_block, page_index);
|
||||
|
||||
vma = vma_lookup(va_block_context->mm, addr);
|
||||
if (!vma || !(vma->vm_flags & VM_READ))
|
||||
*vma_out = vma_lookup(mm, addr);
|
||||
if (!*vma_out || !((*vma_out)->vm_flags & VM_READ))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
va_block_context->hmm.vma = vma;
|
||||
|
||||
uvm_hmm_find_policy_end(va_block, va_block_context, addr, &end);
|
||||
*policy = uvm_hmm_find_policy_end(va_block, *vma_out, addr, &end);
|
||||
|
||||
outer = uvm_va_block_cpu_page_index(va_block, end) + 1;
|
||||
if (*outerp > outer)
|
||||
@@ -1379,8 +1363,6 @@ static NV_STATUS hmm_clear_thrashing_policy(uvm_va_block_t *va_block,
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
uvm_for_each_va_policy_in(policy, va_block, va_block->start, va_block->end, node, region) {
|
||||
block_context->policy = policy;
|
||||
|
||||
// Unmap may split PTEs and require a retry. Needs to be called
|
||||
// before the pinned pages information is destroyed.
|
||||
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
|
||||
@@ -1424,11 +1406,10 @@ NV_STATUS uvm_hmm_clear_thrashing_policy(uvm_va_space_t *va_space)
|
||||
}
|
||||
|
||||
uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct *vma,
|
||||
const uvm_va_policy_t *policy,
|
||||
NvU64 address)
|
||||
{
|
||||
struct vm_area_struct *vma = va_block_context->hmm.vma;
|
||||
const uvm_va_policy_t *policy = va_block_context->policy;
|
||||
NvU64 start, end;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
|
||||
@@ -1457,13 +1438,11 @@ uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
|
||||
}
|
||||
|
||||
uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 addr)
|
||||
{
|
||||
struct vm_area_struct *vma = va_block_context->hmm.vma;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
|
||||
uvm_assert_mmap_lock_locked(va_block_context->mm);
|
||||
uvm_assert_mmap_lock_locked(va_block->hmm.va_space->va_space_mm.mm);
|
||||
UVM_ASSERT(vma && addr >= vma->vm_start && addr < vma->vm_end);
|
||||
|
||||
if (!(vma->vm_flags & VM_READ))
|
||||
@@ -2907,8 +2886,6 @@ static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma,
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
UVM_ASSERT(!uvm_va_policy_is_read_duplicate(va_block_context->policy, va_block->hmm.va_space));
|
||||
|
||||
status = uvm_va_block_make_resident_copy(va_block,
|
||||
va_block_retry,
|
||||
va_block_context,
|
||||
@@ -3140,7 +3117,7 @@ NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
for (addr = base; addr < last_address; addr = end + 1) {
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
status = hmm_va_block_find_create(va_space, addr, false, va_block_context, &va_block);
|
||||
status = hmm_va_block_find_create(va_space, addr, false, &va_block_context->hmm.vma, &va_block);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
@@ -3232,7 +3209,6 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
|
||||
uvm_for_each_va_policy_in(policy, va_block, start, end, node, region) {
|
||||
npages = uvm_va_block_region_num_pages(region);
|
||||
|
||||
va_block_context->policy = policy;
|
||||
if (out_accessed_by_set && uvm_processor_mask_get_count(&policy->accessed_by) > 0)
|
||||
*out_accessed_by_set = true;
|
||||
|
||||
|
||||
@@ -49,9 +49,7 @@ typedef struct
|
||||
bool uvm_hmm_is_enabled_system_wide(void);
|
||||
|
||||
// Initialize HMM for the given va_space.
|
||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
|
||||
// and the va_space lock must be held in write mode.
|
||||
NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space);
|
||||
void uvm_hmm_va_space_initialize(uvm_va_space_t *va_space);
|
||||
|
||||
// Destroy any HMM state for the given va_space.
|
||||
// Locking: va_space lock must be held in write mode.
|
||||
@@ -90,31 +88,30 @@ typedef struct
|
||||
// address 'addr' or the VMA does not have at least PROT_READ permission.
|
||||
// The caller is also responsible for checking that there is no UVM
|
||||
// va_range covering the given address before calling this function.
|
||||
// If va_block_context is not NULL, the VMA is cached in
|
||||
// va_block_context->hmm.vma.
|
||||
// The VMA is returned in vma_out if it's not NULL.
|
||||
// Locking: This function must be called with mm retained and locked for
|
||||
// at least read and the va_space lock at least for read.
|
||||
NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
NvU64 addr,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct **vma_out,
|
||||
uvm_va_block_t **va_block_ptr);
|
||||
|
||||
// Find the VMA for the given address and set va_block_context->hmm.vma.
|
||||
// Return NV_ERR_INVALID_ADDRESS if va_block_context->mm is NULL or there
|
||||
// is no VMA associated with the address 'addr' or the VMA does not have at
|
||||
// least PROT_READ permission.
|
||||
// Find the VMA for the given address and return it in vma_out. Return
|
||||
// NV_ERR_INVALID_ADDRESS if mm is NULL or there is no VMA associated with
|
||||
// the address 'addr' or the VMA does not have at least PROT_READ
|
||||
// permission.
|
||||
// Locking: This function must be called with mm retained and locked for
|
||||
// at least read or mm equal to NULL.
|
||||
NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr);
|
||||
NV_STATUS uvm_hmm_find_vma(struct mm_struct *mm, struct vm_area_struct **vma_out, NvU64 addr);
|
||||
|
||||
// If va_block is a HMM va_block, check that va_block_context->hmm.vma is
|
||||
// not NULL and covers the given region. This always returns true and is
|
||||
// intended to only be used with UVM_ASSERT().
|
||||
// If va_block is a HMM va_block, check that vma is not NULL and covers the
|
||||
// given region. This always returns true and is intended to only be used
|
||||
// with UVM_ASSERT().
|
||||
// Locking: This function must be called with the va_block lock held and if
|
||||
// va_block is a HMM block, va_block_context->mm must be retained and
|
||||
// locked for at least read.
|
||||
// va_block is a HMM block, va_space->va_space_mm.mm->mmap_lock must be
|
||||
// retained and locked for at least read.
|
||||
bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct *vma,
|
||||
uvm_va_block_region_t region);
|
||||
|
||||
// Initialize the HMM portion of the service_context.
|
||||
@@ -225,31 +222,29 @@ typedef struct
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// This function assigns va_block_context->policy to the policy covering
|
||||
// the given address 'addr' and assigns the ending address '*endp' to the
|
||||
// minimum of va_block->end, va_block_context->hmm.vma->vm_end - 1, and the
|
||||
// ending address of the policy range. Note that va_block_context->hmm.vma
|
||||
// is expected to be initialized before calling this function.
|
||||
// Locking: This function must be called with
|
||||
// va_block_context->hmm.vma->vm_mm retained and locked for least read and
|
||||
// the va_block lock held.
|
||||
void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
unsigned long addr,
|
||||
NvU64 *endp);
|
||||
// This function returns the policy covering the given address 'addr' and
|
||||
// assigns the ending address '*endp' to the minimum of va_block->end,
|
||||
// vma->vm_end - 1, and the ending address of the policy range.
|
||||
// Locking: This function must be called with vma->vm_mm retained and locked
|
||||
// for at least read, and with the va_block and va_space locks held.
|
||||
const uvm_va_policy_t *uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
|
||||
struct vm_area_struct *vma,
|
||||
unsigned long addr,
|
||||
NvU64 *endp);
|
||||
|
||||
// This function finds the VMA for the page index 'page_index' and assigns
|
||||
// it to va_block_context->vma, sets va_block_context->policy to the policy
|
||||
// covering the given address, and sets the ending page range '*outerp'
|
||||
// to the minimum of *outerp, va_block_context->hmm.vma->vm_end - 1, the
|
||||
// ending address of the policy range, and va_block->end.
|
||||
// Return NV_ERR_INVALID_ADDRESS if no VMA is found; otherwise, NV_OK.
|
||||
// Locking: This function must be called with
|
||||
// va_block_context->hmm.vma->vm_mm retained and locked for least read and
|
||||
// the va_block lock held.
|
||||
// This function finds the VMA for the page index 'page_index' and returns
|
||||
// it in vma_out which must not be NULL. Returns the policy covering the
|
||||
// given address, and sets the ending page range '*outerp' to the minimum of
|
||||
// *outerp, vma->vm_end - 1, the ending address of the policy range, and
|
||||
// va_block->end.
|
||||
// Return NV_ERR_INVALID_ADDRESS if no VMA is found; otherwise, set *vma
|
||||
// and return NV_OK.
|
||||
// Locking: This function must be called with mm retained and locked for at
|
||||
// least read and the va_block and va_space lock held.
|
||||
NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct **vma,
|
||||
uvm_page_index_t page_index,
|
||||
const uvm_va_policy_t **policy,
|
||||
uvm_page_index_t *outerp);
|
||||
|
||||
// Clear thrashing policy information from all HMM va_blocks.
|
||||
@@ -258,24 +253,21 @@ typedef struct
|
||||
|
||||
// Return the expanded region around 'address' limited to the intersection
|
||||
// of va_block start/end, vma start/end, and policy start/end.
|
||||
// va_block_context must not be NULL, va_block_context->hmm.vma must be
|
||||
// valid (this is usually set by uvm_hmm_va_block_find_create()), and
|
||||
// va_block_context->policy must be valid.
|
||||
// Locking: the caller must hold mm->mmap_lock in at least read mode, the
|
||||
// va_space lock must be held in at least read mode, and the va_block lock
|
||||
// held.
|
||||
// Locking: the caller must hold va_space->va_space_mm.mm->mmap_lock in at
|
||||
// least read mode, the va_space lock must be held in at least read mode,
|
||||
// and the va_block lock held.
|
||||
uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct *vma,
|
||||
const uvm_va_policy_t *policy,
|
||||
NvU64 address);
|
||||
|
||||
// Return the logical protection allowed of a HMM va_block for the page at
|
||||
// the given address.
|
||||
// va_block_context must not be NULL and va_block_context->hmm.vma must be
|
||||
// valid (this is usually set by uvm_hmm_va_block_find_create()).
|
||||
// Locking: the caller must hold va_block_context->mm mmap_lock in at least
|
||||
// read mode.
|
||||
// the given address within the vma which must be valid. This is usually
|
||||
// obtained from uvm_hmm_va_block_find_create().
|
||||
// Locking: the caller must hold va_space->va_space_mm.mm mmap_lock in at
|
||||
// least read mode.
|
||||
uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 addr);
|
||||
|
||||
// This is called to service a GPU fault.
|
||||
@@ -288,9 +280,9 @@ typedef struct
|
||||
uvm_service_block_context_t *service_context);
|
||||
|
||||
// This is called to migrate a region within a HMM va_block.
|
||||
// va_block_context must not be NULL and va_block_context->policy and
|
||||
// va_block_context->hmm.vma must be valid.
|
||||
// Locking: the va_block_context->mm must be retained, mmap_lock must be
|
||||
// va_block_context must not be NULL and va_block_context->hmm.vma
|
||||
// must be valid.
|
||||
// Locking: the va_space->va_space_mm.mm must be retained, mmap_lock must be
|
||||
// locked, and the va_block lock held.
|
||||
NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
@@ -303,7 +295,7 @@ typedef struct
|
||||
// UvmMigrate().
|
||||
//
|
||||
// va_block_context must not be NULL. The caller is not required to set
|
||||
// va_block_context->policy or va_block_context->hmm.vma.
|
||||
// va_block_context->hmm.vma.
|
||||
//
|
||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked and
|
||||
// the va_space read lock must be held.
|
||||
@@ -412,9 +404,8 @@ typedef struct
|
||||
return false;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
|
||||
static void uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space)
|
||||
@@ -440,19 +431,19 @@ typedef struct
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
NvU64 addr,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct **vma,
|
||||
uvm_va_block_t **va_block_ptr)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr)
|
||||
static NV_STATUS uvm_hmm_find_vma(struct mm_struct *mm, struct vm_area_struct **vma, NvU64 addr)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct *vma,
|
||||
uvm_va_block_region_t region)
|
||||
{
|
||||
return true;
|
||||
@@ -533,16 +524,19 @@ typedef struct
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
unsigned long addr,
|
||||
NvU64 *endp)
|
||||
static const uvm_va_policy_t *uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
|
||||
struct vm_area_struct *vma,
|
||||
unsigned long addr,
|
||||
NvU64 *endp)
|
||||
{
|
||||
UVM_ASSERT(0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct **vma,
|
||||
uvm_page_index_t page_index,
|
||||
const uvm_va_policy_t **policy,
|
||||
uvm_page_index_t *outerp)
|
||||
{
|
||||
return NV_OK;
|
||||
@@ -554,14 +548,15 @@ typedef struct
|
||||
}
|
||||
|
||||
static uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct *vma,
|
||||
const uvm_va_policy_t *policy,
|
||||
NvU64 address)
|
||||
{
|
||||
return (uvm_va_block_region_t){};
|
||||
}
|
||||
|
||||
static uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 addr)
|
||||
{
|
||||
return UVM_PROT_NONE;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2022 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -93,8 +93,9 @@ static bool sysmem_can_be_mapped_on_gpu(uvm_mem_t *sysmem)
|
||||
{
|
||||
UVM_ASSERT(uvm_mem_is_sysmem(sysmem));
|
||||
|
||||
// If SEV is enabled, only unprotected memory can be mapped
|
||||
if (g_uvm_global.sev_enabled)
|
||||
// In Confidential Computing, only unprotected memory can be mapped on the
|
||||
// GPU
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return uvm_mem_is_sysmem_dma(sysmem);
|
||||
|
||||
return true;
|
||||
@@ -737,7 +738,7 @@ static NV_STATUS mem_map_cpu_to_sysmem_kernel(uvm_mem_t *mem)
|
||||
pages[page_index] = mem_cpu_page(mem, page_index * PAGE_SIZE);
|
||||
}
|
||||
|
||||
if (g_uvm_global.sev_enabled && uvm_mem_is_sysmem_dma(mem))
|
||||
if (g_uvm_global.conf_computing_enabled && uvm_mem_is_sysmem_dma(mem))
|
||||
prot = uvm_pgprot_decrypted(PAGE_KERNEL_NOENC);
|
||||
|
||||
mem->kernel.cpu_addr = vmap(pages, num_pages, VM_MAP, prot);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2021 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -44,10 +44,10 @@ static NvU32 first_page_size(NvU32 page_sizes)
|
||||
|
||||
static inline NV_STATUS __alloc_map_sysmem(NvU64 size, uvm_gpu_t *gpu, uvm_mem_t **sys_mem)
|
||||
{
|
||||
if (g_uvm_global.sev_enabled)
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(size, gpu, current->mm, sys_mem);
|
||||
else
|
||||
return uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, sys_mem);
|
||||
|
||||
return uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, sys_mem);
|
||||
}
|
||||
|
||||
static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)
|
||||
@@ -335,9 +335,6 @@ error:
|
||||
|
||||
static bool should_test_page_size(size_t alloc_size, NvU32 page_size)
|
||||
{
|
||||
if (g_uvm_global.sev_enabled)
|
||||
return false;
|
||||
|
||||
if (g_uvm_global.num_simulated_devices == 0)
|
||||
return true;
|
||||
|
||||
|
||||
@@ -223,7 +223,7 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
NV_STATUS status, tracker_status = NV_OK;
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, va_block_context, region));
|
||||
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, va_block_context->hmm.vma, region));
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
status = uvm_hmm_va_block_migrate_locked(va_block,
|
||||
@@ -234,9 +234,9 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE);
|
||||
}
|
||||
else {
|
||||
va_block_context->policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
|
||||
if (uvm_va_policy_is_read_duplicate(va_block_context->policy, va_space)) {
|
||||
if (uvm_va_policy_is_read_duplicate(policy, va_space)) {
|
||||
status = uvm_va_block_make_resident_read_duplicate(va_block,
|
||||
va_block_retry,
|
||||
va_block_context,
|
||||
@@ -371,8 +371,6 @@ static bool va_block_should_do_cpu_preunmap(uvm_va_block_t *va_block,
|
||||
if (!va_block)
|
||||
return true;
|
||||
|
||||
UVM_ASSERT(va_range_should_do_cpu_preunmap(va_block_context->policy, uvm_va_block_get_va_space(va_block)));
|
||||
|
||||
region = uvm_va_block_region_from_start_end(va_block, max(start, va_block->start), min(end, va_block->end));
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
@@ -496,11 +494,9 @@ static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
NvU64 preunmap_range_start = start;
|
||||
uvm_va_policy_t *policy = uvm_va_range_get_policy(va_range);
|
||||
|
||||
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_range));
|
||||
|
||||
should_do_cpu_preunmap = should_do_cpu_preunmap && va_range_should_do_cpu_preunmap(va_block_context->policy,
|
||||
va_range->va_space);
|
||||
should_do_cpu_preunmap = should_do_cpu_preunmap && va_range_should_do_cpu_preunmap(policy, va_range->va_space);
|
||||
|
||||
// Divide migrations into groups of contiguous VA blocks. This is to trigger
|
||||
// CPU unmaps for that region before the migration starts.
|
||||
@@ -577,8 +573,6 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
break;
|
||||
}
|
||||
|
||||
va_block_context->policy = uvm_va_range_get_policy(va_range);
|
||||
|
||||
// For UVM-Lite GPUs, the CUDA driver may suballocate a single va_range
|
||||
// into many range groups. For this reason, we iterate over each va_range first
|
||||
// then through the range groups within.
|
||||
@@ -653,6 +647,8 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
|
||||
|
||||
if (mm)
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
else if (!first_va_range)
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
va_block_context = uvm_va_block_context_alloc(mm);
|
||||
if (!va_block_context)
|
||||
|
||||
@@ -672,6 +672,14 @@ static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *s
|
||||
.finalize_and_map = uvm_migrate_vma_finalize_and_map_helper,
|
||||
};
|
||||
|
||||
// WAR for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
|
||||
// invalidates on read-only to read-write upgrades
|
||||
//
|
||||
// This code path isn't used on GH180 but we need to maintain consistent
|
||||
// behaviour on systems that do use it.
|
||||
if (!vma_is_anonymous(args->vma))
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
|
||||
ret = migrate_vma(&uvm_migrate_vma_ops, args->vma, args->start, args->end, args->src, args->dst, state);
|
||||
if (ret < 0)
|
||||
return errno_to_nv_status(ret);
|
||||
@@ -685,6 +693,24 @@ static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *s
|
||||
if (ret < 0)
|
||||
return errno_to_nv_status(ret);
|
||||
|
||||
// TODO: Bug 2419180: support file-backed pages in migrate_vma, when
|
||||
// support for it is added to the Linux kernel
|
||||
//
|
||||
// A side-effect of migrate_vma_setup() is it calls mmu notifiers even if a
|
||||
// page can't be migrated (eg. because it's a non-anonymous mapping). We
|
||||
// need this side-effect for SMMU on GH180 to ensure any cached read-only
|
||||
// entries are flushed from SMMU on permission upgrade.
|
||||
//
|
||||
// TODO: Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
|
||||
// invalidates on read-only to read-write upgrades
|
||||
//
|
||||
// The above WAR doesn't work for HugeTLBfs mappings because
|
||||
// migrate_vma_setup() will fail in that case.
|
||||
if (!vma_is_anonymous(args->vma)) {
|
||||
migrate_vma_finalize(args);
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
}
|
||||
|
||||
uvm_migrate_vma_alloc_and_copy(args, state);
|
||||
if (state->status == NV_OK) {
|
||||
migrate_vma_pages(args);
|
||||
@@ -858,9 +884,13 @@ static NV_STATUS migrate_pageable_vma(struct vm_area_struct *vma,
|
||||
start = max(start, vma->vm_start);
|
||||
outer = min(outer, vma->vm_end);
|
||||
|
||||
// TODO: Bug 2419180: support file-backed pages in migrate_vma, when
|
||||
// support for it is added to the Linux kernel
|
||||
if (!vma_is_anonymous(vma))
|
||||
// migrate_vma only supports anonymous VMAs. We check for those after
|
||||
// calling migrate_vma_setup() to work around Bug 4130089. We need to check
|
||||
// for HugeTLB VMAs here because migrate_vma_setup() will return a fatal
|
||||
// error for those.
|
||||
// TODO: Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
|
||||
// invalidates on read-only to read-write upgrades
|
||||
if (is_vm_hugetlb_page(vma))
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
|
||||
if (uvm_processor_mask_empty(&va_space->registered_gpus))
|
||||
|
||||
@@ -34,8 +34,8 @@ typedef struct
|
||||
{
|
||||
uvm_va_space_t *va_space;
|
||||
struct mm_struct *mm;
|
||||
unsigned long start;
|
||||
unsigned long length;
|
||||
const unsigned long start;
|
||||
const unsigned long length;
|
||||
uvm_processor_id_t dst_id;
|
||||
|
||||
// dst_node_id may be clobbered by uvm_migrate_pageable().
|
||||
|
||||
@@ -906,11 +906,10 @@ error:
|
||||
// --------------|-------------------------||----------------|----------------
|
||||
// vidmem | - || vidmem | false
|
||||
// sysmem | - || sysmem | false
|
||||
// default | <not set> || vidmem | true (1)
|
||||
// default | <not set> || vidmem | true
|
||||
// default | vidmem || vidmem | false
|
||||
// default | sysmem || sysmem | false
|
||||
//
|
||||
// (1) When SEV mode is enabled, the fallback path is disabled.
|
||||
//
|
||||
// In SR-IOV heavy the page tree must be in vidmem, to prevent guest drivers
|
||||
// from updating GPU page tables without hypervisor knowledge.
|
||||
@@ -926,28 +925,27 @@ error:
|
||||
//
|
||||
static void page_tree_set_location(uvm_page_tree_t *tree, uvm_aperture_t location)
|
||||
{
|
||||
bool should_location_be_vidmem;
|
||||
UVM_ASSERT(tree->gpu != NULL);
|
||||
UVM_ASSERT_MSG((location == UVM_APERTURE_VID) ||
|
||||
(location == UVM_APERTURE_SYS) ||
|
||||
(location == UVM_APERTURE_DEFAULT),
|
||||
"Invalid location %s (%d)\n", uvm_aperture_string(location), (int)location);
|
||||
|
||||
should_location_be_vidmem = uvm_gpu_is_virt_mode_sriov_heavy(tree->gpu)
|
||||
|| uvm_conf_computing_mode_enabled(tree->gpu);
|
||||
|
||||
// The page tree of a "fake" GPU used during page tree testing can be in
|
||||
// sysmem even if should_location_be_vidmem is true. A fake GPU can be
|
||||
// identified by having no channel manager.
|
||||
if ((tree->gpu->channel_manager != NULL) && should_location_be_vidmem)
|
||||
UVM_ASSERT(location == UVM_APERTURE_VID);
|
||||
// sysmem in scenarios where a "real" GPU must be in vidmem. Fake GPUs can
|
||||
// be identified by having no channel manager.
|
||||
if (tree->gpu->channel_manager != NULL) {
|
||||
|
||||
if (uvm_gpu_is_virt_mode_sriov_heavy(tree->gpu))
|
||||
UVM_ASSERT(location == UVM_APERTURE_VID);
|
||||
else if (uvm_conf_computing_mode_enabled(tree->gpu))
|
||||
UVM_ASSERT(location == UVM_APERTURE_VID);
|
||||
}
|
||||
|
||||
if (location == UVM_APERTURE_DEFAULT) {
|
||||
if (page_table_aperture == UVM_APERTURE_DEFAULT) {
|
||||
tree->location = UVM_APERTURE_VID;
|
||||
|
||||
// See the comment (1) above.
|
||||
tree->location_sys_fallback = !g_uvm_global.sev_enabled;
|
||||
tree->location_sys_fallback = true;
|
||||
}
|
||||
else {
|
||||
tree->location = page_table_aperture;
|
||||
|
||||
@@ -218,57 +218,11 @@ static void grow_fault_granularity(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
}
|
||||
}
|
||||
|
||||
// Within a block we only allow prefetching to a single processor. Therefore,
|
||||
// if two processors are accessing non-overlapping regions within the same
|
||||
// block they won't benefit from prefetching.
|
||||
//
|
||||
// TODO: Bug 1778034: [uvm] Explore prefetching to different processors within
|
||||
// a VA block.
|
||||
static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t new_residency,
|
||||
const uvm_page_mask_t *faulted_pages,
|
||||
uvm_va_block_region_t faulted_region,
|
||||
uvm_page_mask_t *prefetch_pages,
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree)
|
||||
static void init_bitmap_tree_from_region(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
uvm_va_block_region_t max_prefetch_region,
|
||||
const uvm_page_mask_t *resident_mask,
|
||||
const uvm_page_mask_t *faulted_pages)
|
||||
{
|
||||
uvm_page_index_t page_index;
|
||||
const uvm_page_mask_t *resident_mask = NULL;
|
||||
const uvm_page_mask_t *thrashing_pages = NULL;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
const uvm_va_policy_t *policy = va_block_context->policy;
|
||||
uvm_va_block_region_t max_prefetch_region;
|
||||
NvU32 big_page_size;
|
||||
uvm_va_block_region_t big_pages_region;
|
||||
|
||||
if (!uvm_id_equal(va_block->prefetch_info.last_migration_proc_id, new_residency)) {
|
||||
va_block->prefetch_info.last_migration_proc_id = new_residency;
|
||||
va_block->prefetch_info.fault_migrations_to_last_proc = 0;
|
||||
}
|
||||
|
||||
// Compute the expanded region that prefetching is allowed from.
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
max_prefetch_region = uvm_hmm_get_prefetch_region(va_block,
|
||||
va_block_context,
|
||||
uvm_va_block_region_start(va_block, faulted_region));
|
||||
}
|
||||
else {
|
||||
max_prefetch_region = uvm_va_block_region_from_block(va_block);
|
||||
}
|
||||
|
||||
uvm_page_mask_zero(prefetch_pages);
|
||||
|
||||
if (UVM_ID_IS_CPU(new_residency) || va_block->gpus[uvm_id_gpu_index(new_residency)] != NULL)
|
||||
resident_mask = uvm_va_block_resident_mask_get(va_block, new_residency);
|
||||
|
||||
// If this is a first-touch fault and the destination processor is the
|
||||
// preferred location, populate the whole max_prefetch_region.
|
||||
if (uvm_processor_mask_empty(&va_block->resident) &&
|
||||
uvm_id_equal(new_residency, policy->preferred_location)) {
|
||||
uvm_page_mask_region_fill(prefetch_pages, max_prefetch_region);
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (resident_mask)
|
||||
uvm_page_mask_or(&bitmap_tree->pages, resident_mask, faulted_pages);
|
||||
else
|
||||
@@ -277,6 +231,29 @@ static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_blo
|
||||
// If we are using a subregion of the va_block, align bitmap_tree
|
||||
uvm_page_mask_shift_right(&bitmap_tree->pages, &bitmap_tree->pages, max_prefetch_region.first);
|
||||
|
||||
bitmap_tree->offset = 0;
|
||||
bitmap_tree->leaf_count = uvm_va_block_region_num_pages(max_prefetch_region);
|
||||
bitmap_tree->level_count = ilog2(roundup_pow_of_two(bitmap_tree->leaf_count)) + 1;
|
||||
}
|
||||
|
||||
static void update_bitmap_tree_from_va_block(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t new_residency,
|
||||
const uvm_page_mask_t *faulted_pages,
|
||||
uvm_va_block_region_t max_prefetch_region)
|
||||
|
||||
{
|
||||
NvU32 big_page_size;
|
||||
uvm_va_block_region_t big_pages_region;
|
||||
uvm_va_space_t *va_space;
|
||||
const uvm_page_mask_t *thrashing_pages;
|
||||
|
||||
UVM_ASSERT(va_block);
|
||||
UVM_ASSERT(va_block_context);
|
||||
|
||||
va_space = uvm_va_block_get_va_space(va_block);
|
||||
|
||||
// Get the big page size for the new residency.
|
||||
// Assume 64K size if the new residency is the CPU or no GPU va space is
|
||||
// registered in the current process for this GPU.
|
||||
@@ -302,13 +279,9 @@ static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_blo
|
||||
UVM_ASSERT(bitmap_tree->leaf_count <= PAGES_PER_UVM_VA_BLOCK);
|
||||
|
||||
uvm_page_mask_shift_left(&bitmap_tree->pages, &bitmap_tree->pages, bitmap_tree->offset);
|
||||
}
|
||||
else {
|
||||
bitmap_tree->offset = 0;
|
||||
bitmap_tree->leaf_count = uvm_va_block_region_num_pages(max_prefetch_region);
|
||||
}
|
||||
|
||||
bitmap_tree->level_count = ilog2(roundup_pow_of_two(bitmap_tree->leaf_count)) + 1;
|
||||
bitmap_tree->level_count = ilog2(roundup_pow_of_two(bitmap_tree->leaf_count)) + 1;
|
||||
}
|
||||
|
||||
thrashing_pages = uvm_perf_thrashing_get_thrashing_pages(va_block);
|
||||
|
||||
@@ -320,25 +293,99 @@ static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_blo
|
||||
max_prefetch_region,
|
||||
faulted_pages,
|
||||
thrashing_pages);
|
||||
}
|
||||
|
||||
// Do not compute prefetch regions with faults on pages that are thrashing
|
||||
if (thrashing_pages)
|
||||
uvm_page_mask_andnot(&va_block_context->scratch_page_mask, faulted_pages, thrashing_pages);
|
||||
else
|
||||
uvm_page_mask_copy(&va_block_context->scratch_page_mask, faulted_pages);
|
||||
static void compute_prefetch_mask(uvm_va_block_region_t faulted_region,
|
||||
uvm_va_block_region_t max_prefetch_region,
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
const uvm_page_mask_t *faulted_pages,
|
||||
uvm_page_mask_t *out_prefetch_mask)
|
||||
{
|
||||
uvm_page_index_t page_index;
|
||||
|
||||
// Update the tree using the scratch mask to compute the pages to prefetch
|
||||
for_each_va_block_page_in_region_mask(page_index, &va_block_context->scratch_page_mask, faulted_region) {
|
||||
uvm_page_mask_zero(out_prefetch_mask);
|
||||
|
||||
// Update the tree using the faulted mask to compute the pages to prefetch.
|
||||
for_each_va_block_page_in_region_mask(page_index, faulted_pages, faulted_region) {
|
||||
uvm_va_block_region_t region = compute_prefetch_region(page_index, bitmap_tree, max_prefetch_region);
|
||||
|
||||
uvm_page_mask_region_fill(prefetch_pages, region);
|
||||
uvm_page_mask_region_fill(out_prefetch_mask, region);
|
||||
|
||||
// Early out if we have already prefetched until the end of the VA block
|
||||
if (region.outer == max_prefetch_region.outer)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Within a block we only allow prefetching to a single processor. Therefore,
|
||||
// if two processors are accessing non-overlapping regions within the same
|
||||
// block they won't benefit from prefetching.
|
||||
//
|
||||
// TODO: Bug 1778034: [uvm] Explore prefetching to different processors within
|
||||
// a VA block.
|
||||
static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t new_residency,
|
||||
const uvm_page_mask_t *faulted_pages,
|
||||
uvm_va_block_region_t faulted_region,
|
||||
uvm_page_mask_t *prefetch_pages,
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree)
|
||||
{
|
||||
const uvm_page_mask_t *resident_mask = NULL;
|
||||
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, faulted_region);
|
||||
uvm_va_block_region_t max_prefetch_region;
|
||||
const uvm_page_mask_t *thrashing_pages = uvm_perf_thrashing_get_thrashing_pages(va_block);
|
||||
|
||||
if (!uvm_id_equal(va_block->prefetch_info.last_migration_proc_id, new_residency)) {
|
||||
va_block->prefetch_info.last_migration_proc_id = new_residency;
|
||||
va_block->prefetch_info.fault_migrations_to_last_proc = 0;
|
||||
}
|
||||
|
||||
// Compute the expanded region that prefetching is allowed from.
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
max_prefetch_region = uvm_hmm_get_prefetch_region(va_block,
|
||||
va_block_context->hmm.vma,
|
||||
policy,
|
||||
uvm_va_block_region_start(va_block, faulted_region));
|
||||
}
|
||||
else {
|
||||
max_prefetch_region = uvm_va_block_region_from_block(va_block);
|
||||
}
|
||||
|
||||
uvm_page_mask_zero(prefetch_pages);
|
||||
|
||||
if (UVM_ID_IS_CPU(new_residency) || va_block->gpus[uvm_id_gpu_index(new_residency)] != NULL)
|
||||
resident_mask = uvm_va_block_resident_mask_get(va_block, new_residency);
|
||||
|
||||
// If this is a first-touch fault and the destination processor is the
|
||||
// preferred location, populate the whole max_prefetch_region.
|
||||
if (uvm_processor_mask_empty(&va_block->resident) &&
|
||||
uvm_id_equal(new_residency, policy->preferred_location)) {
|
||||
uvm_page_mask_region_fill(prefetch_pages, max_prefetch_region);
|
||||
}
|
||||
else {
|
||||
init_bitmap_tree_from_region(bitmap_tree, max_prefetch_region, resident_mask, faulted_pages);
|
||||
|
||||
update_bitmap_tree_from_va_block(bitmap_tree,
|
||||
va_block,
|
||||
va_block_context,
|
||||
new_residency,
|
||||
faulted_pages,
|
||||
max_prefetch_region);
|
||||
|
||||
// Do not compute prefetch regions with faults on pages that are thrashing
|
||||
if (thrashing_pages)
|
||||
uvm_page_mask_andnot(&va_block_context->scratch_page_mask, faulted_pages, thrashing_pages);
|
||||
else
|
||||
uvm_page_mask_copy(&va_block_context->scratch_page_mask, faulted_pages);
|
||||
|
||||
compute_prefetch_mask(faulted_region,
|
||||
max_prefetch_region,
|
||||
bitmap_tree,
|
||||
&va_block_context->scratch_page_mask,
|
||||
prefetch_pages);
|
||||
}
|
||||
|
||||
done:
|
||||
// Do not prefetch pages that are going to be migrated/populated due to a
|
||||
// fault
|
||||
uvm_page_mask_andnot(prefetch_pages, prefetch_pages, faulted_pages);
|
||||
@@ -364,31 +411,58 @@ done:
|
||||
return uvm_page_mask_weight(prefetch_pages);
|
||||
}
|
||||
|
||||
void uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t new_residency,
|
||||
const uvm_page_mask_t *faulted_pages,
|
||||
uvm_va_block_region_t faulted_region,
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
uvm_perf_prefetch_hint_t *out_hint)
|
||||
bool uvm_perf_prefetch_enabled(uvm_va_space_t *va_space)
|
||||
{
|
||||
if (!g_uvm_perf_prefetch_enable)
|
||||
return false;
|
||||
|
||||
UVM_ASSERT(va_space);
|
||||
|
||||
return va_space->test.page_prefetch_enabled;
|
||||
}
|
||||
|
||||
void uvm_perf_prefetch_compute_ats(uvm_va_space_t *va_space,
|
||||
const uvm_page_mask_t *faulted_pages,
|
||||
uvm_va_block_region_t faulted_region,
|
||||
uvm_va_block_region_t max_prefetch_region,
|
||||
const uvm_page_mask_t *residency_mask,
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
uvm_page_mask_t *out_prefetch_mask)
|
||||
{
|
||||
UVM_ASSERT(faulted_pages);
|
||||
UVM_ASSERT(bitmap_tree);
|
||||
UVM_ASSERT(out_prefetch_mask);
|
||||
|
||||
uvm_page_mask_zero(out_prefetch_mask);
|
||||
|
||||
if (!uvm_perf_prefetch_enabled(va_space))
|
||||
return;
|
||||
|
||||
init_bitmap_tree_from_region(bitmap_tree, max_prefetch_region, residency_mask, faulted_pages);
|
||||
|
||||
compute_prefetch_mask(faulted_region, max_prefetch_region, bitmap_tree, faulted_pages, out_prefetch_mask);
|
||||
}
|
||||
|
||||
void uvm_perf_prefetch_get_hint_va_block(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t new_residency,
|
||||
const uvm_page_mask_t *faulted_pages,
|
||||
uvm_va_block_region_t faulted_region,
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
uvm_perf_prefetch_hint_t *out_hint)
|
||||
{
|
||||
const uvm_va_policy_t *policy = va_block_context->policy;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
uvm_page_mask_t *prefetch_pages = &out_hint->prefetch_pages_mask;
|
||||
NvU32 pending_prefetch_pages;
|
||||
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, faulted_region));
|
||||
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, va_block_context, faulted_region));
|
||||
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, va_block_context->hmm.vma, faulted_region));
|
||||
|
||||
out_hint->residency = UVM_ID_INVALID;
|
||||
uvm_page_mask_zero(prefetch_pages);
|
||||
|
||||
if (!g_uvm_perf_prefetch_enable)
|
||||
return;
|
||||
|
||||
if (!va_space->test.page_prefetch_enabled)
|
||||
if (!uvm_perf_prefetch_enabled(va_space))
|
||||
return;
|
||||
|
||||
pending_prefetch_pages = uvm_perf_prefetch_prenotify_fault_migrations(va_block,
|
||||
|
||||
@@ -61,21 +61,41 @@ typedef struct
|
||||
// Global initialization function (no clean up needed).
|
||||
NV_STATUS uvm_perf_prefetch_init(void);
|
||||
|
||||
// Returns whether prefetching is enabled in the VA space.
|
||||
// va_space cannot be NULL.
|
||||
bool uvm_perf_prefetch_enabled(uvm_va_space_t *va_space);
|
||||
|
||||
// Return the prefetch mask with the pages that may be prefetched in an ATS
|
||||
// block. ATS block is a system allocated memory block with base aligned to
|
||||
// UVM_VA_BLOCK_SIZE and a maximum size of UVM_VA_BLOCK_SIZE. The faulted_pages
|
||||
// mask and faulted_region are the pages being faulted on the given residency.
|
||||
//
|
||||
// Only residency_mask can be NULL.
|
||||
//
|
||||
// Locking: The caller must hold the va_space lock.
|
||||
void uvm_perf_prefetch_compute_ats(uvm_va_space_t *va_space,
|
||||
const uvm_page_mask_t *faulted_pages,
|
||||
uvm_va_block_region_t faulted_region,
|
||||
uvm_va_block_region_t max_prefetch_region,
|
||||
const uvm_page_mask_t *residency_mask,
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
uvm_page_mask_t *out_prefetch_mask);
|
||||
|
||||
// Return a hint with the pages that may be prefetched in the block.
|
||||
// The faulted_pages mask and faulted_region are the pages being migrated to
|
||||
// the given residency.
|
||||
// va_block_context must not be NULL, va_block_context->policy must be valid,
|
||||
// and if the va_block is a HMM block, va_block_context->hmm.vma must be valid
|
||||
// which also means the va_block_context->mm is not NULL, retained, and locked
|
||||
// for at least read.
|
||||
// va_block_context must not be NULL, and if the va_block is a HMM
|
||||
// block, va_block_context->hmm.vma must be valid which also means the
|
||||
// va_block_context->mm is not NULL, retained, and locked for at least
|
||||
// read.
|
||||
// Locking: The caller must hold the va_space lock and va_block lock.
|
||||
void uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t new_residency,
|
||||
const uvm_page_mask_t *faulted_pages,
|
||||
uvm_va_block_region_t faulted_region,
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
uvm_perf_prefetch_hint_t *out_hint);
|
||||
void uvm_perf_prefetch_get_hint_va_block(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t new_residency,
|
||||
const uvm_page_mask_t *faulted_pages,
|
||||
uvm_va_block_region_t faulted_region,
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
uvm_perf_prefetch_hint_t *out_hint);
|
||||
|
||||
void uvm_perf_prefetch_bitmap_tree_iter_init(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
uvm_page_index_t page_index,
|
||||
|
||||
@@ -1095,7 +1095,7 @@ static NV_STATUS unmap_remote_pinned_pages(uvm_va_block_t *va_block,
|
||||
NV_STATUS tracker_status;
|
||||
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
|
||||
uvm_processor_id_t processor_id;
|
||||
const uvm_va_policy_t *policy = va_block_context->policy;
|
||||
const uvm_va_policy_t *policy = uvm_va_policy_get(va_block, uvm_va_block_region_start(va_block, region));
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
@@ -1141,10 +1141,9 @@ NV_STATUS uvm_perf_thrashing_unmap_remote_pinned_pages_all(uvm_va_block_t *va_bl
|
||||
{
|
||||
block_thrashing_info_t *block_thrashing;
|
||||
uvm_processor_mask_t unmap_processors;
|
||||
const uvm_va_policy_t *policy = va_block_context->policy;
|
||||
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, region));
|
||||
|
||||
block_thrashing = thrashing_info_get(va_block);
|
||||
if (!block_thrashing || !block_thrashing->pages)
|
||||
@@ -1867,8 +1866,6 @@ static void thrashing_unpin_pages(struct work_struct *work)
|
||||
UVM_ASSERT(uvm_page_mask_test(&block_thrashing->pinned_pages.mask, page_index));
|
||||
|
||||
uvm_va_block_context_init(va_block_context, NULL);
|
||||
va_block_context->policy =
|
||||
uvm_va_policy_get(va_block, uvm_va_block_cpu_page_address(va_block, page_index));
|
||||
|
||||
uvm_perf_thrashing_unmap_remote_pinned_pages_all(va_block,
|
||||
va_block_context,
|
||||
@@ -2123,8 +2120,6 @@ NV_STATUS uvm_test_set_page_thrashing_policy(UVM_TEST_SET_PAGE_THRASHING_POLICY_
|
||||
uvm_va_block_region_t va_block_region = uvm_va_block_region_from_block(va_block);
|
||||
uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, NULL);
|
||||
|
||||
block_context->policy = uvm_va_range_get_policy(va_range);
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
// Unmap may split PTEs and require a retry. Needs to be called
|
||||
|
||||
@@ -103,11 +103,11 @@ void uvm_perf_thrashing_unload(uvm_va_space_t *va_space);
|
||||
// Destroy the thrashing detection struct for the given block.
|
||||
void uvm_perf_thrashing_info_destroy(uvm_va_block_t *va_block);
|
||||
|
||||
// Unmap remote mappings from all processors on the pinned pages
|
||||
// described by region and block_thrashing->pinned pages.
|
||||
// va_block_context must not be NULL and va_block_context->policy must be valid.
|
||||
// See the comments for uvm_va_block_check_policy_is_valid() in uvm_va_block.h.
|
||||
// Locking: the va_block lock must be held.
|
||||
// Unmap remote mappings from all processors on the pinned pages described by
|
||||
// region and block_thrashing->pinned_pages. va_block_context must not be NULL
|
||||
// and policy for the region must match. See the comments for
|
||||
// uvm_va_block_check_policy_is_valid() in uvm_va_block.h. Locking: the
|
||||
// va_block lock must be held.
|
||||
NV_STATUS uvm_perf_thrashing_unmap_remote_pinned_pages_all(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region);
|
||||
|
||||
@@ -3820,18 +3820,11 @@ NV_STATUS uvm_test_evict_chunk(UVM_TEST_EVICT_CHUNK_PARAMS *params, struct file
|
||||
// For virtual mode, look up and retain the block first so that eviction can
|
||||
// be started without the VA space lock held.
|
||||
if (params->eviction_mode == UvmTestEvictModeVirtual) {
|
||||
uvm_va_block_context_t *block_context;
|
||||
if (mm)
|
||||
status = uvm_va_block_find_create(va_space, params->address, NULL, &block);
|
||||
else
|
||||
status = uvm_va_block_find_create_managed(va_space, params->address, &block);
|
||||
|
||||
block_context = uvm_va_block_context_alloc(mm);
|
||||
if (!block_context) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_release_unlock(va_space, mm);
|
||||
goto out;
|
||||
}
|
||||
|
||||
status = uvm_va_block_find_create(va_space, params->address, block_context, &block);
|
||||
uvm_va_block_context_free(block_context);
|
||||
if (status != NV_OK) {
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_or_current_release_unlock(va_space, mm);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -324,7 +324,7 @@ static NV_STATUS gpu_mem_check(uvm_gpu_t *gpu,
|
||||
|
||||
// TODO: Bug 3839176: [UVM][HCC][uvm_test] Update tests that assume GPU
|
||||
// engines can directly access sysmem
|
||||
// Skip this test for now. To enable this test under SEV,
|
||||
// Skip this test for now. To enable this test in Confidential Computing,
|
||||
// the GPU->CPU CE copy needs to be updated so it uses encryption when
|
||||
// CC is enabled.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
@@ -1223,8 +1223,6 @@ static NV_STATUS test_indirect_peers(uvm_gpu_t *owning_gpu, uvm_gpu_t *accessing
|
||||
if (!chunks)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
UVM_ASSERT(!g_uvm_global.sev_enabled);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(UVM_CHUNK_SIZE_MAX, current->mm, &verif_mem), out);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(verif_mem, owning_gpu), out);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(verif_mem, accessing_gpu), out);
|
||||
|
||||
@@ -160,7 +160,7 @@ static NV_STATUS preferred_location_unmap_remote_pages(uvm_va_block_t *va_block,
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS tracker_status;
|
||||
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
|
||||
const uvm_va_policy_t *policy = va_block_context->policy;
|
||||
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);
|
||||
uvm_processor_id_t preferred_location = policy->preferred_location;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
const uvm_page_mask_t *mapped_mask;
|
||||
@@ -279,6 +279,9 @@ static NV_STATUS preferred_location_set(uvm_va_space_t *va_space,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
if (!mm)
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
return uvm_hmm_set_preferred_location(va_space, preferred_location, base, last_address, out_tracker);
|
||||
}
|
||||
|
||||
@@ -445,7 +448,6 @@ NV_STATUS uvm_va_block_set_accessed_by_locked(uvm_va_block_t *va_block,
|
||||
NV_STATUS tracker_status;
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
|
||||
|
||||
status = uvm_va_block_add_mappings(va_block,
|
||||
va_block_context,
|
||||
@@ -467,13 +469,13 @@ NV_STATUS uvm_va_block_set_accessed_by(uvm_va_block_t *va_block,
|
||||
uvm_va_block_region_t region = uvm_va_block_region_from_block(va_block);
|
||||
NV_STATUS status;
|
||||
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
|
||||
uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
|
||||
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
|
||||
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
|
||||
|
||||
// Read duplication takes precedence over SetAccessedBy. Do not add mappings
|
||||
// if read duplication is enabled.
|
||||
if (uvm_va_policy_is_read_duplicate(va_block_context->policy, va_space))
|
||||
if (uvm_va_policy_is_read_duplicate(policy, va_space))
|
||||
return NV_OK;
|
||||
|
||||
status = UVM_VA_BLOCK_LOCK_RETRY(va_block,
|
||||
@@ -592,8 +594,15 @@ static NV_STATUS accessed_by_set(uvm_va_space_t *va_space,
|
||||
UVM_ASSERT(va_range_last->node.end >= last_address);
|
||||
}
|
||||
else {
|
||||
// NULL mm case already filtered by uvm_api_range_type_check()
|
||||
UVM_ASSERT(mm);
|
||||
UVM_ASSERT(type == UVM_API_RANGE_TYPE_HMM);
|
||||
status = uvm_hmm_set_accessed_by(va_space, processor_id, set_bit, base, last_address, &local_tracker);
|
||||
status = uvm_hmm_set_accessed_by(va_space,
|
||||
processor_id,
|
||||
set_bit,
|
||||
base,
|
||||
last_address,
|
||||
&local_tracker);
|
||||
}
|
||||
|
||||
done:
|
||||
@@ -656,7 +665,6 @@ NV_STATUS uvm_va_block_set_read_duplication(uvm_va_block_t *va_block,
|
||||
|
||||
// TODO: Bug 3660922: need to implement HMM read duplication support.
|
||||
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
|
||||
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
|
||||
|
||||
status = UVM_VA_BLOCK_LOCK_RETRY(va_block, &va_block_retry,
|
||||
va_block_set_read_duplication_locked(va_block,
|
||||
@@ -675,7 +683,7 @@ static NV_STATUS va_block_unset_read_duplication_locked(uvm_va_block_t *va_block
|
||||
uvm_processor_id_t processor_id;
|
||||
uvm_va_block_region_t block_region = uvm_va_block_region_from_block(va_block);
|
||||
uvm_page_mask_t *break_read_duplication_pages = &va_block_context->caller_page_mask;
|
||||
const uvm_va_policy_t *policy = va_block_context->policy;
|
||||
const uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
uvm_processor_id_t preferred_location = policy->preferred_location;
|
||||
uvm_processor_mask_t accessed_by = policy->accessed_by;
|
||||
|
||||
@@ -757,7 +765,6 @@ NV_STATUS uvm_va_block_unset_read_duplication(uvm_va_block_t *va_block,
|
||||
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
|
||||
|
||||
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
|
||||
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
|
||||
|
||||
// Restore all SetAccessedBy mappings
|
||||
status = UVM_VA_BLOCK_LOCK_RETRY(va_block, &va_block_retry,
|
||||
@@ -915,7 +922,6 @@ static NV_STATUS system_wide_atomics_set(uvm_va_space_t *va_space, const NvProce
|
||||
if (va_range->type != UVM_VA_RANGE_TYPE_MANAGED)
|
||||
continue;
|
||||
|
||||
va_block_context->policy = uvm_va_range_get_policy(va_range);
|
||||
for_each_va_block_in_va_range(va_range, va_block) {
|
||||
uvm_page_mask_t *non_resident_pages = &va_block_context->caller_page_mask;
|
||||
|
||||
|
||||
@@ -264,7 +264,6 @@ NV_STATUS uvm_range_group_va_range_migrate(uvm_va_range_t *va_range,
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
uvm_assert_rwsem_locked(&va_range->va_space->lock);
|
||||
va_block_context->policy = uvm_va_range_get_policy(va_range);
|
||||
|
||||
// Iterate over blocks, populating them if necessary
|
||||
for (i = uvm_va_range_block_index(va_range, start); i <= uvm_va_range_block_index(va_range, end); ++i) {
|
||||
|
||||
@@ -2069,7 +2069,11 @@ static NV_STATUS tools_access_process_memory(uvm_va_space_t *va_space,
|
||||
|
||||
// The RM flavor of the lock is needed to perform ECC checks.
|
||||
uvm_va_space_down_read_rm(va_space);
|
||||
status = uvm_va_block_find_create(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), block_context, &block);
|
||||
if (mm)
|
||||
status = uvm_va_block_find_create(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), &block_context->hmm.vma, &block);
|
||||
else
|
||||
status = uvm_va_block_find_create_managed(va_space, UVM_PAGE_ALIGN_DOWN(target_va_start), &block);
|
||||
|
||||
if (status != NV_OK)
|
||||
goto unlock_and_exit;
|
||||
|
||||
|
||||
@@ -106,36 +106,6 @@ uvm_va_space_t *uvm_va_block_get_va_space(uvm_va_block_t *va_block)
|
||||
return va_space;
|
||||
}
|
||||
|
||||
bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
|
||||
const uvm_va_policy_t *policy,
|
||||
uvm_va_block_region_t region)
|
||||
{
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
const uvm_va_policy_node_t *node;
|
||||
|
||||
if (uvm_va_policy_is_default(policy)) {
|
||||
// There should only be the default policy within the region.
|
||||
node = uvm_va_policy_node_iter_first(va_block,
|
||||
uvm_va_block_region_start(va_block, region),
|
||||
uvm_va_block_region_end(va_block, region));
|
||||
UVM_ASSERT(!node);
|
||||
}
|
||||
else {
|
||||
// The policy node should cover the region.
|
||||
node = uvm_va_policy_node_from_policy(policy);
|
||||
UVM_ASSERT(node->node.start <= uvm_va_block_region_start(va_block, region));
|
||||
UVM_ASSERT(node->node.end >= uvm_va_block_region_end(va_block, region));
|
||||
}
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT(policy == uvm_va_range_get_policy(va_block->va_range));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static NvU64 block_gpu_pte_flag_cacheable(uvm_va_block_t *block, uvm_gpu_t *gpu, uvm_processor_id_t resident_id)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
|
||||
@@ -3697,7 +3667,6 @@ NV_STATUS uvm_va_block_make_resident_copy(uvm_va_block_t *va_block,
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(uvm_va_block_is_hmm(va_block) || va_block->va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
|
||||
|
||||
resident_mask = block_resident_mask_get_alloc(va_block, dest_id);
|
||||
if (!resident_mask)
|
||||
@@ -3944,7 +3913,6 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
|
||||
|
||||
// TODO: Bug 3660922: need to implement HMM read duplication support.
|
||||
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
|
||||
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
|
||||
|
||||
va_block_context->make_resident.dest_id = dest_id;
|
||||
va_block_context->make_resident.cause = cause;
|
||||
@@ -4742,7 +4710,7 @@ static void block_unmap_cpu(uvm_va_block_t *block, uvm_va_block_region_t region,
|
||||
// Given a mask of mapped pages, returns true if any of the pages in the mask
|
||||
// are mapped remotely by the given GPU.
|
||||
static bool block_has_remote_mapping_gpu(uvm_va_block_t *block,
|
||||
uvm_va_block_context_t *block_context,
|
||||
uvm_page_mask_t *scratch_page_mask,
|
||||
uvm_gpu_id_t gpu_id,
|
||||
const uvm_page_mask_t *mapped_pages)
|
||||
{
|
||||
@@ -4764,7 +4732,7 @@ static bool block_has_remote_mapping_gpu(uvm_va_block_t *block,
|
||||
}
|
||||
|
||||
// Remote pages are pages which are mapped but not resident locally
|
||||
return uvm_page_mask_andnot(&block_context->scratch_page_mask, mapped_pages, &gpu_state->resident);
|
||||
return uvm_page_mask_andnot(scratch_page_mask, mapped_pages, &gpu_state->resident);
|
||||
}
|
||||
|
||||
// Writes pte_clear_val to the 4k PTEs covered by clear_page_mask. If
|
||||
@@ -6659,7 +6627,7 @@ static NV_STATUS block_unmap_gpu(uvm_va_block_t *block,
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
only_local_mappings = !block_has_remote_mapping_gpu(block, block_context, gpu->id, pages_to_unmap);
|
||||
only_local_mappings = !block_has_remote_mapping_gpu(block, &block_context->scratch_page_mask, gpu->id, pages_to_unmap);
|
||||
tlb_membar = uvm_hal_downgrade_membar_type(gpu, only_local_mappings);
|
||||
|
||||
status = uvm_push_begin_acquire(gpu->channel_manager,
|
||||
@@ -6794,16 +6762,15 @@ static NV_STATUS uvm_cpu_insert_page(struct vm_area_struct *vma,
|
||||
}
|
||||
|
||||
static uvm_prot_t compute_logical_prot(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct *hmm_vma,
|
||||
uvm_page_index_t page_index)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
uvm_prot_t logical_prot;
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
NvU64 addr = uvm_va_block_cpu_page_address(va_block, page_index);
|
||||
|
||||
logical_prot = uvm_hmm_compute_logical_prot(va_block, va_block_context, addr);
|
||||
logical_prot = uvm_hmm_compute_logical_prot(va_block, hmm_vma, addr);
|
||||
}
|
||||
else {
|
||||
uvm_va_range_t *va_range = va_block->va_range;
|
||||
@@ -6815,6 +6782,8 @@ static uvm_prot_t compute_logical_prot(uvm_va_block_t *va_block,
|
||||
logical_prot = UVM_PROT_NONE;
|
||||
}
|
||||
else {
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
vma = uvm_va_range_vma(va_range);
|
||||
|
||||
if (!(vma->vm_flags & VM_READ))
|
||||
@@ -6864,13 +6833,15 @@ static struct page *block_page_get(uvm_va_block_t *block, block_phys_page_t bloc
|
||||
// with new_prot permissions
|
||||
// - Guarantee that vm_insert_page is safe to use (vma->vm_mm has a reference
|
||||
// and mmap_lock is held in at least read mode)
|
||||
// - For HMM blocks that vma is valid and safe to use, vma->vm_mm has a
|
||||
// reference and mmap_lock is held in at least read mode
|
||||
// - Ensure that the struct page corresponding to the physical memory being
|
||||
// mapped exists
|
||||
// - Manage the block's residency bitmap
|
||||
// - Ensure that the block hasn't been killed (block->va_range is present)
|
||||
// - Update the pte/mapping tracking state on success
|
||||
static NV_STATUS block_map_cpu_page_to(uvm_va_block_t *block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct *hmm_vma,
|
||||
uvm_processor_id_t resident_id,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_prot_t new_prot)
|
||||
@@ -6883,7 +6854,7 @@ static NV_STATUS block_map_cpu_page_to(uvm_va_block_t *block,
|
||||
NvU64 addr;
|
||||
struct page *page;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_is_hmm(block) || va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
|
||||
UVM_ASSERT((uvm_va_block_is_hmm(block) && hmm_vma) || va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
|
||||
UVM_ASSERT(new_prot != UVM_PROT_NONE);
|
||||
UVM_ASSERT(new_prot < UVM_PROT_MAX);
|
||||
UVM_ASSERT(uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(resident_id)], UVM_ID_CPU));
|
||||
@@ -6904,7 +6875,7 @@ static NV_STATUS block_map_cpu_page_to(uvm_va_block_t *block,
|
||||
|
||||
// Check for existing VMA permissions. They could have been modified after
|
||||
// the initial mmap by mprotect.
|
||||
if (new_prot > compute_logical_prot(block, va_block_context, page_index))
|
||||
if (new_prot > compute_logical_prot(block, hmm_vma, page_index))
|
||||
return NV_ERR_INVALID_ACCESS_TYPE;
|
||||
|
||||
if (uvm_va_block_is_hmm(block)) {
|
||||
@@ -7001,7 +6972,7 @@ static NV_STATUS block_map_cpu_to(uvm_va_block_t *block,
|
||||
|
||||
for_each_va_block_page_in_region_mask(page_index, pages_to_map, region) {
|
||||
status = block_map_cpu_page_to(block,
|
||||
block_context,
|
||||
block_context->hmm.vma,
|
||||
resident_id,
|
||||
page_index,
|
||||
new_prot);
|
||||
@@ -7234,13 +7205,13 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
|
||||
const uvm_page_mask_t *pte_mask;
|
||||
uvm_page_mask_t *running_page_mask = &va_block_context->mapping.map_running_page_mask;
|
||||
NV_STATUS status;
|
||||
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);
|
||||
|
||||
va_block_context->mapping.cause = cause;
|
||||
|
||||
UVM_ASSERT(new_prot != UVM_PROT_NONE);
|
||||
UVM_ASSERT(new_prot < UVM_PROT_MAX);
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
|
||||
|
||||
// Mapping is not supported on the eviction path that doesn't hold the VA
|
||||
// space lock.
|
||||
@@ -7282,7 +7253,7 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
|
||||
|
||||
// Map per resident location so we can more easily detect physically-
|
||||
// contiguous mappings.
|
||||
map_get_allowed_destinations(va_block, va_block_context, va_block_context->policy, id, &allowed_destinations);
|
||||
map_get_allowed_destinations(va_block, va_block_context, policy, id, &allowed_destinations);
|
||||
|
||||
for_each_closest_id(resident_id, &allowed_destinations, id, va_space) {
|
||||
if (UVM_ID_IS_CPU(id)) {
|
||||
@@ -7588,8 +7559,6 @@ NV_STATUS uvm_va_block_map_mask(uvm_va_block_t *va_block,
|
||||
NV_STATUS tracker_status;
|
||||
uvm_processor_id_t id;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
|
||||
|
||||
for_each_id_in_mask(id, map_processor_mask) {
|
||||
status = uvm_va_block_map(va_block,
|
||||
va_block_context,
|
||||
@@ -9573,7 +9542,7 @@ static bool block_region_might_read_duplicate(uvm_va_block_t *va_block,
|
||||
// could be changed in the future to optimize multiple faults/counters on
|
||||
// contiguous pages.
|
||||
static uvm_prot_t compute_new_permission(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct *hmm_vma,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_processor_id_t fault_processor_id,
|
||||
uvm_processor_id_t new_residency,
|
||||
@@ -9586,7 +9555,7 @@ static uvm_prot_t compute_new_permission(uvm_va_block_t *va_block,
|
||||
// query_promote: upgrade access privileges to avoid future faults IF
|
||||
// they don't trigger further revocations.
|
||||
new_prot = uvm_fault_access_type_to_prot(access_type);
|
||||
logical_prot = compute_logical_prot(va_block, va_block_context, page_index);
|
||||
logical_prot = compute_logical_prot(va_block, hmm_vma, page_index);
|
||||
|
||||
UVM_ASSERT(logical_prot >= new_prot);
|
||||
|
||||
@@ -9729,11 +9698,10 @@ NV_STATUS uvm_va_block_add_mappings_after_migration(uvm_va_block_t *va_block,
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
const uvm_page_mask_t *final_page_mask = map_page_mask;
|
||||
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
|
||||
const uvm_va_policy_t *policy = va_block_context->policy;
|
||||
const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);
|
||||
uvm_processor_id_t preferred_location;
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, region));
|
||||
|
||||
// Read duplication takes precedence over SetAccessedBy.
|
||||
//
|
||||
@@ -9959,8 +9927,6 @@ NV_STATUS uvm_va_block_add_mappings(uvm_va_block_t *va_block,
|
||||
uvm_range_group_range_iter_t iter;
|
||||
uvm_prot_t prot_to_map;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
|
||||
|
||||
if (UVM_ID_IS_CPU(processor_id) && !uvm_va_block_is_hmm(va_block)) {
|
||||
if (!uvm_va_range_vma_check(va_range, va_block_context->mm))
|
||||
return NV_OK;
|
||||
@@ -10207,11 +10173,8 @@ uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
|
||||
{
|
||||
uvm_processor_id_t id;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block,
|
||||
va_block_context->policy,
|
||||
uvm_va_block_region_for_page(page_index)));
|
||||
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block,
|
||||
va_block_context,
|
||||
va_block_context->hmm.vma,
|
||||
uvm_va_block_region_for_page(page_index)));
|
||||
|
||||
id = block_select_residency(va_block,
|
||||
@@ -10255,6 +10218,7 @@ static bool check_access_counters_dont_revoke(uvm_va_block_t *block,
|
||||
// Update service_context->prefetch_hint, service_context->per_processor_masks,
|
||||
// and service_context->region.
|
||||
static void uvm_va_block_get_prefetch_hint(uvm_va_block_t *va_block,
|
||||
const uvm_va_policy_t *policy,
|
||||
uvm_service_block_context_t *service_context)
|
||||
{
|
||||
uvm_processor_id_t new_residency;
|
||||
@@ -10265,20 +10229,19 @@ static void uvm_va_block_get_prefetch_hint(uvm_va_block_t *va_block,
|
||||
if (uvm_processor_mask_get_count(&service_context->resident_processors) == 1) {
|
||||
uvm_page_index_t page_index;
|
||||
uvm_page_mask_t *new_residency_mask;
|
||||
const uvm_va_policy_t *policy = service_context->block_context.policy;
|
||||
|
||||
new_residency = uvm_processor_mask_find_first_id(&service_context->resident_processors);
|
||||
new_residency_mask = &service_context->per_processor_masks[uvm_id_value(new_residency)].new_residency;
|
||||
|
||||
// Update prefetch tracking structure with the pages that will migrate
|
||||
// due to faults
|
||||
uvm_perf_prefetch_get_hint(va_block,
|
||||
&service_context->block_context,
|
||||
new_residency,
|
||||
new_residency_mask,
|
||||
service_context->region,
|
||||
&service_context->prefetch_bitmap_tree,
|
||||
&service_context->prefetch_hint);
|
||||
uvm_perf_prefetch_get_hint_va_block(va_block,
|
||||
&service_context->block_context,
|
||||
new_residency,
|
||||
new_residency_mask,
|
||||
service_context->region,
|
||||
&service_context->prefetch_bitmap_tree,
|
||||
&service_context->prefetch_hint);
|
||||
|
||||
// Obtain the prefetch hint and give a fake fault access type to the
|
||||
// prefetched pages
|
||||
@@ -10463,7 +10426,7 @@ NV_STATUS uvm_va_block_service_finish(uvm_processor_id_t processor_id,
|
||||
|
||||
for_each_va_block_page_in_region_mask(page_index, new_residency_mask, service_context->region) {
|
||||
new_prot = compute_new_permission(va_block,
|
||||
&service_context->block_context,
|
||||
service_context->block_context.hmm.vma,
|
||||
page_index,
|
||||
processor_id,
|
||||
new_residency,
|
||||
@@ -10706,11 +10669,8 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block,
|
||||
service_context->block_context.policy,
|
||||
service_context->region));
|
||||
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block,
|
||||
&service_context->block_context,
|
||||
service_context->block_context.hmm.vma,
|
||||
service_context->region));
|
||||
|
||||
// GPU fault servicing must be done under the VA space read lock. GPU fault
|
||||
@@ -10724,7 +10684,9 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
|
||||
else
|
||||
uvm_assert_rwsem_locked_read(&va_space->lock);
|
||||
|
||||
uvm_va_block_get_prefetch_hint(va_block, service_context);
|
||||
uvm_va_block_get_prefetch_hint(va_block,
|
||||
uvm_va_policy_get_region(va_block, service_context->region),
|
||||
service_context);
|
||||
|
||||
for_each_id_in_mask(new_residency, &service_context->resident_processors) {
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
@@ -10757,11 +10719,8 @@ NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
|
||||
uvm_va_range_t *va_range = va_block->va_range;
|
||||
uvm_prot_t access_prot = uvm_fault_access_type_to_prot(access_type);
|
||||
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block,
|
||||
va_block_context->policy,
|
||||
uvm_va_block_region_for_page(page_index)));
|
||||
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block,
|
||||
va_block_context,
|
||||
va_block_context->hmm.vma,
|
||||
uvm_va_block_region_for_page(page_index)));
|
||||
|
||||
// CPU permissions are checked later by block_map_cpu_page.
|
||||
@@ -10779,8 +10738,8 @@ NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
|
||||
// vm_flags at any moment (for example on mprotect) and here we are not
|
||||
// guaranteed to have vma->vm_mm->mmap_lock. During tests we ensure that
|
||||
// this scenario does not happen.
|
||||
if ((va_block_context->mm || uvm_enable_builtin_tests) &&
|
||||
(access_prot > compute_logical_prot(va_block, va_block_context, page_index)))
|
||||
if (((va_block->hmm.va_space && va_block->hmm.va_space->va_space_mm.mm) || uvm_enable_builtin_tests) &&
|
||||
(access_prot > compute_logical_prot(va_block, va_block_context->hmm.vma, page_index)))
|
||||
return NV_ERR_INVALID_ACCESS_TYPE;
|
||||
}
|
||||
|
||||
@@ -10866,6 +10825,7 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
|
||||
uvm_perf_thrashing_hint_t thrashing_hint;
|
||||
uvm_processor_id_t new_residency;
|
||||
bool read_duplicate;
|
||||
const uvm_va_policy_t *policy;
|
||||
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
@@ -10874,13 +10834,13 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
|
||||
|
||||
uvm_assert_mmap_lock_locked(service_context->block_context.mm);
|
||||
|
||||
service_context->block_context.policy = uvm_va_policy_get(va_block, fault_addr);
|
||||
policy = uvm_va_policy_get(va_block, fault_addr);
|
||||
|
||||
if (service_context->num_retries == 0) {
|
||||
// notify event to tools/performance heuristics
|
||||
uvm_perf_event_notify_cpu_fault(&va_space->perf_events,
|
||||
va_block,
|
||||
service_context->block_context.policy->preferred_location,
|
||||
policy->preferred_location,
|
||||
fault_addr,
|
||||
fault_access_type > UVM_FAULT_ACCESS_TYPE_READ,
|
||||
KSTK_EIP(current));
|
||||
@@ -10925,7 +10885,7 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
|
||||
page_index,
|
||||
UVM_ID_CPU,
|
||||
uvm_fault_access_type_mask_bit(fault_access_type),
|
||||
service_context->block_context.policy,
|
||||
policy,
|
||||
&thrashing_hint,
|
||||
UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS,
|
||||
&read_duplicate);
|
||||
@@ -11025,7 +10985,6 @@ NV_STATUS uvm_va_block_find(uvm_va_space_t *va_space, NvU64 addr, uvm_va_block_t
|
||||
NV_STATUS uvm_va_block_find_create_in_range(uvm_va_space_t *va_space,
|
||||
uvm_va_range_t *va_range,
|
||||
NvU64 addr,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_t **out_block)
|
||||
{
|
||||
size_t index;
|
||||
@@ -11033,12 +10992,7 @@ NV_STATUS uvm_va_block_find_create_in_range(uvm_va_space_t *va_space,
|
||||
if (uvm_enable_builtin_tests && atomic_dec_if_positive(&va_space->test.va_block_allocation_fail_nth) == 0)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
if (!va_range) {
|
||||
if (!va_block_context || !va_block_context->mm)
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
return uvm_hmm_va_block_find_create(va_space, addr, va_block_context, out_block);
|
||||
}
|
||||
|
||||
UVM_ASSERT(va_range);
|
||||
UVM_ASSERT(addr >= va_range->node.start);
|
||||
UVM_ASSERT(addr <= va_range->node.end);
|
||||
|
||||
@@ -11052,14 +11006,32 @@ NV_STATUS uvm_va_block_find_create_in_range(uvm_va_space_t *va_space,
|
||||
return uvm_va_range_block_create(va_range, index, out_block);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
NV_STATUS uvm_va_block_find_create_managed(uvm_va_space_t *va_space,
|
||||
NvU64 addr,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_t **out_block)
|
||||
{
|
||||
uvm_va_range_t *va_range = uvm_va_range_find(va_space, addr);
|
||||
|
||||
return uvm_va_block_find_create_in_range(va_space, va_range, addr, va_block_context, out_block);
|
||||
if (va_range)
|
||||
return uvm_va_block_find_create_in_range(va_space, va_range, addr, out_block);
|
||||
else
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
NvU64 addr,
|
||||
struct vm_area_struct **hmm_vma,
|
||||
uvm_va_block_t **out_block)
|
||||
{
|
||||
uvm_va_range_t *va_range = uvm_va_range_find(va_space, addr);
|
||||
|
||||
if (hmm_vma)
|
||||
*hmm_vma = NULL;
|
||||
|
||||
if (va_range)
|
||||
return uvm_va_block_find_create_in_range(va_space, va_range, addr, out_block);
|
||||
else
|
||||
return uvm_hmm_va_block_find_create(va_space, addr, hmm_vma, out_block);
|
||||
}
|
||||
|
||||
// Launch a synchronous, encrypted copy between GPU and CPU.
|
||||
@@ -11236,8 +11208,6 @@ NV_STATUS uvm_va_block_write_from_cpu(uvm_va_block_t *va_block,
|
||||
if (UVM_ID_IS_INVALID(proc))
|
||||
proc = UVM_ID_CPU;
|
||||
|
||||
block_context->policy = uvm_va_policy_get(va_block, dst);
|
||||
|
||||
// Use make_resident() in all cases to break read-duplication, but
|
||||
// block_retry can be NULL as if the page is not resident yet we will make
|
||||
// it resident on the CPU.
|
||||
@@ -11406,7 +11376,6 @@ static void block_add_eviction_mappings(void *args)
|
||||
uvm_va_range_t *va_range = va_block->va_range;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
block_context->policy = uvm_va_range_get_policy(va_range);
|
||||
for_each_id_in_mask(id, &uvm_va_range_get_policy(va_range)->accessed_by) {
|
||||
status = uvm_va_block_set_accessed_by(va_block, block_context, id);
|
||||
if (status != NV_OK)
|
||||
@@ -11557,8 +11526,8 @@ NV_STATUS uvm_va_block_evict_chunks(uvm_va_block_t *va_block,
|
||||
&accessed_by_set);
|
||||
}
|
||||
else {
|
||||
block_context->policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
accessed_by_set = uvm_processor_mask_get_count(&block_context->policy->accessed_by) > 0;
|
||||
const uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
accessed_by_set = uvm_processor_mask_get_count(&policy->accessed_by) > 0;
|
||||
|
||||
// TODO: Bug 1765193: make_resident() breaks read-duplication, but it's
|
||||
// not necessary to do so for eviction. Add a version that unmaps only
|
||||
@@ -11749,19 +11718,16 @@ NV_STATUS uvm_test_va_block_inject_error(UVM_TEST_VA_BLOCK_INJECT_ERROR_PARAMS *
|
||||
struct mm_struct *mm;
|
||||
uvm_va_block_t *va_block;
|
||||
uvm_va_block_test_t *va_block_test;
|
||||
uvm_va_block_context_t *block_context = NULL;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
mm = uvm_va_space_mm_or_current_retain_lock(va_space);
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
block_context = uvm_va_block_context_alloc(mm);
|
||||
if (!block_context) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto out;
|
||||
}
|
||||
if (mm)
|
||||
status = uvm_va_block_find_create(va_space, params->lookup_address, NULL, &va_block);
|
||||
else
|
||||
status = uvm_va_block_find_create_managed(va_space, params->lookup_address, &va_block);
|
||||
|
||||
status = uvm_va_block_find_create(va_space, params->lookup_address, block_context, &va_block);
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
@@ -11801,7 +11767,6 @@ block_unlock:
|
||||
out:
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_or_current_release_unlock(va_space, mm);
|
||||
uvm_va_block_context_free(block_context);
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -11872,7 +11837,11 @@ NV_STATUS uvm_test_change_pte_mapping(UVM_TEST_CHANGE_PTE_MAPPING_PARAMS *params
|
||||
goto out;
|
||||
}
|
||||
|
||||
status = uvm_va_block_find_create(va_space, params->va, block_context, &block);
|
||||
if (mm)
|
||||
status = uvm_va_block_find_create(va_space, params->va, &block_context->hmm.vma, &block);
|
||||
else
|
||||
status = uvm_va_block_find_create_managed(va_space, params->va, &block);
|
||||
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
@@ -11899,8 +11868,6 @@ NV_STATUS uvm_test_change_pte_mapping(UVM_TEST_CHANGE_PTE_MAPPING_PARAMS *params
|
||||
goto out_block;
|
||||
}
|
||||
|
||||
block_context->policy = uvm_va_policy_get(block, params->va);
|
||||
|
||||
if (new_prot == UVM_PROT_NONE) {
|
||||
status = uvm_va_block_unmap(block, block_context, id, region, NULL, &block->tracker);
|
||||
}
|
||||
|
||||
@@ -453,11 +453,12 @@ struct uvm_va_block_struct
|
||||
NvU16 fault_migrations_to_last_proc;
|
||||
} prefetch_info;
|
||||
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
struct
|
||||
{
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
// The MMU notifier is registered per va_block.
|
||||
struct mmu_interval_notifier notifier;
|
||||
#endif
|
||||
|
||||
// This is used to serialize migrations between CPU and GPU while
|
||||
// allowing the va_block lock to be dropped.
|
||||
@@ -487,7 +488,6 @@ struct uvm_va_block_struct
|
||||
// Storage node for range tree of va_blocks.
|
||||
uvm_range_tree_node_t node;
|
||||
} hmm;
|
||||
#endif
|
||||
};
|
||||
|
||||
// We define additional per-VA Block fields for testing. When
|
||||
@@ -678,18 +678,8 @@ static void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context,
|
||||
memset(va_block_context, 0xff, sizeof(*va_block_context));
|
||||
|
||||
va_block_context->mm = mm;
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
va_block_context->hmm.vma = NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Check that a single policy covers the given region for the given va_block.
|
||||
// This always returns true and is intended to only be used with UVM_ASSERT().
|
||||
// Locking: the va_block lock must be held.
|
||||
bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
|
||||
const uvm_va_policy_t *policy,
|
||||
uvm_va_block_region_t region);
|
||||
|
||||
// TODO: Bug 1766480: Using only page masks instead of a combination of regions
|
||||
// and page masks could simplify the below APIs and their implementations
|
||||
// at the cost of having to scan the whole mask for small regions.
|
||||
@@ -734,15 +724,15 @@ bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
|
||||
// user memory is guaranteed not to happen. Allocation-retry of GPU page tables
|
||||
// can still occur though.
|
||||
//
|
||||
// va_block_context must not be NULL. This function will set a bit in
|
||||
// va_block_context->make_resident.pages_changed_residency for each page that
|
||||
// changed residency (due to a migration or first population) as a result of the
|
||||
// operation and va_block_context->make_resident.all_involved_processors for
|
||||
// each processor involved in the copy. This function only sets bits in those
|
||||
// masks. It is the caller's responsibility to zero the masks or not first.
|
||||
//
|
||||
// va_block_context->policy must also be set by the caller for the given region.
|
||||
// See the comments for uvm_va_block_check_policy_is_valid().
|
||||
// va_block_context must not be NULL and policy for the region must
|
||||
// match. This function will set a bit in
|
||||
// va_block_context->make_resident.pages_changed_residency for each
|
||||
// page that changed residency (due to a migration or first
|
||||
// population) as a result of the operation and
|
||||
// va_block_context->make_resident.all_involved_processors for each
|
||||
// processor involved in the copy. This function only sets bits in
|
||||
// those masks. It is the caller's responsibility to zero the masks or
|
||||
// not first.
|
||||
//
|
||||
// Notably any status other than NV_OK indicates that the block's lock might
|
||||
// have been unlocked and relocked.
|
||||
@@ -839,7 +829,7 @@ void uvm_va_block_make_resident_finish(uvm_va_block_t *va_block,
|
||||
// pages because the earlier operation can cause a PTE split or merge which is
|
||||
// assumed by the later operation.
|
||||
//
|
||||
// va_block_context must not be NULL and va_block_context->policy must be valid.
|
||||
// va_block_context must not be NULL and policy for the region must match.
|
||||
// See the comments for uvm_va_block_check_policy_is_valid().
|
||||
//
|
||||
// If allocation-retry was required as part of the operation and was successful,
|
||||
@@ -896,7 +886,7 @@ NV_STATUS uvm_va_block_map_mask(uvm_va_block_t *va_block,
|
||||
// pages because the earlier operation can cause a PTE split or merge which is
|
||||
// assumed by the later operation.
|
||||
//
|
||||
// va_block_context must not be NULL. The va_block_context->policy is unused.
|
||||
// va_block_context must not be NULL.
|
||||
//
|
||||
// If allocation-retry was required as part of the operation and was successful,
|
||||
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the entries in the
|
||||
@@ -929,7 +919,7 @@ NV_STATUS uvm_va_block_unmap_mask(uvm_va_block_t *va_block,
|
||||
// - Unmap the preferred location's processor from any pages in this region
|
||||
// which are not resident on the preferred location.
|
||||
//
|
||||
// va_block_context must not be NULL and va_block_context->policy must be valid.
|
||||
// va_block_context must not be NULL and policy for the region must match.
|
||||
// See the comments for uvm_va_block_check_policy_is_valid().
|
||||
//
|
||||
// LOCKING: The caller must hold the VA block lock.
|
||||
@@ -941,7 +931,7 @@ NV_STATUS uvm_va_block_set_preferred_location_locked(uvm_va_block_t *va_block,
|
||||
// location and policy. Waits for the operation to complete before returning.
|
||||
// This function should only be called with managed va_blocks.
|
||||
//
|
||||
// va_block_context must not be NULL and va_block_context->policy must be valid.
|
||||
// va_block_context must not be NULL and policy for the region must match.
|
||||
// See the comments for uvm_va_block_check_policy_is_valid().
|
||||
//
|
||||
// LOCKING: This takes and releases the VA block lock. If va_block_context->mm
|
||||
@@ -956,7 +946,7 @@ NV_STATUS uvm_va_block_set_accessed_by(uvm_va_block_t *va_block,
|
||||
// the tracker after all mappings have been started.
|
||||
// This function can be called with HMM and managed va_blocks.
|
||||
//
|
||||
// va_block_context must not be NULL and va_block_context->policy must be valid.
|
||||
// va_block_context must not be NULL and policy for the region must match.
|
||||
// See the comments for uvm_va_block_check_policy_is_valid().
|
||||
//
|
||||
// LOCKING: The caller must hold the va_block lock and
|
||||
@@ -970,7 +960,7 @@ NV_STATUS uvm_va_block_set_accessed_by_locked(uvm_va_block_t *va_block,
|
||||
// Breaks SetAccessedBy and remote mappings
|
||||
// This function should only be called with managed va_blocks.
|
||||
//
|
||||
// va_block_context must not be NULL and va_block_context->policy must be valid.
|
||||
// va_block_context must not be NULL and policy for the region must match.
|
||||
// See the comments for uvm_va_block_check_policy_is_valid().
|
||||
//
|
||||
// LOCKING: This takes and releases the VA block lock. If va_block_context->mm
|
||||
@@ -982,7 +972,7 @@ NV_STATUS uvm_va_block_set_read_duplication(uvm_va_block_t *va_block,
|
||||
// Restores SetAccessedBy mappings
|
||||
// This function should only be called with managed va_blocks.
|
||||
//
|
||||
// va_block_context must not be NULL and va_block_context->policy must be valid.
|
||||
// va_block_context must not be NULL and policy for the region must match.
|
||||
// See the comments for uvm_va_block_check_policy_is_valid().
|
||||
//
|
||||
// LOCKING: This takes and releases the VA block lock. If va_block_context->mm
|
||||
@@ -1002,10 +992,9 @@ NV_STATUS uvm_va_block_unset_read_duplication(uvm_va_block_t *va_block,
|
||||
// NV_ERR_INVALID_OPERATION The access would violate the policies specified
|
||||
// by UvmPreventMigrationRangeGroups.
|
||||
//
|
||||
// va_block_context must not be NULL, va_block_context->policy must be valid,
|
||||
// and if the va_block is a HMM block, va_block_context->hmm.vma must be valid
|
||||
// which also means the va_block_context->mm is not NULL, retained, and locked
|
||||
// for at least read.
|
||||
// va_block_context must not be NULL, policy must match, and if the va_block is
|
||||
// a HMM block, va_block_context->hmm.vma must be valid which also means the
|
||||
// va_block_context->mm is not NULL, retained, and locked for at least read.
|
||||
// Locking: the va_block lock must be held.
|
||||
NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
@@ -1041,7 +1030,7 @@ NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
|
||||
// different pages because the earlier operation can cause a PTE split or merge
|
||||
// which is assumed by the later operation.
|
||||
//
|
||||
// va_block_context must not be NULL. The va_block_context->policy is unused.
|
||||
// va_block_context must not be NULL.
|
||||
//
|
||||
// If allocation-retry was required as part of the operation and was successful,
|
||||
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the entries in the
|
||||
@@ -1081,7 +1070,7 @@ NV_STATUS uvm_va_block_revoke_prot_mask(uvm_va_block_t *va_block,
|
||||
// processor_id, which triggered the migration and should have already been
|
||||
// mapped).
|
||||
//
|
||||
// va_block_context must not be NULL and va_block_context->policy must be valid.
|
||||
// va_block_context must not be NULL and policy for the region must match.
|
||||
// See the comments for uvm_va_block_check_policy_is_valid().
|
||||
//
|
||||
// This function acquires/waits for the va_block tracker and updates that
|
||||
@@ -1112,7 +1101,7 @@ NV_STATUS uvm_va_block_add_mappings_after_migration(uvm_va_block_t *va_block,
|
||||
// Note that this can return NV_ERR_MORE_PROCESSING_REQUIRED just like
|
||||
// uvm_va_block_map() indicating that the operation needs to be retried.
|
||||
//
|
||||
// va_block_context must not be NULL and va_block_context->policy must be valid.
|
||||
// va_block_context must not be NULL and policy for the region must match.
|
||||
// See the comments for uvm_va_block_check_policy_is_valid().
|
||||
//
|
||||
// LOCKING: The caller must hold the va block lock. If va_block_context->mm !=
|
||||
@@ -1134,7 +1123,7 @@ NV_STATUS uvm_va_block_add_gpu_va_space(uvm_va_block_t *va_block, uvm_gpu_va_spa
|
||||
// If mm != NULL, that mm is used for any CPU mappings which may be created as
|
||||
// a result of this call. See uvm_va_block_context_t::mm for details.
|
||||
//
|
||||
// va_block_context must not be NULL. The va_block_context->policy is unused.
|
||||
// va_block_context must not be NULL.
|
||||
//
|
||||
// LOCKING: The caller must hold the va_block lock. If block_context->mm is not
|
||||
// NULL, the caller must hold mm->mmap_lock in at least read mode.
|
||||
@@ -1225,7 +1214,6 @@ NV_STATUS uvm_va_block_split_locked(uvm_va_block_t *existing_va_block,
|
||||
// - va_space lock must be held in at least read mode
|
||||
//
|
||||
// service_context->block_context.mm is ignored and vma->vm_mm is used instead.
|
||||
// service_context->block_context.policy is set by this function.
|
||||
//
|
||||
// Returns NV_ERR_INVALID_ACCESS_TYPE if a CPU mapping to fault_addr cannot be
|
||||
// accessed, for example because it's within a range group which is non-
|
||||
@@ -1239,10 +1227,10 @@ NV_STATUS uvm_va_block_cpu_fault(uvm_va_block_t *va_block,
|
||||
// (migrations, cache invalidates, etc.) in response to the given service block
|
||||
// context.
|
||||
//
|
||||
// service_context must not be NULL and service_context->block_context.policy
|
||||
// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
|
||||
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
|
||||
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
|
||||
// service_context must not be NULL and policy for service_context->region must
|
||||
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
|
||||
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
|
||||
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
|
||||
// service_context->prefetch_hint is set by this function.
|
||||
//
|
||||
// Locking:
|
||||
@@ -1267,10 +1255,10 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
|
||||
// Performs population of the destination pages, unmapping and copying source
|
||||
// pages to new_residency.
|
||||
//
|
||||
// service_context must not be NULL and service_context->block_context.policy
|
||||
// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
|
||||
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
|
||||
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
|
||||
// service_context must not be NULL and policy for service_context->region must
|
||||
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
|
||||
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
|
||||
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
|
||||
// service_context->prefetch_hint should be set before calling this function.
|
||||
//
|
||||
// Locking:
|
||||
@@ -1296,10 +1284,10 @@ NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
|
||||
// This updates the va_block residency state and maps the faulting processor_id
|
||||
// to the new residency (which may be remote).
|
||||
//
|
||||
// service_context must not be NULL and service_context->block_context.policy
|
||||
// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
|
||||
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
|
||||
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
|
||||
// service_context must not be NULL and policy for service_context->region must
|
||||
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
|
||||
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
|
||||
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
|
||||
// service_context must be initialized by calling uvm_va_block_service_copy()
|
||||
// before calling this function.
|
||||
//
|
||||
@@ -1428,40 +1416,34 @@ const uvm_page_mask_t *uvm_va_block_map_mask_get(uvm_va_block_t *block, uvm_proc
|
||||
NV_STATUS uvm_va_block_find(uvm_va_space_t *va_space, NvU64 addr, uvm_va_block_t **out_block);
|
||||
|
||||
// Same as uvm_va_block_find except that the block is created if not found.
|
||||
// If addr is covered by a UVM_VA_RANGE_TYPE_MANAGED va_range, a managed block
|
||||
// will be created. Otherwise, if addr is not covered by any va_range, HMM is
|
||||
// enabled in the va_space, and va_block_context and va_block_context->mm are
|
||||
// non-NULL, then a HMM block will be created and va_block_context->hmm.vma is
|
||||
// set to the VMA covering 'addr'. The va_block_context->policy field is left
|
||||
// unchanged.
|
||||
// In either case, if va_block_context->mm is non-NULL, it must be retained and
|
||||
// locked in at least read mode. Return values:
|
||||
// If addr is covered by a UVM_VA_RANGE_TYPE_MANAGED va_range a managed block
|
||||
// will be created. If addr is not covered by any va_range and HMM is
|
||||
// enabled in the va_space then a HMM block will be created and hmm_vma is
|
||||
// set to the VMA covering 'addr'. The va_space_mm must be retained and locked.
|
||||
// Otherwise hmm_vma is set to NULL.
|
||||
// Return values:
|
||||
// NV_ERR_INVALID_ADDRESS addr is not a UVM_VA_RANGE_TYPE_MANAGED va_range nor
|
||||
// a HMM enabled VMA.
|
||||
// NV_ERR_NO_MEMORY memory could not be allocated.
|
||||
NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
NvU64 addr,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
struct vm_area_struct **hmm_vma,
|
||||
uvm_va_block_t **out_block);
|
||||
|
||||
// Same as uvm_va_block_find_create except that va_range lookup was already done
|
||||
// by the caller. If the supplied va_range is NULL, this function behaves just
|
||||
// like when the va_range lookup in uvm_va_block_find_create is NULL.
|
||||
// Same as uvm_va_block_find_create except that only managed va_blocks are
|
||||
// created if not already present in the VA range. Does not require va_space_mm
|
||||
// to be locked or retained.
|
||||
NV_STATUS uvm_va_block_find_create_managed(uvm_va_space_t *va_space,
|
||||
NvU64 addr,
|
||||
uvm_va_block_t **out_block);
|
||||
|
||||
// Same as uvm_va_block_find_create_managed except that va_range lookup was
|
||||
// already done by the caller. The supplied va_range must not be NULL.
|
||||
NV_STATUS uvm_va_block_find_create_in_range(uvm_va_space_t *va_space,
|
||||
uvm_va_range_t *va_range,
|
||||
NvU64 addr,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_t **out_block);
|
||||
|
||||
// Same as uvm_va_block_find_create except that only managed va_blocks are
|
||||
// created if not already present in the VA range.
|
||||
// Thin wrapper: forwards va_space, addr and out_block to
// uvm_va_block_find_create(), passing NULL for the third argument, and returns
// that call's status unchanged.
// NOTE(review): a non-inline declaration with this same name appears a few
// lines above in this diff view, so this definition looks like the removed
// (pre-change) side of the hunk - confirm before relying on it.
static NV_STATUS uvm_va_block_find_create_managed(uvm_va_space_t *va_space,
|
||||
NvU64 addr,
|
||||
uvm_va_block_t **out_block)
|
||||
{
|
||||
return uvm_va_block_find_create(va_space, addr, NULL, out_block);
|
||||
}
|
||||
|
||||
// Look up a chunk backing a specific address within the VA block.
|
||||
// Returns NULL if none.
|
||||
uvm_gpu_chunk_t *uvm_va_block_lookup_gpu_chunk(uvm_va_block_t *va_block, uvm_gpu_t *gpu, NvU64 address);
|
||||
@@ -1476,10 +1458,10 @@ uvm_gpu_chunk_t *uvm_va_block_lookup_gpu_chunk(uvm_va_block_t *va_block, uvm_gpu
|
||||
// The caller needs to handle allocation-retry. va_block_retry can be NULL if
|
||||
// the destination is the CPU.
|
||||
//
|
||||
// va_block_context must not be NULL and va_block_context->policy must be valid.
|
||||
// See the comments for uvm_va_block_check_policy_is_valid().
|
||||
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
|
||||
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
|
||||
// va_block_context must not be NULL and policy for the region must match. See
|
||||
// the comments for uvm_va_block_check_policy_is_valid(). If va_block is a HMM
|
||||
// block, va_block_context->hmm.vma must be valid. See the comments for
|
||||
// uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
|
||||
//
|
||||
// LOCKING: The caller must hold the va_block lock. If va_block_context->mm !=
|
||||
// NULL, va_block_context->mm->mmap_lock must be held in at least
|
||||
@@ -1497,7 +1479,7 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
// The [dst, dst + size) range has to fit within a single PAGE_SIZE page.
|
||||
//
|
||||
// va_block_context must not be NULL. The caller is not required to set
|
||||
// va_block_context->policy or va_block_context->hmm.vma.
|
||||
// va_block_context->hmm.vma.
|
||||
//
|
||||
// The caller needs to support allocation-retry of page tables.
|
||||
//
|
||||
@@ -1569,7 +1551,7 @@ void uvm_va_block_mark_cpu_dirty(uvm_va_block_t *va_block);
|
||||
// successful, NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case the
|
||||
// block's lock was unlocked and relocked.
|
||||
//
|
||||
// va_block_context must not be NULL. The va_block_context->policy is unused.
|
||||
// va_block_context must not be NULL.
|
||||
//
|
||||
// LOCKING: The caller must hold the va_block lock.
|
||||
NV_STATUS uvm_va_block_set_cancel(uvm_va_block_t *va_block, uvm_va_block_context_t *block_context, uvm_gpu_t *gpu);
|
||||
@@ -1650,12 +1632,18 @@ static uvm_va_block_region_t uvm_va_block_region_from_block(uvm_va_block_t *va_b
|
||||
return uvm_va_block_region(0, uvm_va_block_num_cpu_pages(va_block));
|
||||
}
|
||||
|
||||
// Create a block region from a va block and page mask. Note that the region
|
||||
// Create a block region from a va block and page mask. If va_block is NULL, the
|
||||
// region is assumed to cover the maximum va_block size. Note that the region
|
||||
// covers the first through the last set bit and may have unset bits in between.
|
||||
static uvm_va_block_region_t uvm_va_block_region_from_mask(uvm_va_block_t *va_block, const uvm_page_mask_t *page_mask)
|
||||
{
|
||||
uvm_va_block_region_t region;
|
||||
uvm_page_index_t outer = uvm_va_block_num_cpu_pages(va_block);
|
||||
uvm_page_index_t outer;
|
||||
|
||||
if (va_block)
|
||||
outer = uvm_va_block_num_cpu_pages(va_block);
|
||||
else
|
||||
outer = PAGES_PER_UVM_VA_BLOCK;
|
||||
|
||||
region.first = find_first_bit(page_mask->bitmap, outer);
|
||||
if (region.first >= outer) {
|
||||
@@ -2140,15 +2128,14 @@ uvm_va_block_region_t uvm_va_block_big_page_region_subset(uvm_va_block_t *va_blo
|
||||
// MAX_BIG_PAGES_PER_UVM_VA_BLOCK is returned.
|
||||
size_t uvm_va_block_big_page_index(uvm_va_block_t *va_block, uvm_page_index_t page_index, NvU32 big_page_size);
|
||||
|
||||
// Returns the new residency for a page that faulted or triggered access
|
||||
// counter notifications. The read_duplicate output parameter indicates if the
|
||||
// page meets the requirements to be read-duplicated
|
||||
// va_block_context must not be NULL, va_block_context->policy must be valid,
|
||||
// and if the va_block is a HMM block, va_block_context->hmm.vma must be valid
|
||||
// which also means the va_block_context->mm is not NULL, retained, and locked
|
||||
// for at least read. See the comments for uvm_va_block_check_policy_is_valid()
|
||||
// and uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
|
||||
// Locking: the va_block lock must be held.
|
||||
// Returns the new residency for a page that faulted or triggered access counter
|
||||
// notifications. The read_duplicate output parameter indicates if the page
|
||||
// meets the requirements to be read-duplicated. va_block_context must not be
|
||||
// NULL, and if the va_block is a HMM block, va_block_context->hmm.vma must be
|
||||
// valid which also means the va_block_context->mm is not NULL, retained, and
|
||||
// locked for at least read. See the comments for
|
||||
// uvm_va_block_check_policy_is_valid() and uvm_hmm_check_context_vma_is_valid()
|
||||
// in uvm_hmm.h. Locking: the va_block lock must be held.
|
||||
uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_page_index_t page_index,
|
||||
|
||||
@@ -29,9 +29,7 @@
|
||||
#include "uvm_tlb_batch.h"
|
||||
#include "uvm_forward_decl.h"
|
||||
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
#include <linux/migrate.h>
|
||||
#endif
|
||||
|
||||
// UVM_VA_BLOCK_BITS is 21, meaning the maximum block size is 2MB. Rationale:
|
||||
// - 2MB matches the largest Pascal GPU page size so it's a natural fit
|
||||
@@ -234,9 +232,6 @@ typedef struct
|
||||
// the mm, such as creating CPU mappings.
|
||||
struct mm_struct *mm;
|
||||
|
||||
const uvm_va_policy_t *policy;
|
||||
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
struct
|
||||
{
|
||||
// These are used for migrate_vma_*(), hmm_range_fault(), and
|
||||
@@ -257,10 +252,11 @@ typedef struct
|
||||
// Cached VMA pointer. This is only valid while holding the mmap_lock.
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
// Used for migrate_vma_*() to migrate pages to/from GPU/CPU.
|
||||
struct migrate_vma migrate_vma_args;
|
||||
} hmm;
|
||||
#endif
|
||||
} hmm;
|
||||
|
||||
// Convenience buffer for page mask prints
|
||||
char page_mask_string_buffer[UVM_PAGE_MASK_PRINT_MIN_BUFFER_SIZE];
|
||||
|
||||
@@ -54,6 +54,52 @@ const uvm_va_policy_t *uvm_va_policy_get(uvm_va_block_t *va_block, NvU64 addr)
|
||||
}
|
||||
}
|
||||
|
||||
// HMM va_blocks can have different polices for different regions withing the
|
||||
// va_block. This function checks the given region is covered by the same policy
|
||||
// and asserts if the region is covered by different policies.
|
||||
// This always returns true and is intended to only be used with UVM_ASSERT() to
|
||||
// avoid calling it on release builds.
|
||||
// Locking: the va_block lock must be held.
|
||||
static bool uvm_hmm_va_block_assert_policy_is_valid(uvm_va_block_t *va_block,
|
||||
const uvm_va_policy_t *policy,
|
||||
uvm_va_block_region_t region)
|
||||
{
|
||||
const uvm_va_policy_node_t *node;
|
||||
|
||||
if (uvm_va_policy_is_default(policy)) {
|
||||
// There should only be the default policy within the region.
|
||||
node = uvm_va_policy_node_iter_first(va_block,
|
||||
uvm_va_block_region_start(va_block, region),
|
||||
uvm_va_block_region_end(va_block, region));
|
||||
UVM_ASSERT(!node);
|
||||
}
|
||||
else {
|
||||
// The policy node should cover the region.
|
||||
node = uvm_va_policy_node_from_policy(policy);
|
||||
UVM_ASSERT(node->node.start <= uvm_va_block_region_start(va_block, region));
|
||||
UVM_ASSERT(node->node.end >= uvm_va_block_region_end(va_block, region));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Return the policy that applies to the given region of va_block.
//
// For HMM blocks the policy is looked up at the region's start address, falling
// back to uvm_va_policy_default when no policy node is found there. The result
// is then checked with UVM_ASSERT() to cover the whole region.
// For all other blocks the policy of the block's va_range is returned.
//
// Locking: the va_block lock must be held (asserted on entry).
const uvm_va_policy_t *uvm_va_policy_get_region(uvm_va_block_t *va_block, uvm_va_block_region_t region)
{
    uvm_assert_mutex_locked(&va_block->lock);

    if (uvm_va_block_is_hmm(va_block)) {
        const uvm_va_policy_t *policy;
        const uvm_va_policy_node_t *node;

        node = uvm_va_policy_node_find(va_block, uvm_va_block_region_start(va_block, region));

        // No node at the start address means the default policy applies.
        policy = node ? &node->policy : &uvm_va_policy_default;
        UVM_ASSERT(uvm_hmm_va_block_assert_policy_is_valid(va_block, policy, region));
        return policy;
    }
    else {
        return uvm_va_range_get_policy(va_block->va_range);
    }
}
|
||||
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
|
||||
static struct kmem_cache *g_uvm_va_policy_node_cache __read_mostly;
|
||||
|
||||
@@ -100,6 +100,9 @@ bool uvm_va_policy_is_read_duplicate(const uvm_va_policy_t *policy, uvm_va_space
|
||||
// Locking: The va_block lock must be held.
|
||||
const uvm_va_policy_t *uvm_va_policy_get(uvm_va_block_t *va_block, NvU64 addr);
|
||||
|
||||
// Same as above but asserts the policy covers the whole region
|
||||
const uvm_va_policy_t *uvm_va_policy_get_region(uvm_va_block_t *va_block, uvm_va_block_region_t region);
|
||||
|
||||
// Return a uvm_va_policy_node_t given a uvm_va_policy_t pointer.
|
||||
static const uvm_va_policy_node_t *uvm_va_policy_node_from_policy(const uvm_va_policy_t *policy)
|
||||
{
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -376,7 +376,7 @@ NV_STATUS uvm_va_range_create_semaphore_pool(uvm_va_space_t *va_space,
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
if (i == 0 && g_uvm_global.sev_enabled)
|
||||
if (i == 0 && g_uvm_global.conf_computing_enabled)
|
||||
mem_alloc_params.dma_owner = gpu;
|
||||
|
||||
if (attrs.is_cacheable) {
|
||||
@@ -608,7 +608,6 @@ static NV_STATUS va_range_add_gpu_va_space_managed(uvm_va_range_t *va_range,
|
||||
uvm_va_block_t *va_block;
|
||||
uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, mm);
|
||||
|
||||
va_block_context->policy = uvm_va_range_get_policy(va_range);
|
||||
|
||||
// TODO: Bug 2090378. Consolidate all per-VA block operations within
|
||||
// uvm_va_block_add_gpu_va_space so we only need to take the VA block
|
||||
@@ -687,7 +686,6 @@ static void va_range_remove_gpu_va_space_managed(uvm_va_range_t *va_range,
|
||||
bool should_enable_read_duplicate;
|
||||
uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, mm);
|
||||
|
||||
va_block_context->policy = uvm_va_range_get_policy(va_range);
|
||||
should_enable_read_duplicate =
|
||||
uvm_va_range_get_policy(va_range)->read_duplication == UVM_READ_DUPLICATION_ENABLED &&
|
||||
uvm_va_space_can_read_duplicate(va_space, NULL) != uvm_va_space_can_read_duplicate(va_space, gpu_va_space->gpu);
|
||||
@@ -769,7 +767,6 @@ static NV_STATUS uvm_va_range_enable_peer_managed(uvm_va_range_t *va_range, uvm_
|
||||
uvm_va_space_t *va_space = va_range->va_space;
|
||||
uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, NULL);
|
||||
|
||||
va_block_context->policy = uvm_va_range_get_policy(va_range);
|
||||
|
||||
for_each_va_block_in_va_range(va_range, va_block) {
|
||||
// TODO: Bug 1767224: Refactor the uvm_va_block_set_accessed_by logic
|
||||
@@ -1322,7 +1319,6 @@ static NV_STATUS range_unmap_mask(uvm_va_range_t *va_range,
|
||||
if (uvm_processor_mask_empty(mask))
|
||||
return NV_OK;
|
||||
|
||||
block_context->policy = uvm_va_range_get_policy(va_range);
|
||||
|
||||
for_each_va_block_in_va_range(va_range, block) {
|
||||
NV_STATUS status;
|
||||
@@ -1364,7 +1360,6 @@ static NV_STATUS range_map_uvm_lite_gpus(uvm_va_range_t *va_range, uvm_tracker_t
|
||||
if (uvm_processor_mask_empty(&va_range->uvm_lite_gpus))
|
||||
return NV_OK;
|
||||
|
||||
va_block_context->policy = uvm_va_range_get_policy(va_range);
|
||||
|
||||
for_each_va_block_in_va_range(va_range, va_block) {
|
||||
// UVM-Lite GPUs always map with RWA
|
||||
@@ -1528,7 +1523,6 @@ NV_STATUS uvm_va_range_set_preferred_location(uvm_va_range_t *va_range,
|
||||
uvm_processor_mask_copy(&va_range->uvm_lite_gpus, &new_uvm_lite_gpus);
|
||||
|
||||
va_block_context = uvm_va_space_block_context(va_space, mm);
|
||||
va_block_context->policy = uvm_va_range_get_policy(va_range);
|
||||
|
||||
for_each_va_block_in_va_range(va_range, va_block) {
|
||||
uvm_processor_id_t id;
|
||||
@@ -1610,7 +1604,6 @@ NV_STATUS uvm_va_range_set_accessed_by(uvm_va_range_t *va_range,
|
||||
|
||||
uvm_processor_mask_copy(&va_range->uvm_lite_gpus, &new_uvm_lite_gpus);
|
||||
va_block_context = uvm_va_space_block_context(va_space, mm);
|
||||
va_block_context->policy = policy;
|
||||
|
||||
for_each_va_block_in_va_range(va_range, va_block) {
|
||||
status = uvm_va_block_set_accessed_by(va_block, va_block_context, processor_id);
|
||||
@@ -1657,7 +1650,6 @@ NV_STATUS uvm_va_range_set_read_duplication(uvm_va_range_t *va_range, struct mm_
|
||||
return NV_OK;
|
||||
|
||||
va_block_context = uvm_va_space_block_context(va_range->va_space, mm);
|
||||
va_block_context->policy = uvm_va_range_get_policy(va_range);
|
||||
|
||||
for_each_va_block_in_va_range(va_range, va_block) {
|
||||
NV_STATUS status = uvm_va_block_set_read_duplication(va_block, va_block_context);
|
||||
@@ -1679,7 +1671,6 @@ NV_STATUS uvm_va_range_unset_read_duplication(uvm_va_range_t *va_range, struct m
|
||||
return NV_OK;
|
||||
|
||||
va_block_context = uvm_va_space_block_context(va_range->va_space, mm);
|
||||
va_block_context->policy = uvm_va_range_get_policy(va_range);
|
||||
|
||||
for_each_va_block_in_va_range(va_range, va_block) {
|
||||
status = uvm_va_block_unset_read_duplication(va_block, va_block_context);
|
||||
@@ -1816,7 +1807,7 @@ NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params,
|
||||
if (params->gpuAttributesCount > UVM_MAX_GPUS)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
if (g_uvm_global.sev_enabled && params->gpuAttributesCount == 0)
|
||||
if (g_uvm_global.conf_computing_enabled && params->gpuAttributesCount == 0)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
// The mm needs to be locked in order to remove stale HMM va_blocks.
|
||||
|
||||
@@ -242,9 +242,7 @@ NV_STATUS uvm_va_space_create(struct address_space *mapping, uvm_va_space_t **va
|
||||
if (status != NV_OK)
|
||||
goto fail;
|
||||
|
||||
status = uvm_hmm_va_space_initialize(va_space);
|
||||
if (status != NV_OK)
|
||||
goto fail;
|
||||
uvm_hmm_va_space_initialize(va_space);
|
||||
|
||||
uvm_va_space_up_write(va_space);
|
||||
uvm_up_write_mmap_lock(current->mm);
|
||||
@@ -2226,11 +2224,12 @@ static vm_fault_t uvm_va_space_cpu_fault(uvm_va_space_t *va_space,
|
||||
// address with mremap() so create a new va_block if needed.
|
||||
status = uvm_hmm_va_block_find_create(va_space,
|
||||
fault_addr,
|
||||
&service_context->block_context,
|
||||
&service_context->block_context.hmm.vma,
|
||||
&va_block);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
|
||||
UVM_ASSERT(service_context->block_context.hmm.vma == vma);
|
||||
status = uvm_hmm_migrate_begin(va_block);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
|
||||
@@ -274,6 +274,22 @@ NV_STATUS uvm_va_space_mm_register(uvm_va_space_t *va_space)
|
||||
}
|
||||
}
|
||||
|
||||
if ((UVM_IS_CONFIG_HMM() || UVM_ATS_PREFETCH_SUPPORTED()) && uvm_va_space_pageable_mem_access_supported(va_space)) {
|
||||
#if UVM_CAN_USE_MMU_NOTIFIERS()
|
||||
// Initialize MMU interval notifiers for this process. This allows
|
||||
// mmu_interval_notifier_insert() to be called without holding the
|
||||
// mmap_lock for write.
|
||||
// Note: there is no __mmu_notifier_unregister(), this call just
|
||||
// allocates memory which is attached to the mm_struct and freed
|
||||
// when the mm_struct is freed.
|
||||
ret = __mmu_notifier_register(NULL, current->mm);
|
||||
if (ret)
|
||||
return errno_to_nv_status(ret);
|
||||
#else
|
||||
UVM_ASSERT(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
|
||||
33
kernel-open/nvidia/detect-self-hosted.h
Normal file
33
kernel-open/nvidia/detect-self-hosted.h
Normal file
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __DETECT_SELF_HOSTED_H__
|
||||
#define __DETECT_SELF_HOSTED_H__
|
||||
|
||||
// PCI devIds 0x2340-0x237f are for Self-Hosted Hopper.
//
// Returns 1 when devid lies within that range (both bounds inclusive) and 0
// otherwise.
static inline int pci_devid_is_self_hosted(unsigned short devid)
{
    return devid >= 0x2340 && devid <= 0x237f;
}
|
||||
|
||||
#endif
|
||||
@@ -96,6 +96,8 @@
|
||||
|
||||
#include "conftest/patches.h"
|
||||
|
||||
#include "detect-self-hosted.h"
|
||||
|
||||
#define RM_THRESHOLD_TOTAL_IRQ_COUNT 100000
|
||||
#define RM_THRESHOLD_UNAHNDLED_IRQ_COUNT 99900
|
||||
#define RM_UNHANDLED_TIMEOUT_US 100000
|
||||
|
||||
@@ -209,7 +209,7 @@ NV_STATUS nvUvmInterfaceSessionCreate(uvmGpuSessionHandle *session,
|
||||
memset(platformInfo, 0, sizeof(*platformInfo));
|
||||
platformInfo->atsSupported = nv_ats_supported;
|
||||
|
||||
platformInfo->sevEnabled = os_cc_enabled;
|
||||
platformInfo->confComputingEnabled = os_cc_enabled;
|
||||
|
||||
status = rm_gpu_ops_create_session(sp, (gpuSessionHandle *)session);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user