530.30.02

Andy Ritger
2023-02-28 11:12:44 -08:00
parent e598191e8e
commit 4397463e73
928 changed files with 124728 additions and 88525 deletions


@@ -1,5 +1,5 @@
/*******************************************************************************
-Copyright (c) 2015-2022 NVIDIA Corporation
+Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -43,6 +43,7 @@
#include "nv-kthread-q.h"
#include <linux/mmu_notifier.h>
+#include <linux/wait.h>
// VA blocks are the leaf nodes in the uvm_va_space tree for managed allocations
// (VA ranges with type == UVM_VA_RANGE_TYPE_MANAGED):
@@ -328,7 +329,7 @@ struct uvm_va_block_struct
// allocations. See uvm_pmm_sysmem.h for more details on CPU chunks.
// This member is meant to hold an opaque value indicating the CPU
// chunk storage method. For more details on CPU chunk storage,
-// see uvm_cpu_chunk_storage_type_t in uvm_pmm_sysmem.c.
+// see uvm_cpu_chunk_storage_type_t in uvm_va_block.c.
unsigned long chunks;
// Per-page allocation bit vector.
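For illustration only, here is a minimal sketch of the kind of tagged-word encoding such an opaque value can carry; the real scheme is uvm_cpu_chunk_storage_type_t in uvm_va_block.c, and every name below is hypothetical:

// Hypothetical example, not driver code: pack a storage-type tag into the
// low bit of a pointer-sized word and keep the rest as a pointer.
typedef enum
{
    EXAMPLE_STORAGE_CHUNK = 0, // the word holds a single chunk pointer
    EXAMPLE_STORAGE_MIXED = 1, // the word points to a table of smaller chunks
} example_storage_type_t;

#define EXAMPLE_STORAGE_MASK 0x1UL

static example_storage_type_t example_storage_type(unsigned long chunks)
{
    return (example_storage_type_t)(chunks & EXAMPLE_STORAGE_MASK);
}

static void *example_storage_ptr(unsigned long chunks)
{
    return (void *)(chunks & ~EXAMPLE_STORAGE_MASK);
}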
@@ -352,12 +353,18 @@ struct uvm_va_block_struct
// UVM_PTE_BITS_CPU_WRITE is not set, the mapping is read-only.
// Otherwise, the mapping is read-write.
//
-// Note that this is the maximum permissions a PTE could have, but not
-// necessarily the actual current permissions of the CPU PTEs. The UVM
-// driver will never change the PTEs without updating this state, but
-// the kernel can downgrade our CPU mappings at any time without
-// notifying the UVM driver (for example in response to user space
-// calling madvise with MADV_DONTNEED).
+// For managed allocations, this is the maximum permissions a PTE
+// could have, but not necessarily the actual current permissions of the
+// CPU PTEs. The UVM driver will never change the PTEs without updating
+// this state, but the kernel can downgrade our CPU mappings at any time
+// without notifying the UVM driver (for example in response to user
+// space calling madvise with MADV_DONTNEED).
+//
+// For HMM allocations, this is the minimum permission the CPU has since
+// Linux can upgrade a read-only PTE to read-write without notifying
+// the UVM driver. This is why read duplication isn't currently
+// supported.
+// TODO: Bug 3660922: Need to handle read duplication at some point.
uvm_page_mask_t pte_bits[UVM_PTE_BITS_CPU_MAX];
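To make the max-vs-min semantics concrete, a sketch of how a caller holding the va_block lock might consult these masks; it assumes the member is reached as block->cpu.pte_bits and uses the existing uvm_page_mask_test() helper:

// Illustrative sketch, not driver code: test the CPU write bit for one page.
// For managed blocks a set bit is an upper bound on the real PTE permission;
// for HMM blocks it is a lower bound, as described above.
static bool example_cpu_may_write(uvm_va_block_t *block, uvm_page_index_t page_index)
{
    return uvm_page_mask_test(&block->cpu.pte_bits[UVM_PTE_BITS_CPU_WRITE], page_index);
}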
// Whether the CPU has ever mapped a page on this VA block. This is
@@ -452,6 +459,19 @@ struct uvm_va_block_struct
// The MMU notifier is registered per va_block.
struct mmu_interval_notifier notifier;
+// Wait queue for GPU atomic operations to system memory.
+struct wait_queue_head atomic_waitq;
+// Mask of pages being migrated to system memory for GPU atomic access.
+// It is used so other threads don't try to migrate those pages while
+// make_device_exclusive_range() is called without holding the va_block
+// lock.
+uvm_page_mask_t atomic_busy;
+// Sequence number to tell if any changes were made to the va_block
+// while not holding the block lock and calling hmm_range_fault().
+unsigned long changed;
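A sketch of how the new fields could be used together; this is a hypothetical helper, assuming the HMM members are reached via block->hmm and that the uvm_page_mask_region_empty() mask helper is available:

// Hypothetical sketch: block until no page in the region is marked busy by an
// in-flight make_device_exclusive_range() call. Runs without the block lock,
// matching the comment on atomic_busy above.
static void example_wait_for_atomic_idle(uvm_va_block_t *block, uvm_va_block_region_t region)
{
    wait_event(block->hmm.atomic_waitq,
               uvm_page_mask_region_empty(&block->hmm.atomic_busy, region));
}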
// Parent VA space pointer. It is NULL for managed blocks or if
// the HMM block is dead. This field can be read while holding the
// block lock and is only modified while holding the va_space write
@@ -522,7 +542,7 @@ struct uvm_va_block_wrapper_struct
};
// Tracking needed for supporting allocation-retry of user GPU memory
-typedef struct
+struct uvm_va_block_retry_struct
{
// A tracker used for all allocations from PMM.
uvm_tracker_t tracker;
@@ -537,7 +557,7 @@ typedef struct
// can contain chunks from multiple GPUs. All the used chunks are unpinned
// when the operation is finished with uvm_va_block_retry_deinit().
struct list_head used_chunks;
-} uvm_va_block_retry_t;
+};
// Module load/exit
NV_STATUS uvm_va_block_init(void);
@@ -635,6 +655,12 @@ uvm_va_space_t *uvm_va_block_get_va_space_maybe_dead(uvm_va_block_t *va_block);
// is held in write mode.
uvm_va_space_t *uvm_va_block_get_va_space(uvm_va_block_t *va_block);
+// Return true if the VA space has access counter migrations enabled and should
+// remote map pages evicted to system memory. This is OK since access counters
+// can pull the data back to vidmem if sufficient accesses trigger a migration.
+// The caller must ensure that the VA space cannot go away.
+bool uvm_va_space_map_remote_on_eviction(uvm_va_space_t *va_space);
// Dynamic cache-based allocation for uvm_va_block_context_t.
//
// See uvm_va_block_context_init() for a description of the mm parameter.
@@ -663,7 +689,7 @@ static void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context,
// This always returns true and is intended to only be used with UVM_ASSERT().
// Locking: the va_block lock must be held.
bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
-                                        uvm_va_policy_t *policy,
+                                        const uvm_va_policy_t *policy,
uvm_va_block_region_t region);
// TODO: Bug 1766480: Using only page masks instead of a combination of regions
@@ -697,7 +723,7 @@ bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
//
// Allocation-retry: this operation may need to perform eviction to be able to
// allocate GPU memory successfully and if that happens,
-// NV_WARN_MORE_PROCESSING_REQUIRED will be returned. That also means that the
+// NV_ERR_MORE_PROCESSING_REQUIRED will be returned. That also means that the
// block's lock has been unlocked and relocked as part of the call and that the
// whole sequence of operations performed under the block's lock needs to be
// attempted again. To facilitate that, the caller needs to provide the same
@@ -707,14 +733,15 @@ bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
// caller.
//
// If dest_id is the CPU then va_block_retry can be NULL and allocation-retry of
-// user memory is guaranteed not to happen. Allocation-retry of page tables can
-// still occur though.
+// user memory is guaranteed not to happen. Allocation-retry of GPU page tables
+// can still occur though.
//
// va_block_context must not be NULL. This function will set a bit in
// va_block_context->make_resident.pages_changed_residency for each page that
// changed residency (due to a migration or first population) as a result of the
-// operation. This function only sets bits in that mask. It is the caller's
-// responsiblity to zero the mask or not first.
+// operation and va_block_context->make_resident.all_involved_processors for
+// each processor involved in the copy. This function only sets bits in those
+// masks. It is the caller's responsibility to zero the masks first if needed.
//
// va_block_context->policy must also be set by the caller for the given region.
// See the comments for uvm_va_block_check_policy_is_valid().
@@ -723,6 +750,8 @@ bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
// have been unlocked and relocked.
//
// LOCKING: The caller must hold the va_block lock.
+// If va_block_context->mm != NULL, va_block_context->mm->mmap_lock must be
+// held in at least read mode.
NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_va_block_context_t *va_block_context,
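For illustration, a sketch of the caller-side retry loop the comment above describes. This is a hypothetical function: it assumes uvm_va_block_retry_init()/uvm_va_block_retry_deinit() are the init/teardown helpers for the retry struct and uses UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE as an example cause.

static NV_STATUS example_make_resident_with_retry(uvm_va_block_t *block,
                                                  uvm_va_block_context_t *ctx,
                                                  uvm_processor_id_t dest_id,
                                                  uvm_va_block_region_t region)
{
    uvm_va_block_retry_t retry;
    NV_STATUS status;

    uvm_va_block_retry_init(&retry);
    uvm_mutex_lock(&block->lock);

    do {
        // On NV_ERR_MORE_PROCESSING_REQUIRED the block lock was dropped and
        // reacquired inside the call, so the whole locked sequence must be
        // redone with the same retry struct.
        status = uvm_va_block_make_resident(block,
                                            &retry,
                                            ctx,
                                            dest_id,
                                            region,
                                            NULL,
                                            NULL,
                                            UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE);
    } while (status == NV_ERR_MORE_PROCESSING_REQUIRED);

    uvm_mutex_unlock(&block->lock);
    uvm_va_block_retry_deinit(&retry, block);

    return status;
}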
@@ -743,8 +772,6 @@ NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,
// are broken
// - Only managed va_blocks are supported.
// TODO: Bug 3660922: need to implement HMM read duplication support.
-// - LOCKING: If va_block_context->mm != NULL, va_block_context->mm->mmap_lock
-// must be held in at least read mode.
NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_va_block_context_t *va_block_context,
@@ -756,31 +783,35 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
// Similar to uvm_va_block_make_resident() (read documentation there). The
// difference is that source pages are only copied to the destination and the
-// residency is not updated until uvm_va_block_make_resident_post() is called.
-// Otherwise, the combination of uvm_va_block_make_resident_pre() and
-// uvm_va_block_make_resident_post() should be the same as just calling
-// uvm_va_block_make_resident().
+// residency is not updated until uvm_va_block_make_resident_finish() is called.
+// Otherwise, the combination of uvm_va_block_make_resident_copy() and
+// uvm_va_block_make_resident_finish() is the same as just calling
+// uvm_va_block_make_resident(). Note, however, that the va_block lock must be
+// held across the two calls for the operation to be complete. The va_block
+// lock can be dropped after calling uvm_va_block_make_resident_copy() but
+// uvm_va_block_make_resident_copy() must be called again after relocking the
+// va_block lock and before calling uvm_va_block_make_resident_finish().
// This split is needed when using migrate_vma_setup() and migrate_vma_pages()
// so that when migrate_vma_pages() indicates a page is not migrating, the
// va_block state is not updated.
// LOCKING: The caller must hold the va_block lock.
-NV_STATUS uvm_va_block_make_resident_pre(uvm_va_block_t *va_block,
-                                         uvm_va_block_retry_t *va_block_retry,
-                                         uvm_va_block_context_t *va_block_context,
-                                         uvm_processor_id_t dest_id,
-                                         uvm_va_block_region_t region,
-                                         const uvm_page_mask_t *page_mask,
-                                         const uvm_page_mask_t *prefetch_page_mask,
-                                         uvm_make_resident_cause_t cause);
+NV_STATUS uvm_va_block_make_resident_copy(uvm_va_block_t *va_block,
+                                          uvm_va_block_retry_t *va_block_retry,
+                                          uvm_va_block_context_t *va_block_context,
+                                          uvm_processor_id_t dest_id,
+                                          uvm_va_block_region_t region,
+                                          const uvm_page_mask_t *page_mask,
+                                          const uvm_page_mask_t *prefetch_page_mask,
+                                          uvm_make_resident_cause_t cause);
// The page_mask must be the same or a subset of the page_mask passed to
-// uvm_va_block_make_resident_pre(). This step updates the residency and breaks
+// uvm_va_block_make_resident_copy(). This step updates the residency and breaks
// read duplication.
// LOCKING: The caller must hold the va_block lock.
-void uvm_va_block_make_resident_post(uvm_va_block_t *va_block,
-                                     uvm_va_block_context_t *va_block_context,
-                                     uvm_va_block_region_t region,
-                                     const uvm_page_mask_t *page_mask);
+void uvm_va_block_make_resident_finish(uvm_va_block_t *va_block,
+                                       uvm_va_block_context_t *va_block_context,
+                                       uvm_va_block_region_t region,
+                                       const uvm_page_mask_t *page_mask);
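Putting the split together, a hedged sketch of a caller using the two phases under a single hold of the va_block lock (hypothetical wrapper, not driver code):

static NV_STATUS example_copy_then_finish(uvm_va_block_t *block,
                                          uvm_va_block_retry_t *retry,
                                          uvm_va_block_context_t *ctx,
                                          uvm_processor_id_t dest_id,
                                          uvm_va_block_region_t region,
                                          const uvm_page_mask_t *page_mask,
                                          uvm_make_resident_cause_t cause)
{
    // Caller holds the va_block lock and keeps holding it until
    // uvm_va_block_make_resident_finish() returns, per the comment above.
    NV_STATUS status = uvm_va_block_make_resident_copy(block, retry, ctx, dest_id,
                                                       region, page_mask, NULL, cause);
    if (status != NV_OK)
        return status;

    // Commit the residency change for the copied pages. With
    // migrate_vma_pages() the mask may be narrowed before this point so that
    // non-migrating pages are not committed.
    uvm_va_block_make_resident_finish(block, ctx, region, page_mask);
    return NV_OK;
}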
// Creates or upgrades a mapping from the input processor to the given virtual
// address region. Pages which already have new_prot permissions or higher are
@@ -905,7 +936,8 @@ NV_STATUS uvm_va_block_unmap_mask(uvm_va_block_t *va_block,
//
// LOCKING: The caller must hold the VA block lock.
NV_STATUS uvm_va_block_set_preferred_location_locked(uvm_va_block_t *va_block,
-                                                     uvm_va_block_context_t *va_block_context);
+                                                     uvm_va_block_context_t *va_block_context,
+                                                     uvm_va_block_region_t region);
// Maps the given processor to all resident pages in this block, as allowed by
// location and policy. Waits for the operation to complete before returning.
@@ -921,6 +953,22 @@ NV_STATUS uvm_va_block_set_accessed_by(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t processor_id);
+// Maps given processor to all resident pages in this block and region, as
+// allowed by location and policy. The caller is responsible for waiting for
+// the tracker after all mappings have been started.
+// This function can be called with HMM and managed va_blocks.
+//
+// va_block_context must not be NULL and va_block_context->policy must be valid.
+// See the comments for uvm_va_block_check_policy_is_valid().
+//
+// LOCKING: The caller must hold the va_block lock and
+// va_block_context->mm->mmap_lock must be held in at least read mode.
+NV_STATUS uvm_va_block_set_accessed_by_locked(uvm_va_block_t *va_block,
+                                              uvm_va_block_context_t *va_block_context,
+                                              uvm_processor_id_t processor_id,
+                                              uvm_va_block_region_t region,
+                                              uvm_tracker_t *out_tracker);
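As a usage illustration, a hypothetical caller starting mappings for several processors and waiting once on the shared tracker; it assumes the standard UVM_TRACKER_INIT()/uvm_tracker_wait_deinit() tracker helpers and the for_each_id_in_mask() iterator:

static NV_STATUS example_set_accessed_by_all(uvm_va_block_t *block,
                                             uvm_va_block_context_t *ctx,
                                             const uvm_processor_mask_t *processors,
                                             uvm_va_block_region_t region)
{
    uvm_tracker_t tracker = UVM_TRACKER_INIT();
    uvm_processor_id_t id;
    NV_STATUS status = NV_OK;
    NV_STATUS tracker_status;

    for_each_id_in_mask(id, processors) {
        // Start the mappings for this processor; work is accumulated in the
        // tracker rather than waited for per call.
        status = uvm_va_block_set_accessed_by_locked(block, ctx, id, region, &tracker);
        if (status != NV_OK)
            break;
    }

    // Wait for all mappings started above, then release tracker resources.
    tracker_status = uvm_tracker_wait_deinit(&tracker);
    return status == NV_OK ? tracker_status : status;
}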
// Breaks SetAccessedBy and remote mappings
// This function should only be called with managed va_blocks.
//
@@ -1124,6 +1172,11 @@ void uvm_va_block_unmap_preferred_location_uvm_lite(uvm_va_block_t *va_block, uv
// must hold mm->mmap_lock in at least read mode.
void uvm_va_block_unregister_gpu(uvm_va_block_t *va_block, uvm_gpu_t *gpu, struct mm_struct *mm);
+// Same as uvm_va_block_unregister_gpu() but the VA block lock must be held.
+// Note that this handles allocation-retry internally and hence might unlock
+// and relock block's lock.
+void uvm_va_block_unregister_gpu_locked(uvm_va_block_t *va_block, uvm_gpu_t *gpu, struct mm_struct *mm);
// Unmaps all memory associated with the block and drops the ref count of the
// block. This allows the caller to free resources associated with this block
// regardless of the block's current ref count. Most importantly it allows the
@@ -1186,9 +1239,13 @@ NV_STATUS uvm_va_block_cpu_fault(uvm_va_block_t *va_block,
// Performs any operations necessary to establish a coherent mapping
// (migrations, cache invalidates, etc.) in response to the given service block
-// context
+// context.
//
-// service_context->block_context.policy is set by this function.
+// service_context must not be NULL and service_context->block_context.policy
+// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
+// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
+// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
+// service_context->prefetch_hint is set by this function.
//
// Locking:
// - service_context->block_context.mm->mmap_lock must be held in at least
@@ -1197,8 +1254,8 @@ NV_STATUS uvm_va_block_cpu_fault(uvm_va_block_t *va_block,
// - va_block lock must be held
//
// If allocation-retry was required as part of the operation and was successful,
-// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the block's lock was
-// unlocked and relocked.
+// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the block's lock
+// was unlocked and relocked.
//
// NV_WARN_MORE_PROCESSING_REQUIRED indicates that thrashing has been detected
// and the performance heuristics logic decided to throttle execution.
@@ -1209,6 +1266,77 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
uvm_va_block_retry_t *block_retry,
uvm_service_block_context_t *service_context);
+// Performs population of the destination pages, unmapping and copying source
+// pages to new_residency.
+//
+// service_context must not be NULL and service_context->block_context.policy
+// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
+// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
+// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
+// service_context->prefetch_hint should be set before calling this function.
+//
+// Locking:
+// - service_context->block_context.mm->mmap_lock must be held in at least
+//   read mode, if valid.
+// - va_space lock must be held in at least read mode
+// - va_block lock must be held
+//
+// If allocation-retry was required as part of the operation and was successful,
+// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the block's lock
+// was unlocked and relocked.
+//
+// NV_WARN_MORE_PROCESSING_REQUIRED indicates that thrashing has been detected
+// and the performance heuristics logic decided to throttle execution.
+// Any other error code different than NV_OK indicates OOM or a global fatal
+// error.
+NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
+                                    uvm_processor_id_t new_residency,
+                                    uvm_va_block_t *va_block,
+                                    uvm_va_block_retry_t *block_retry,
+                                    uvm_service_block_context_t *service_context);
+// This updates the va_block residency state and maps the faulting processor_id
+// to the new residency (which may be remote).
+//
+// service_context must not be NULL and service_context->block_context.policy
+// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
+// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
+// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
+// service_context must be initialized by calling uvm_va_block_service_copy()
+// before calling this function.
+//
+// Locking:
+// - service_context->block_context.mm->mmap_lock must be held in at least
+//   read mode, if valid.
+// - va_space lock must be held in at least read mode
+// - va_block lock must be held
+// - the mmap lock and va_space lock must be held across the calls to
+//   uvm_va_block_service_copy() and this function. If the va_block lock is
+//   dropped in between, special care is needed to check for eviction and
+//   invalidation callbacks.
+//
+// If allocation-retry was required as part of the operation and was successful,
+// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the block's lock
+// was unlocked and relocked.
+//
+// NV_WARN_MORE_PROCESSING_REQUIRED indicates that thrashing has been detected
+// and the performance heuristics logic decided to throttle execution.
+// Any other error code different than NV_OK indicates OOM or a global fatal
+// error.
+NV_STATUS uvm_va_block_service_finish(uvm_processor_id_t processor_id,
+                                      uvm_va_block_t *va_block,
+                                      uvm_service_block_context_t *service_context);
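A sketch of the two-stage servicing flow these declarations describe, for a caller that keeps the va_block lock across both stages (hypothetical wrapper, not driver code):

static NV_STATUS example_service_pages(uvm_processor_id_t processor_id,
                                       uvm_processor_id_t new_residency,
                                       uvm_va_block_t *block,
                                       uvm_va_block_retry_t *retry,
                                       uvm_service_block_context_t *service_context)
{
    // Stage 1: populate destination pages and copy the data. This also
    // initializes service_context for the finish stage.
    NV_STATUS status = uvm_va_block_service_copy(processor_id, new_residency,
                                                 block, retry, service_context);
    if (status != NV_OK)
        return status;

    // Stage 2: commit the residency change and map the faulting processor.
    // If the va_block lock were dropped between the stages, eviction and
    // invalidation callbacks would have to be rechecked, per the comment above.
    return uvm_va_block_service_finish(processor_id, block, service_context);
}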
+// Allocate GPU state for the given va_block and registered GPUs.
+// Locking: The block lock must be held.
+NV_STATUS uvm_va_block_gpu_state_alloc(uvm_va_block_t *va_block);
+// Release any GPU or policy data associated with the given region in response
+// to munmap().
+// Locking: The va_block lock must be held.
+void uvm_va_block_munmap_region(uvm_va_block_t *va_block,
+                                uvm_va_block_region_t region);
// Size of the block in bytes. Guaranteed to be a page-aligned value between
// PAGE_SIZE and UVM_VA_BLOCK_SIZE.
static inline NvU64 uvm_va_block_size(uvm_va_block_t *block)
@@ -1275,7 +1403,7 @@ const uvm_page_mask_t *uvm_va_block_map_mask_get(uvm_va_block_t *block, uvm_proc
// VA block lookup functions. There are a number of permutations which might be
// useful, such as looking up the block from {va_space, va_range} x {addr,
// block index}. The ones implemented here and in uvm_va_range.h support the
-// primary two use cases, which are:
+// primary three use cases, which are:
// 1) Iterating over all VA blocks in a VA range. This uses block indices on the
// VA range:
//     uvm_va_range_num_blocks
@@ -1286,6 +1414,9 @@ const uvm_page_mask_t *uvm_va_block_map_mask_get(uvm_va_block_t *block, uvm_proc
// VA space and address:
//     uvm_va_block_find
//     uvm_va_block_find_create
+// 3) Operating on a single VA block (fault). This looks up the block using the
+//    supplied VA range and address:
+//     uvm_va_block_find_create_in_range
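For use case 1, a sketch of the index-based iteration; it assumes uvm_va_range_num_blocks() and uvm_va_range_block() from uvm_va_range.h behave as their names suggest, and the helper itself is hypothetical:

static void example_for_each_block(uvm_va_range_t *va_range)
{
    size_t i;

    for (i = 0; i < uvm_va_range_num_blocks(va_range); i++) {
        uvm_va_block_t *block = uvm_va_range_block(va_range, i);

        // Blocks are created lazily; a NULL entry means the block was never
        // instantiated for this index.
        if (!block)
            continue;

        /* operate on block */
    }
}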
// Finds the UVM or HMM VA block containing addr, if any. The va_space->lock
// must be held in at least read mode. Return values:
@@ -1315,6 +1446,15 @@ NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **out_block);
+// Same as uvm_va_block_find_create except that va_range lookup was already done
+// by the caller. If the supplied va_range is NULL, this function behaves just
+// like when the va_range lookup in uvm_va_block_find_create is NULL.
+NV_STATUS uvm_va_block_find_create_in_range(uvm_va_space_t *va_space,
+                                            uvm_va_range_t *va_range,
+                                            NvU64 addr,
+                                            uvm_va_block_context_t *va_block_context,
+                                            uvm_va_block_t **out_block);
// Same as uvm_va_block_find_create except that only managed va_blocks are
// created if not already present in the VA range.
static NV_STATUS uvm_va_block_find_create_managed(uvm_va_space_t *va_space,
@@ -1324,15 +1464,10 @@ static NV_STATUS uvm_va_block_find_create_managed(uvm_va_space_t *va_space,
return uvm_va_block_find_create(va_space, addr, NULL, out_block);
}
-// Look up a chunk backing a specific address within the VA block. Returns NULL if none.
+// Look up a chunk backing a specific address within the VA block.
+// Returns NULL if none.
uvm_gpu_chunk_t *uvm_va_block_lookup_gpu_chunk(uvm_va_block_t *va_block, uvm_gpu_t *gpu, NvU64 address);
-typedef enum
-{
-    UVM_MIGRATE_MODE_MAKE_RESIDENT,
-    UVM_MIGRATE_MODE_MAKE_RESIDENT_AND_MAP,
-} uvm_migrate_mode_t;
// Implementation of the UvmMigrate() API at the VA block scope.
//
// The out_tracker can be NULL.
@@ -1345,6 +1480,8 @@ typedef enum
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
+// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
+// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
//
// LOCKING: The caller must hold the va_block lock. If va_block_context->mm !=
// NULL, va_block_context->mm->mmap_lock must be held in at least
@@ -1362,7 +1499,7 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
// The [dst, dst + size) range has to fit within a single PAGE_SIZE page.
//
// va_block_context must not be NULL. The caller is not required to set
-// va_block_context->policy.
+// va_block_context->policy or va_block_context->hmm.vma.
//
// The caller needs to support allocation-retry of page tables.
//
@@ -1416,6 +1553,8 @@ static uvm_page_index_t uvm_va_block_cpu_page_index(uvm_va_block_t *va_block, Nv
// Computes the size and index in the gpu_state chunks array of the GPU chunk
// which corresponds to the given page_index of the VA region.
+// Note this is only used for testing and does not work on HMM va_blocks as it
+// returns incorrect results for those.
size_t uvm_va_block_gpu_chunk_index_range(NvU64 start,
NvU64 size,
uvm_gpu_t *gpu,
@@ -1868,77 +2007,66 @@ static uvm_va_block_region_t uvm_va_block_chunk_region(uvm_va_block_t *block,
// Helpers for page state (permissions, size, residency)
//
// Compute the gpus that have at least the given access permissions for the
// range described by region and page_mask. The function sets the bit if any
// page in the region has the permissions.
void uvm_va_block_region_authorized_gpus(uvm_va_block_t *va_block,
uvm_va_block_region_t region,
uvm_prot_t access_permission,
uvm_processor_mask_t *authorized_gpus);
// Compute the processors that have at least the given access permissions for the
// range described by region and page_mask. The function sets the bit if any
// page in the region has the permissions.
void uvm_va_block_region_authorized_processors(uvm_va_block_t *va_block,
uvm_va_block_region_t region,
uvm_prot_t access_permission,
uvm_processor_mask_t *authorized_processors);
void uvm_va_block_page_authorized_gpus(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_prot_t access_permission,
uvm_processor_mask_t *authorized_gpus);
void uvm_va_block_page_authorized_processors(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_prot_t access_permission,
uvm_processor_mask_t *authorized_processors);
bool uvm_va_block_is_gpu_authorized_on_whole_region(uvm_va_block_t *va_block,
uvm_va_block_region_t region,
uvm_gpu_id_t gpu_id,
uvm_prot_t required_prot);
bool uvm_va_block_is_processor_authorized_on_whole_region(uvm_va_block_t *va_block,
uvm_va_block_region_t region,
uvm_processor_id_t processor_id,
uvm_prot_t required_prot);
bool uvm_va_block_page_is_gpu_authorized(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_gpu_id_t gpu_id,
uvm_prot_t required_prot);
bool uvm_va_block_page_is_processor_authorized(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_processor_id_t processor_id,
uvm_prot_t required_prot);
// Compute the gpus that have a copy of the given page resident in their memory
void uvm_va_block_page_resident_gpus(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_processor_mask_t *resident_gpus);
-// Compute the processors that have a copy of the given page resident in their memory
+// Compute the processors that have a copy of the given page resident in their
+// memory.
void uvm_va_block_page_resident_processors(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_processor_mask_t *resident_processors);
-// Count how many processors have a copy of the given page resident in their memory
+// Count how many processors have a copy of the given page resident in their
+// memory.
NvU32 uvm_va_block_page_resident_processors_count(uvm_va_block_t *va_block, uvm_page_index_t page_index);
-// Get the processor with a resident copy of a page closest to the given processor
+// Get the processor with a resident copy of a page closest to the given
+// processor.
uvm_processor_id_t uvm_va_block_page_get_closest_resident(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_processor_id_t processor);
uvm_processor_id_t uvm_va_block_page_get_closest_resident_in_mask(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_processor_id_t processor,
const uvm_processor_mask_t *processor_mask);
+// Insert a CPU chunk at the given page_index into the va_block.
+// Locking: The va_block lock must be held.
+NV_STATUS uvm_cpu_chunk_insert_in_block(uvm_va_block_t *va_block,
+                                        uvm_cpu_chunk_t *chunk,
+                                        uvm_page_index_t page_index);
+// Remove a CPU chunk at the given page_index from the va_block.
+// Locking: The va_block lock must be held.
+void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block,
+                                     uvm_page_index_t page_index);
+// Return the CPU chunk at the given page_index from the va_block.
+// Locking: The va_block lock must be held.
+uvm_cpu_chunk_t *uvm_cpu_chunk_get_chunk_for_page(uvm_va_block_t *va_block,
+                                                  uvm_page_index_t page_index);
+// Return the struct page backing the given page_index from the va_block.
+// Locking: The va_block lock must be held.
+struct page *uvm_cpu_chunk_get_cpu_page(uvm_va_block_t *va_block,
+                                        uvm_page_index_t page_index);
+// Physically map a CPU chunk so it is DMA'able from all registered GPUs.
+// Locking: The va_block lock must be held.
+NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
+                                             uvm_page_index_t page_index);
+// Physically unmap a CPU chunk from all registered GPUs.
+// Locking: The va_block lock must be held.
+void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
+                                          uvm_cpu_chunk_t *chunk,
+                                          uvm_page_index_t page_index);
+// Remove any CPU chunks in the given region.
+// Locking: The va_block lock must be held.
+void uvm_va_block_remove_cpu_chunks(uvm_va_block_t *va_block, uvm_va_block_region_t region);
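A hedged sketch tying the helpers above together: insert a chunk, then make it GPU-addressable, undoing the insert on failure (hypothetical caller, va_block lock held):

static NV_STATUS example_add_cpu_chunk(uvm_va_block_t *block,
                                       uvm_cpu_chunk_t *chunk,
                                       uvm_page_index_t page_index)
{
    // Attach the chunk to the block's CPU chunk storage.
    NV_STATUS status = uvm_cpu_chunk_insert_in_block(block, chunk, page_index);
    if (status != NV_OK)
        return status;

    // Make the chunk DMA-addressable from all registered GPUs; roll back the
    // insert if the mapping fails.
    status = uvm_va_block_map_cpu_chunk_on_gpus(block, page_index);
    if (status != NV_OK)
        uvm_cpu_chunk_remove_from_block(block, page_index);

    return status;
}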
// Get CPU page size or 0 if it is not mapped
-NvU32 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block, uvm_page_index_t page_index);
+NvU32 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block,
+                                 uvm_page_index_t page_index);
// Get GPU page size or 0 if it is not mapped on the given GPU
NvU32 uvm_va_block_page_size_gpu(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id, uvm_page_index_t page_index);
@@ -1999,14 +2127,14 @@ size_t uvm_va_block_big_page_index(uvm_va_block_t *va_block, uvm_page_index_t pa
// and if the va_block is a HMM block, va_block_context->hmm.vma must be valid
// which also means the va_block_context->mm is not NULL, retained, and locked
// for at least read. See the comments for uvm_va_block_check_policy_is_valid()
-// and uvm_hmm_va_block_context_vma_is_valid() in uvm_hmm.h.
+// and uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// Locking: the va_block lock must be held.
uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,
uvm_processor_id_t processor_id,
NvU32 access_type_mask,
-                                                 uvm_va_policy_t *policy,
+                                                 const uvm_va_policy_t *policy,
const uvm_perf_thrashing_hint_t *thrashing_hint,
uvm_service_operation_t operation,
bool *read_duplicate);