mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2026-05-04 05:01:55 +00:00
530.30.02
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -43,6 +43,7 @@
|
||||
#include "nv-kthread-q.h"
|
||||
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <linux/wait.h>
|
||||
|
||||
// VA blocks are the leaf nodes in the uvm_va_space tree for managed allocations
|
||||
// (VA ranges with type == UVM_VA_RANGE_TYPE_MANAGED):
|
||||
@@ -328,7 +329,7 @@ struct uvm_va_block_struct
|
||||
// allocations. See uvm_pmm_sysmem.h for more details on CPU chunks.
|
||||
// This member is meant to hold an opaque value indicating the CPU
|
||||
// chunk storage method. For more details on CPU chunk storage,
|
||||
// see uvm_cpu_chunk_storage_type_t in uvm_pmm_sysmem.c.
|
||||
// see uvm_cpu_chunk_storage_type_t in uvm_va_block.c.
|
||||
unsigned long chunks;
|
||||
|
||||
// Per-page allocation bit vector.
|
||||
@@ -352,12 +353,18 @@ struct uvm_va_block_struct
|
||||
// UVM_PTE_BITS_CPU_WRITE is not set, the mapping is read-only.
|
||||
// Otherwise, the mapping is read-write.
|
||||
//
|
||||
// Note that this is the maximum permissions a PTE could have, but not
|
||||
// necessarily the actual current permissions of the CPU PTEs. The UVM
|
||||
// driver will never change the PTEs without updating this state, but
|
||||
// the kernel can downgrade our CPU mappings at any time without
|
||||
// notifying the UVM driver (for example in response to user space
|
||||
// calling madvise with MADV_DONTNEED).
|
||||
// For managed allocations, this is the maximum permissions a PTE
|
||||
// could have, but not necessarily the actual current permissions of the
|
||||
// CPU PTEs. The UVM driver will never change the PTEs without updating
|
||||
// this state, but the kernel can downgrade our CPU mappings at any time
|
||||
// without notifying the UVM driver (for example in response to user
|
||||
// space calling madvise with MADV_DONTNEED).
|
||||
//
|
||||
// For HMM allocations, this is the minimum permission the CPU has since
|
||||
// Linux can upgrade a read-only PTE to read-write without notifying
|
||||
// the UVM driver. This is why read duplication isn't currently
|
||||
// supported.
|
||||
// TODO: Bug 3660922: Need to handle read duplication at some point.
|
||||
uvm_page_mask_t pte_bits[UVM_PTE_BITS_CPU_MAX];
|
||||
|
||||
// Whether the CPU has ever mapped a page on this VA block. This is
|
||||
@@ -452,6 +459,19 @@ struct uvm_va_block_struct
|
||||
// The MMU notifier is registered per va_block.
|
||||
struct mmu_interval_notifier notifier;
|
||||
|
||||
// Wait queue for GPU atomic operations to system memory.
|
||||
struct wait_queue_head atomic_waitq;
|
||||
|
||||
// Mask of pages being migrated to system memory for GPU atomic access.
|
||||
// It is used so other threads don't try to migrate those pages while
|
||||
// make_device_exclusive_range() is called without holding the va_block
|
||||
// lock.
|
||||
uvm_page_mask_t atomic_busy;
|
||||
|
||||
// Sequence number to tell if any changes were made to the va_block
|
||||
// while not holding the block lock and calling hmm_range_fault().
|
||||
unsigned long changed;
|
||||
|
||||
// Parent VA space pointer. It is NULL for managed blocks or if
|
||||
// the HMM block is dead. This field can be read while holding the
|
||||
// block lock and is only modified while holding the va_space write
|
||||
@@ -522,7 +542,7 @@ struct uvm_va_block_wrapper_struct
|
||||
};
|
||||
|
||||
// Tracking needed for supporting allocation-retry of user GPU memory
|
||||
typedef struct
|
||||
struct uvm_va_block_retry_struct
|
||||
{
|
||||
// A tracker used for all allocations from PMM.
|
||||
uvm_tracker_t tracker;
|
||||
@@ -537,7 +557,7 @@ typedef struct
|
||||
// can contain chunks from multiple GPUs. All the used chunks are unpinned
|
||||
// when the operation is finished with uvm_va_block_retry_deinit().
|
||||
struct list_head used_chunks;
|
||||
} uvm_va_block_retry_t;
|
||||
};
|
||||
|
||||
// Module load/exit
|
||||
NV_STATUS uvm_va_block_init(void);
|
||||
@@ -635,6 +655,12 @@ uvm_va_space_t *uvm_va_block_get_va_space_maybe_dead(uvm_va_block_t *va_block);
|
||||
// is held in write mode.
|
||||
uvm_va_space_t *uvm_va_block_get_va_space(uvm_va_block_t *va_block);
|
||||
|
||||
// Return true if the VA space has access counter migrations enabled and should
|
||||
// remote map pages evicted to system memory. This is OK since access counters
|
||||
// can pull the data back to vidmem if sufficient accesses trigger a migration.
|
||||
// The caller must ensure that the VA space cannot go away.
|
||||
bool uvm_va_space_map_remote_on_eviction(uvm_va_space_t *va_space);
|
||||
|
||||
// Dynamic cache-based allocation for uvm_va_block_context_t.
|
||||
//
|
||||
// See uvm_va_block_context_init() for a description of the mm parameter.
|
||||
@@ -663,7 +689,7 @@ static void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context,
|
||||
// This always returns true and is intended to only be used with UVM_ASSERT().
|
||||
// Locking: the va_block lock must be held.
|
||||
bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
|
||||
uvm_va_policy_t *policy,
|
||||
const uvm_va_policy_t *policy,
|
||||
uvm_va_block_region_t region);
|
||||
|
||||
// TODO: Bug 1766480: Using only page masks instead of a combination of regions
|
||||
@@ -697,7 +723,7 @@ bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
|
||||
//
|
||||
// Allocation-retry: this operation may need to perform eviction to be able to
|
||||
// allocate GPU memory successfully and if that happens,
|
||||
// NV_WARN_MORE_PROCESSING_REQUIRED will be returned. That also means that the
|
||||
// NV_ERR_MORE_PROCESSING_REQUIRED will be returned. That also means that the
|
||||
// block's lock has been unlocked and relocked as part of the call and that the
|
||||
// whole sequence of operations performed under the block's lock needs to be
|
||||
// attempted again. To facilitate that, the caller needs to provide the same
|
||||
@@ -707,14 +733,15 @@ bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
|
||||
// caller.
|
||||
//
|
||||
// If dest_id is the CPU then va_block_retry can be NULL and allocation-retry of
|
||||
// user memory is guaranteed not to happen. Allocation-retry of page tables can
|
||||
// still occur though.
|
||||
// user memory is guaranteed not to happen. Allocation-retry of GPU page tables
|
||||
// can still occur though.
|
||||
//
|
||||
// va_block_context must not be NULL. This function will set a bit in
|
||||
// va_block_context->make_resident.pages_changed_residency for each page that
|
||||
// changed residency (due to a migration or first population) as a result of the
|
||||
// operation. This function only sets bits in that mask. It is the caller's
|
||||
// responsiblity to zero the mask or not first.
|
||||
// operation and va_block_context->make_resident.all_involved_processors for
|
||||
// each processor involved in the copy. This function only sets bits in those
|
||||
// masks. It is the caller's responsiblity to zero the masks or not first.
|
||||
//
|
||||
// va_block_context->policy must also be set by the caller for the given region.
|
||||
// See the comments for uvm_va_block_check_policy_is_valid().
|
||||
@@ -723,6 +750,8 @@ bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
|
||||
// have been unlocked and relocked.
|
||||
//
|
||||
// LOCKING: The caller must hold the va_block lock.
|
||||
// If va_block_context->mm != NULL, va_block_context->mm->mmap_lock must be
|
||||
// held in at least read mode.
|
||||
NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
@@ -743,8 +772,6 @@ NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,
|
||||
// are broken
|
||||
// - Only managed va_blocks are supported.
|
||||
// TODO: Bug 3660922: need to implement HMM read duplication support.
|
||||
// - LOCKING: If va_block_context->mm != NULL, va_block_context->mm->mmap_lock
|
||||
// must be held in at least read mode.
|
||||
NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
@@ -756,31 +783,35 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
|
||||
|
||||
// Similar to uvm_va_block_make_resident() (read documentation there). The
|
||||
// difference is that source pages are only copied to the destination and the
|
||||
// residency is not updated until uvm_va_block_make_resident_post() is called.
|
||||
// Otherwise, the combination of uvm_va_block_make_resident_pre() and
|
||||
// uvm_va_block_make_resident_post() should be the same as just calling
|
||||
// uvm_va_block_make_resident().
|
||||
// residency is not updated until uvm_va_block_make_resident_finish() is called.
|
||||
// Otherwise, the combination of uvm_va_block_make_resident_copy() and
|
||||
// uvm_va_block_make_resident_finish() is the same as just calling
|
||||
// uvm_va_block_make_resident(). Note, however, that the va_block lock must be
|
||||
// held across the two calls for the operation to be complete. The va_block
|
||||
// lock can be dropped after calling uvm_va_block_make_resident_copy() but
|
||||
// uvm_va_block_make_resident_copy() must be called again after relocking the
|
||||
// va_block lock and before calling uvm_va_block_make_resident_finish().
|
||||
// This split is needed when using migrate_vma_setup() and migrate_vma_pages()
|
||||
// so that when migrate_vma_pages() indicates a page is not migrating, the
|
||||
// va_block state is not updated.
|
||||
// LOCKING: The caller must hold the va_block lock.
|
||||
NV_STATUS uvm_va_block_make_resident_pre(uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t dest_id,
|
||||
uvm_va_block_region_t region,
|
||||
const uvm_page_mask_t *page_mask,
|
||||
const uvm_page_mask_t *prefetch_page_mask,
|
||||
uvm_make_resident_cause_t cause);
|
||||
NV_STATUS uvm_va_block_make_resident_copy(uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t dest_id,
|
||||
uvm_va_block_region_t region,
|
||||
const uvm_page_mask_t *page_mask,
|
||||
const uvm_page_mask_t *prefetch_page_mask,
|
||||
uvm_make_resident_cause_t cause);
|
||||
|
||||
// The page_mask must be the same or a subset of the page_mask passed to
|
||||
// uvm_va_block_make_resident_pre(). This step updates the residency and breaks
|
||||
// uvm_va_block_make_resident_copy(). This step updates the residency and breaks
|
||||
// read duplication.
|
||||
// LOCKING: The caller must hold the va_block lock.
|
||||
void uvm_va_block_make_resident_post(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region,
|
||||
const uvm_page_mask_t *page_mask);
|
||||
void uvm_va_block_make_resident_finish(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region,
|
||||
const uvm_page_mask_t *page_mask);
|
||||
|
||||
// Creates or upgrades a mapping from the input processor to the given virtual
|
||||
// address region. Pages which already have new_prot permissions or higher are
|
||||
@@ -905,7 +936,8 @@ NV_STATUS uvm_va_block_unmap_mask(uvm_va_block_t *va_block,
|
||||
//
|
||||
// LOCKING: The caller must hold the VA block lock.
|
||||
NV_STATUS uvm_va_block_set_preferred_location_locked(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context);
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region);
|
||||
|
||||
// Maps the given processor to all resident pages in this block, as allowed by
|
||||
// location and policy. Waits for the operation to complete before returning.
|
||||
@@ -921,6 +953,22 @@ NV_STATUS uvm_va_block_set_accessed_by(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t processor_id);
|
||||
|
||||
// Maps given processor to all resident pages in this block and region, as
|
||||
// allowed by location and policy. The caller is responsible for waiting for
|
||||
// the tracker after all mappings have been started.
|
||||
// This function can be called with HMM and managed va_blocks.
|
||||
//
|
||||
// va_block_context must not be NULL and va_block_context->policy must be valid.
|
||||
// See the comments for uvm_va_block_check_policy_is_valid().
|
||||
//
|
||||
// LOCKING: The caller must hold the va_block lock and
|
||||
// va_block_context->mm->mmap_lock must be held in at least read mode.
|
||||
NV_STATUS uvm_va_block_set_accessed_by_locked(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t processor_id,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_tracker_t *out_tracker);
|
||||
|
||||
// Breaks SetAccessedBy and remote mappings
|
||||
// This function should only be called with managed va_blocks.
|
||||
//
|
||||
@@ -1124,6 +1172,11 @@ void uvm_va_block_unmap_preferred_location_uvm_lite(uvm_va_block_t *va_block, uv
|
||||
// must hold mm->mmap_lock in at least read mode.
|
||||
void uvm_va_block_unregister_gpu(uvm_va_block_t *va_block, uvm_gpu_t *gpu, struct mm_struct *mm);
|
||||
|
||||
// Same as uvm_va_block_unregister_gpu() but the VA block lock must be held.
|
||||
// Note that this handles allocation-retry internally and hence might unlock
|
||||
// and relock block's lock.
|
||||
void uvm_va_block_unregister_gpu_locked(uvm_va_block_t *va_block, uvm_gpu_t *gpu, struct mm_struct *mm);
|
||||
|
||||
// Unmaps all memory associated with the block and drops the ref count of the
|
||||
// block. This allows the caller to free resources associated with this block
|
||||
// regardless of the block's current ref count. Most importantly it allows the
|
||||
@@ -1186,9 +1239,13 @@ NV_STATUS uvm_va_block_cpu_fault(uvm_va_block_t *va_block,
|
||||
|
||||
// Performs any operations necessary to establish a coherent mapping
|
||||
// (migrations, cache invalidates, etc.) in response to the given service block
|
||||
// context
|
||||
// context.
|
||||
//
|
||||
// service_context->block_context.policy is set by this function.
|
||||
// service_context must not be NULL and service_context->block_context.policy
|
||||
// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
|
||||
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
|
||||
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
|
||||
// service_context->prefetch_hint is set by this function.
|
||||
//
|
||||
// Locking:
|
||||
// - service_context->block_context.mm->mmap_lock must be held in at least
|
||||
@@ -1197,8 +1254,8 @@ NV_STATUS uvm_va_block_cpu_fault(uvm_va_block_t *va_block,
|
||||
// - va_block lock must be held
|
||||
//
|
||||
// If allocation-retry was required as part of the operation and was successful,
|
||||
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the block's lock was
|
||||
// unlocked and relocked.
|
||||
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the block's lock
|
||||
// was unlocked and relocked.
|
||||
//
|
||||
// NV_WARN_MORE_PROCESSING_REQUIRED indicates that thrashing has been detected
|
||||
// and the performance heuristics logic decided to throttle execution.
|
||||
@@ -1209,6 +1266,77 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
|
||||
uvm_va_block_retry_t *block_retry,
|
||||
uvm_service_block_context_t *service_context);
|
||||
|
||||
// Performs population of the destination pages, unmapping and copying source
|
||||
// pages to new_residency.
|
||||
//
|
||||
// service_context must not be NULL and service_context->block_context.policy
|
||||
// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
|
||||
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
|
||||
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
|
||||
// service_context->prefetch_hint should be set before calling this function.
|
||||
//
|
||||
// Locking:
|
||||
// - service_context->block_context.mm->mmap_lock must be held in at least
|
||||
// read mode, if valid.
|
||||
// - va_space lock must be held in at least read mode
|
||||
// - va_block lock must be held
|
||||
//
|
||||
// If allocation-retry was required as part of the operation and was successful,
|
||||
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the block's lock
|
||||
// was unlocked and relocked.
|
||||
//
|
||||
// NV_WARN_MORE_PROCESSING_REQUIRED indicates that thrashing has been detected
|
||||
// and the performance heuristics logic decided to throttle execution.
|
||||
// Any other error code different than NV_OK indicates OOM or a global fatal
|
||||
// error.
|
||||
NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
|
||||
uvm_processor_id_t new_residency,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *block_retry,
|
||||
uvm_service_block_context_t *service_context);
|
||||
|
||||
// This updates the va_block residency state and maps the faulting processor_id
|
||||
// to the new residency (which may be remote).
|
||||
//
|
||||
// service_context must not be NULL and service_context->block_context.policy
|
||||
// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
|
||||
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
|
||||
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
|
||||
// service_context must be initialized by calling uvm_va_block_service_copy()
|
||||
// before calling this function.
|
||||
//
|
||||
// Locking:
|
||||
// - service_context->block_context.mm->mmap_lock must be held in at least
|
||||
// read mode, if valid.
|
||||
// - va_space lock must be held in at least read mode
|
||||
// - va_block lock must be held
|
||||
// - the mmap lock and va_space lock must be held across the calls to
|
||||
// uvm_va_block_service_copy() and this function. If the va_block lock is
|
||||
// dropped inbetween, special care is needed to check for eviction and
|
||||
// invalidation callbacks.
|
||||
//
|
||||
// If allocation-retry was required as part of the operation and was successful,
|
||||
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the block's lock
|
||||
// was unlocked and relocked.
|
||||
//
|
||||
// NV_WARN_MORE_PROCESSING_REQUIRED indicates that thrashing has been detected
|
||||
// and the performance heuristics logic decided to throttle execution.
|
||||
// Any other error code different than NV_OK indicates OOM or a global fatal
|
||||
// error.
|
||||
NV_STATUS uvm_va_block_service_finish(uvm_processor_id_t processor_id,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_service_block_context_t *service_context);
|
||||
|
||||
// Allocate GPU state for the given va_block and registered GPUs.
|
||||
// Locking: The block lock must be held.
|
||||
NV_STATUS uvm_va_block_gpu_state_alloc(uvm_va_block_t *va_block);
|
||||
|
||||
// Release any GPU or policy data associated with the given region in response
|
||||
// to munmap().
|
||||
// Locking: The va_block lock must be held.
|
||||
void uvm_va_block_munmap_region(uvm_va_block_t *va_block,
|
||||
uvm_va_block_region_t region);
|
||||
|
||||
// Size of the block in bytes. Guaranteed to be a page-aligned value between
|
||||
// PAGE_SIZE and UVM_VA_BLOCK_SIZE.
|
||||
static inline NvU64 uvm_va_block_size(uvm_va_block_t *block)
|
||||
@@ -1275,7 +1403,7 @@ const uvm_page_mask_t *uvm_va_block_map_mask_get(uvm_va_block_t *block, uvm_proc
|
||||
// VA block lookup functions. There are a number of permutations which might be
|
||||
// useful, such as looking up the block from {va_space, va_range} x {addr,
|
||||
// block index}. The ones implemented here and in uvm_va_range.h support the
|
||||
// primary two use cases, which are:
|
||||
// primary three use cases, which are:
|
||||
// 1) Iterating over all VA blocks in a VA range. This uses block indices on the
|
||||
// VA range:
|
||||
// uvm_va_range_num_blocks
|
||||
@@ -1286,6 +1414,9 @@ const uvm_page_mask_t *uvm_va_block_map_mask_get(uvm_va_block_t *block, uvm_proc
|
||||
// VA space and address:
|
||||
// uvm_va_block_find
|
||||
// uvm_va_block_find_create
|
||||
// 3) Operating on a single VA block (fault). This looks up the block using the
|
||||
// supplied VA range and address:
|
||||
// uvm_va_block_find_create_in_range
|
||||
|
||||
// Finds the UVM or HMM VA block containing addr, if any. The va_space->lock
|
||||
// must be held in at least read mode. Return values:
|
||||
@@ -1315,6 +1446,15 @@ NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_t **out_block);
|
||||
|
||||
// Same as uvm_va_block_find_create except that va_range lookup was already done
|
||||
// by the caller. If the supplied va_range is NULL, this function behaves just
|
||||
// like when the va_range lookup in uvm_va_block_find_create is NULL.
|
||||
NV_STATUS uvm_va_block_find_create_in_range(uvm_va_space_t *va_space,
|
||||
uvm_va_range_t *va_range,
|
||||
NvU64 addr,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_t **out_block);
|
||||
|
||||
// Same as uvm_va_block_find_create except that only managed va_blocks are
|
||||
// created if not already present in the VA range.
|
||||
static NV_STATUS uvm_va_block_find_create_managed(uvm_va_space_t *va_space,
|
||||
@@ -1324,15 +1464,10 @@ static NV_STATUS uvm_va_block_find_create_managed(uvm_va_space_t *va_space,
|
||||
return uvm_va_block_find_create(va_space, addr, NULL, out_block);
|
||||
}
|
||||
|
||||
// Look up a chunk backing a specific address within the VA block. Returns NULL if none.
|
||||
// Look up a chunk backing a specific address within the VA block.
|
||||
// Returns NULL if none.
|
||||
uvm_gpu_chunk_t *uvm_va_block_lookup_gpu_chunk(uvm_va_block_t *va_block, uvm_gpu_t *gpu, NvU64 address);
|
||||
|
||||
typedef enum
|
||||
{
|
||||
UVM_MIGRATE_MODE_MAKE_RESIDENT,
|
||||
UVM_MIGRATE_MODE_MAKE_RESIDENT_AND_MAP,
|
||||
} uvm_migrate_mode_t;
|
||||
|
||||
// Implementation of the UvmMigrate() API at the VA block scope.
|
||||
//
|
||||
// The out_tracker can be NULL.
|
||||
@@ -1345,6 +1480,8 @@ typedef enum
|
||||
//
|
||||
// va_block_context must not be NULL and va_block_context->policy must be valid.
|
||||
// See the comments for uvm_va_block_check_policy_is_valid().
|
||||
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
|
||||
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
|
||||
//
|
||||
// LOCKING: The caller must hold the va_block lock. If va_block_context->mm !=
|
||||
// NULL, va_block_context->mm->mmap_lock must be held in at least
|
||||
@@ -1362,7 +1499,7 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
// The [dst, dst + size) range has to fit within a single PAGE_SIZE page.
|
||||
//
|
||||
// va_block_context must not be NULL. The caller is not required to set
|
||||
// va_block_context->policy.
|
||||
// va_block_context->policy or va_block_context->hmm.vma.
|
||||
//
|
||||
// The caller needs to support allocation-retry of page tables.
|
||||
//
|
||||
@@ -1416,6 +1553,8 @@ static uvm_page_index_t uvm_va_block_cpu_page_index(uvm_va_block_t *va_block, Nv
|
||||
|
||||
// Computes the size and index in the gpu_state chunks array of the GPU chunk
|
||||
// which corresponds to the given page_index of the VA region.
|
||||
// Note this is only used for testing and does not work on HMM va_blocks as it
|
||||
// returns incorrect results for those.
|
||||
size_t uvm_va_block_gpu_chunk_index_range(NvU64 start,
|
||||
NvU64 size,
|
||||
uvm_gpu_t *gpu,
|
||||
@@ -1868,77 +2007,66 @@ static uvm_va_block_region_t uvm_va_block_chunk_region(uvm_va_block_t *block,
|
||||
// Helpers for page state (permissions, size, residency)
|
||||
//
|
||||
|
||||
// Compute the gpus that have at least the given access permissions for the
|
||||
// range described by region and page_mask. The function sets the bit if any
|
||||
// page in the region has the permissions.
|
||||
void uvm_va_block_region_authorized_gpus(uvm_va_block_t *va_block,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_prot_t access_permission,
|
||||
uvm_processor_mask_t *authorized_gpus);
|
||||
|
||||
// Compute the processors that have at least the given access permissions for the
|
||||
// range described by region and page_mask. The function sets the bit if any
|
||||
// page in the region has the permissions.
|
||||
void uvm_va_block_region_authorized_processors(uvm_va_block_t *va_block,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_prot_t access_permission,
|
||||
uvm_processor_mask_t *authorized_processors);
|
||||
|
||||
void uvm_va_block_page_authorized_gpus(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_prot_t access_permission,
|
||||
uvm_processor_mask_t *authorized_gpus);
|
||||
|
||||
void uvm_va_block_page_authorized_processors(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_prot_t access_permission,
|
||||
uvm_processor_mask_t *authorized_processors);
|
||||
|
||||
bool uvm_va_block_is_gpu_authorized_on_whole_region(uvm_va_block_t *va_block,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_gpu_id_t gpu_id,
|
||||
uvm_prot_t required_prot);
|
||||
|
||||
bool uvm_va_block_is_processor_authorized_on_whole_region(uvm_va_block_t *va_block,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_processor_id_t processor_id,
|
||||
uvm_prot_t required_prot);
|
||||
|
||||
bool uvm_va_block_page_is_gpu_authorized(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_gpu_id_t gpu_id,
|
||||
uvm_prot_t required_prot);
|
||||
|
||||
bool uvm_va_block_page_is_processor_authorized(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_processor_id_t processor_id,
|
||||
uvm_prot_t required_prot);
|
||||
|
||||
// Compute the gpus that have a copy of the given page resident in their memory
|
||||
void uvm_va_block_page_resident_gpus(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_processor_mask_t *resident_gpus);
|
||||
|
||||
// Compute the processors that have a copy of the given page resident in their memory
|
||||
// Compute the processors that have a copy of the given page resident in their
|
||||
// memory.
|
||||
void uvm_va_block_page_resident_processors(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_processor_mask_t *resident_processors);
|
||||
|
||||
// Count how many processors have a copy of the given page resident in their memory
|
||||
// Count how many processors have a copy of the given page resident in their
|
||||
// memory.
|
||||
NvU32 uvm_va_block_page_resident_processors_count(uvm_va_block_t *va_block, uvm_page_index_t page_index);
|
||||
|
||||
// Get the processor with a resident copy of a page closest to the given processor
|
||||
// Get the processor with a resident copy of a page closest to the given
|
||||
// processor.
|
||||
uvm_processor_id_t uvm_va_block_page_get_closest_resident(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_processor_id_t processor);
|
||||
|
||||
uvm_processor_id_t uvm_va_block_page_get_closest_resident_in_mask(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_processor_id_t processor,
|
||||
const uvm_processor_mask_t *processor_mask);
|
||||
// Insert a CPU chunk at the given page_index into the va_block.
|
||||
// Locking: The va_block lock must be held.
|
||||
NV_STATUS uvm_cpu_chunk_insert_in_block(uvm_va_block_t *va_block,
|
||||
uvm_cpu_chunk_t *chunk,
|
||||
uvm_page_index_t page_index);
|
||||
|
||||
// Remove a CPU chunk at the given page_index from the va_block.
|
||||
// Locking: The va_block lock must be held.
|
||||
void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index);
|
||||
|
||||
// Return the CPU chunk at the given page_index from the va_block.
|
||||
// Locking: The va_block lock must be held.
|
||||
uvm_cpu_chunk_t *uvm_cpu_chunk_get_chunk_for_page(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index);
|
||||
|
||||
// Return the CPU chunk at the given page_index from the va_block.
|
||||
// Locking: The va_block lock must be held.
|
||||
struct page *uvm_cpu_chunk_get_cpu_page(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index);
|
||||
|
||||
// Physically map a CPU chunk so it is DMA'able from all registered GPUs.
|
||||
// Locking: The va_block lock must be held.
|
||||
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index);
|
||||
|
||||
// Physically unmap a CPU chunk from all registered GPUs.
|
||||
// Locking: The va_block lock must be held.
|
||||
void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
|
||||
uvm_cpu_chunk_t *chunk,
|
||||
uvm_page_index_t page_index);
|
||||
|
||||
// Remove any CPU chunks in the given region.
|
||||
// Locking: The va_block lock must be held.
|
||||
void uvm_va_block_remove_cpu_chunks(uvm_va_block_t *va_block, uvm_va_block_region_t region);
|
||||
|
||||
// Get CPU page size or 0 if it is not mapped
|
||||
NvU32 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block, uvm_page_index_t page_index);
|
||||
NvU32 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index);
|
||||
|
||||
// Get GPU page size or 0 if it is not mapped on the given GPU
|
||||
NvU32 uvm_va_block_page_size_gpu(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id, uvm_page_index_t page_index);
|
||||
@@ -1999,14 +2127,14 @@ size_t uvm_va_block_big_page_index(uvm_va_block_t *va_block, uvm_page_index_t pa
|
||||
// and if the va_block is a HMM block, va_block_context->hmm.vma must be valid
|
||||
// which also means the va_block_context->mm is not NULL, retained, and locked
|
||||
// for at least read. See the comments for uvm_va_block_check_policy_is_valid()
|
||||
// and uvm_hmm_va_block_context_vma_is_valid() in uvm_hmm.h.
|
||||
// and uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
|
||||
// Locking: the va_block lock must be held.
|
||||
uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_processor_id_t processor_id,
|
||||
NvU32 access_type_mask,
|
||||
uvm_va_policy_t *policy,
|
||||
const uvm_va_policy_t *policy,
|
||||
const uvm_perf_thrashing_hint_t *thrashing_hint,
|
||||
uvm_service_operation_t operation,
|
||||
bool *read_duplicate);
|
||||
|
||||
Reference in New Issue
Block a user