530.30.02

Andy Ritger
2023-02-28 11:12:44 -08:00
parent e598191e8e
commit 4397463e73
928 changed files with 124728 additions and 88525 deletions


@@ -28,6 +28,7 @@
#include "uvm_linux.h"
#include "uvm_forward_decl.h"
#include "uvm_lock.h"
#include "uvm_pmm_gpu.h"
// Module to handle per-GPU user mappings to sysmem physical memory. Notably,
// this implements a reverse map of the DMA address to {va_block, virt_addr}.
@@ -176,17 +177,25 @@ size_t uvm_pmm_sysmem_mappings_dma_to_virt(uvm_pmm_sysmem_mappings_t *sysmem_map
uvm_reverse_map_t *out_mappings,
size_t max_out_mappings);
#define UVM_CPU_CHUNK_SIZES PAGE_SIZE
#define UVM_CPU_CHUNK_SIZES (UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | PAGE_SIZE)
#if UVM_CPU_CHUNK_SIZES == PAGE_SIZE
#define UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE() 1
typedef struct page uvm_cpu_chunk_t;
typedef enum
{
UVM_CPU_CHUNK_ALLOC_FLAGS_NONE = 0,
#define UVM_CPU_CHUNK_PAGE_INDEX(chunk, page_index) (page_index)
// Zero the chunk.
UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO = (1 << 0),
#else
#define UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE() 0
typedef struct uvm_cpu_chunk_struct uvm_cpu_chunk_t;
// Account for the chunk in the cgroup context.
UVM_CPU_CHUNK_ALLOC_FLAGS_ACCOUNT = (1 << 1),
} uvm_cpu_chunk_alloc_flags_t;
typedef enum
{
UVM_CPU_CHUNK_TYPE_PHYSICAL,
UVM_CPU_CHUNK_TYPE_LOGICAL,
UVM_CPU_CHUNK_TYPE_HMM
} uvm_cpu_chunk_type_t;
// CPU memory chunk descriptor.
// CPU memory chunks represent a physically contiguous CPU memory
@@ -197,6 +206,22 @@ typedef struct uvm_cpu_chunk_struct uvm_cpu_chunk_t;
// splitting are referred to as "logical chunks".
struct uvm_cpu_chunk_struct
{
uvm_cpu_chunk_type_t type:2;
// Size of the chunk.
// For chunks resulting from page allocations (physical chunks),
// this value is the size of the physical allocation.
size_t log2_size : order_base_2(UVM_CHUNK_SIZE_MASK_SIZE);
// Chunk reference count used when a CPU chunk is split. Each
// child sub-chunk will increment the reference count of its
// parent.
// The reference count is set to 1 when the chunk is created.
// This initial reference is dropped if the chunk is split in
// order to automatically destroy the chunk when all logical
// chunks resulting from the split are destroyed.
nv_kref_t refcount;
// Pointer to the CPU page backing this CPU chunk.
// For physical chunks, this will point to the head page. Physical
// chunk allocation will set the reference count for the struct
@@ -209,100 +234,110 @@ struct uvm_cpu_chunk_struct
// reference counted, there is no need to take separate references
// to the struct page for logical chunks.
struct page *page;
};
// For logical chunks, this points to the parent chunk (which
// could also be a logical chunk). For physical chunks, this
// is NULL.
uvm_cpu_chunk_t *parent;
typedef struct
{
NvU64 dma_addr;
NvU32 map_count;
} uvm_cpu_phys_mapping_t;
// Page offset of this chunk within the physical size of
// the parent.
uvm_page_index_t offset;
typedef struct
{
uvm_cpu_chunk_t common;
// Region within the VA block covered by this CPU chunk.
uvm_va_block_region_t region;
// Lock protecting dirty_bitmap and gpu_mappings.
uvm_mutex_t lock;
// Chunk reference count used when a CPU chunk is split. Each
// child sub-chunk will increment the reference count of its
// parent.
nv_kref_t refcount;
// Size of the chunk.
// For chunks resulting from page allocations (physical chunks),
// this value is the size of the physical allocation.
size_t log2_size : order_base_2(UVM_CHUNK_SIZE_MASK_SIZE);
struct {
struct
{
// Per-GPU array of DMA mapping addresses for the chunk.
// The DMA mapping addresses for logical chunks are adjusted
// to the correct offset within the parent chunk.
union {
NvU64 static_entry;
NvU64 *dynamic_entries;
union
{
uvm_cpu_phys_mapping_t static_entry;
uvm_cpu_phys_mapping_t *dynamic_entries;
};
// Maximum number of physical mapping entries available.
// The initial value is 1 since the static_entry is always
// available.
// When using the dynamic_entries, it holds the size of the
// dynamic_entries array. This may be more than the number
// of GPUs with active mappings. The number of active entries
// is the number of set bits in dma_addrs_mask.
size_t max_entries;
// The set of GPU ID's that have an active physical mapping.
// Since physical mappings are shared by all GPUs under a
// parent GPU, this mask only needs to track uvm_parent_gpu_t.
uvm_processor_mask_t dma_addrs_mask;
} gpu_mappings;
// Lock protecting dirty_bitmap
uvm_spinlock_t lock;
// A dynamically allocated bitmap (one per PAGE_SIZE page) used
// to track dirty state of each PAGE_SIZE page.
// Dirty state is tracked only by physical chunks. Therefore,
// for logical chunks this will be NULL.
// Large CPU chunks are allocated as compound pages. For such
// pages, the kernel keeps dirtiness state with a single bit
// (in the compound page head) that covers the entire compound
// page.
//
// In the case of UVM-Lite GPUs, using the dirty bit of the
// compound page will cause a performance regression due to
// the copying of extra data. We mitigate this by using this
// bitmap to track which base pages are dirty.
unsigned long *dirty_bitmap;
};
#define UVM_CPU_CHUNK_PAGE_INDEX(chunk, page_index) (chunk->region.first)
#endif // UVM_CPU_CHUNK_SIZES == PAGE_SIZE
} uvm_cpu_physical_chunk_t;
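The gpu_mappings layout above keeps the common case allocation-free: the inline static_entry slot is always available, and dynamic_entries is only allocated once more than one parent GPU maps the chunk. A minimal lookup sketch under that assumption follows; the helper name and the way gpu_index is derived from the parent GPU ID are hypothetical and not part of this header:

// Hypothetical sketch: return the mapping slot used for a given parent-GPU
// index, assuming max_entries == 1 means the inline static_entry is in use.
// The chunk's lock is assumed to be held by the caller.
static uvm_cpu_phys_mapping_t *chunk_phys_mapping_slot(uvm_cpu_physical_chunk_t *chunk,
                                                       size_t gpu_index)
{
    if (chunk->gpu_mappings.max_entries == 1)
        return &chunk->gpu_mappings.static_entry;

    return &chunk->gpu_mappings.dynamic_entries[gpu_index];
}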
typedef struct
{
uvm_cpu_chunk_t common;
// Pointer to the parent chunk (which could also be a logical chunk).
uvm_cpu_chunk_t *parent;
uvm_processor_mask_t mapped_gpus;
} uvm_cpu_logical_chunk_t;
// Return the set of allowed CPU chunk allocation sizes.
uvm_chunk_sizes_mask_t uvm_cpu_chunk_get_allocation_sizes(void);
// Allocate a physical CPU chunk for the specified page index and owned by
// va_block.
// Allocate a physical CPU chunk of the specified size.
//
// The size of the allocated CPU chunk may be any of the allowed sizes and
// depends on several factors:
// * Allocation will be attempted in reverse order - highest to lowest - in
// order to ensure that the highest possible size is used.
// * An allocation size will be used if:
// - the VA region within the block covered by the allocation size is
// aligned to that allocation size,
// - the VA block region corresponding to the allocation size is empty
// (has no previously populated pages), and
// - the system allows a page allocation of that size.
//
// If mm is not NULL, the chunk's memory will be added to the mm's memory cgroup.
//
// If a CPU chunk allocation succeeds, NV_OK is returned. If new_chunk is not
// NULL it will be set to point to the newly allocated chunk. On failure,
// NV_ERR_NO_MEMORY is returned.
NV_STATUS uvm_cpu_chunk_alloc(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
struct mm_struct *mm,
// If a CPU chunk allocation succeeds, NV_OK is returned. new_chunk will be set
// to point to the newly allocated chunk. On failure, NV_ERR_NO_MEMORY is
// returned.
NV_STATUS uvm_cpu_chunk_alloc(uvm_chunk_size_t alloc_size,
uvm_cpu_chunk_alloc_flags_t flags,
uvm_cpu_chunk_t **new_chunk);
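A hedged usage sketch of the reworked allocator: the caller now chooses the size and flags explicitly instead of passing a va_block and page index. The fallback loop below is illustrative only and does not reproduce the driver's actual size-selection policy; for_each_chunk_size_rev is assumed to be the reverse-order chunk size iterator from uvm_pmm_gpu.h:

// Illustrative only: try the largest enabled CPU chunk size first, falling
// back to smaller sizes until an allocation succeeds.
static NV_STATUS alloc_largest_zeroed_chunk(uvm_cpu_chunk_t **chunk_out)
{
    uvm_chunk_sizes_mask_t sizes = uvm_cpu_chunk_get_allocation_sizes();
    uvm_chunk_size_t size;

    for_each_chunk_size_rev(size, sizes) {
        if (uvm_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO, chunk_out) == NV_OK)
            return NV_OK;
    }

    return NV_ERR_NO_MEMORY;
}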
// Insert a CPU chunk in the va_block's storage structures.
// Allocate an HMM CPU chunk.
//
// On success, NV_OK is returned. On error,
// - NV_ERR_NO_MEMORY is returned if memory allocation for any of the internal
// structures did not succeed.
// - NV_ERR_INVALID_ARGUMENT is returned if the size of the chunk to be inserted
// is invalid.
// - NV_ERR_INVALID_STATE is returned if a matching chunk already exists in the
// block.
NV_STATUS uvm_cpu_chunk_insert_in_block(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
// HMM chunks differ from normal CPU chunks in that the kernel has already
// allocated the page for them. This means we don't allocate any CPU memory
// here. It also means the kernel holds the reference to the page, so we
// shouldn't call put_page() when freeing the chunk.
//
// If a CPU chunk allocation succeeds NV_OK is returned and new_chunk will be
// set to point to the newly allocated chunk. On failure, NV_ERR_NO_MEMORY is
// returned.
//
// Note that the kernel retains logical ownership of the page. This means page
// properties should not be directly modified by UVM. In particular page flags
// such as PageDirty should not be modified by UVM, nor can UVM directly free
// the page. The kernel is also responsible for mapping/unmapping the page on
// the CPU. We create a CPU chunk for the page primarily to allow GPU mappings
// for the page to be created.
NV_STATUS uvm_cpu_chunk_alloc_hmm(struct page *page,
uvm_cpu_chunk_t **new_chunk);
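As a short usage sketch, an HMM chunk just wraps a page the kernel already owns; the caller below is hypothetical and the page would come from the HMM fault path rather than from a UVM allocation:

// Illustrative: wrap a kernel-owned page so GPU mappings can be created for
// it. No CPU memory is allocated and no page reference is taken, so the
// eventual uvm_cpu_chunk_free() must not (and does not) put_page() it.
static NV_STATUS wrap_hmm_page(struct page *page, uvm_cpu_chunk_t **chunk_out)
{
    return uvm_cpu_chunk_alloc_hmm(page, chunk_out);
}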
// Remove a CPU chunk from the va_block's storage structures.
// The chunk is not freed, only removed from the block's storage structures.
void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
// Convert a physical chunk to an HMM chunk.
static void uvm_cpu_chunk_make_hmm(uvm_cpu_chunk_t *chunk)
{
UVM_ASSERT(chunk->type == UVM_CPU_CHUNK_TYPE_PHYSICAL);
// Return the CPU chunk backing page_index within the VA block.
// If page_index is beyond the boundary of the VA block or a CPU chunk for
// the specified page has not been allocated and/or inserted into the block,
// NULL is returned.
uvm_cpu_chunk_t *uvm_cpu_chunk_get_chunk_for_page(uvm_va_block_t *block, uvm_page_index_t page_index);
chunk->type = UVM_CPU_CHUNK_TYPE_HMM;
}
uvm_chunk_size_t uvm_cpu_chunk_get_size(uvm_cpu_chunk_t *chunk);
@@ -313,158 +348,105 @@ static size_t uvm_cpu_chunk_num_pages(uvm_cpu_chunk_t *chunk)
return uvm_cpu_chunk_get_size(chunk) / PAGE_SIZE;
}
static inline bool uvm_cpu_chunk_is_hmm(uvm_cpu_chunk_t *chunk)
{
return chunk->type == UVM_CPU_CHUNK_TYPE_HMM;
}
static bool uvm_cpu_chunk_is_physical(uvm_cpu_chunk_t *chunk)
{
#if UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE()
return true;
#else
return chunk->parent == NULL;
#endif
return (chunk->type == UVM_CPU_CHUNK_TYPE_PHYSICAL || uvm_cpu_chunk_is_hmm(chunk));
}
// Return a pointer to the struct page backing page_index within the owning
// VA block.
struct page *uvm_cpu_chunk_get_cpu_page(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
static bool uvm_cpu_chunk_is_logical(uvm_cpu_chunk_t *chunk)
{
return chunk->type == UVM_CPU_CHUNK_TYPE_LOGICAL;
}
// Take a reference to the CPU chunk.
void uvm_cpu_chunk_get(uvm_cpu_chunk_t *chunk);
static uvm_cpu_physical_chunk_t *uvm_cpu_chunk_to_physical(uvm_cpu_chunk_t *chunk)
{
UVM_ASSERT(uvm_cpu_chunk_is_physical(chunk));
return container_of((chunk), uvm_cpu_physical_chunk_t, common);
}
// Release a reference to the CPU chunk. When the reference count
// drops to zero, the CPU chunk will be freed. Physical CPU chunks
// will also free the CPU pages backing the chunk.
void uvm_cpu_chunk_put(uvm_cpu_chunk_t *chunk);
static uvm_cpu_logical_chunk_t *uvm_cpu_chunk_to_logical(uvm_cpu_chunk_t *chunk)
{
UVM_ASSERT(uvm_cpu_chunk_is_logical(chunk));
return container_of((chunk), uvm_cpu_logical_chunk_t, common);
}
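The helpers above follow a check-then-downcast pattern: dispatch on chunk->type, then convert to the concrete descriptor with container_of(). A hedged sketch of that pattern, using only fields declared in this header:

// Illustrative: walk from any chunk up to the physical chunk that backs it.
static uvm_cpu_physical_chunk_t *chunk_backing_physical(uvm_cpu_chunk_t *chunk)
{
    while (uvm_cpu_chunk_is_logical(chunk))
        chunk = uvm_cpu_chunk_to_logical(chunk)->parent;

    return uvm_cpu_chunk_to_physical(chunk);
}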
NV_STATUS uvm_cpu_chunk_gpu_mapping_alloc(uvm_va_block_t *va_block, uvm_gpu_id_t id);
void uvm_cpu_chunk_gpu_mapping_split(uvm_va_block_t *existing, uvm_va_block_t *new, uvm_gpu_id_t id);
void uvm_cpu_chunk_gpu_mapping_free(uvm_va_block_t *va_block, uvm_gpu_id_t id);
// Free a CPU chunk.
// This may not result in the immediate freeing of the physical pages of the
// chunk if this is a logical chunk and there are other logical chunks holding
// references to the physical chunk.
// If any DMA mappings to this chunk are still active, they are implicitly
// destroyed.
void uvm_cpu_chunk_free(uvm_cpu_chunk_t *chunk);
// Set the CPU chunk's DMA mapping address for the specified GPU ID.
NV_STATUS uvm_cpu_chunk_set_gpu_mapping_addr(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_cpu_chunk_t *chunk,
uvm_gpu_id_t id,
NvU64 dma_addr);
// In some configurations, such as SR-IOV heavy, a CPU chunk cannot be
// referenced using its physical address; a kernel virtual mapping needs to be
// created instead.
//
// This helper function creates a DMA mapping on the GPU (see
// uvm_cpu_chunk_map_gpu()) and if necessary a kernel virtual mapping for the
// chunk. The virtual mapping persists until GPU deinitialization, such that no
// unmap functionality is exposed. For more details see uvm_mmu_sysmem_map().
//
// Note that unlike uvm_cpu_chunk_map_gpu(), this helper requires the GPU
// object instead of the parent GPU object.
NV_STATUS uvm_cpu_chunk_map_gpu(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu);
// Destroy a CPU chunk's DMA mapping for the parent GPU.
// If chunk is a logical chunk, this call may not necessarily destroy the DMA
// mapping of the parent physical chunk since all logical chunks share the
// parent's DMA mapping.
void uvm_cpu_chunk_unmap_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu);
// Get the CPU chunk's DMA mapping address for the specified GPU ID.
NvU64 uvm_cpu_chunk_get_gpu_mapping_addr(uvm_va_block_t *block,
uvm_page_index_t page_index,
uvm_cpu_chunk_t *chunk,
uvm_gpu_id_t id);
// If there is no mapping for the GPU, 0 is returned.
NvU64 uvm_cpu_chunk_get_gpu_phys_addr(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu);
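A hedged sketch of the mapping flow implied by the declarations above: map the chunk for a GPU, read back the parent-GPU DMA address, then drop the mapping. The caller context is hypothetical and error handling is minimal; gpu->parent is assumed to be the owning uvm_parent_gpu_t:

// Illustrative: create a DMA mapping, query the address, then unmap.
static NV_STATUS map_and_query_chunk(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
{
    NvU64 dma_addr;
    NV_STATUS status = uvm_cpu_chunk_map_gpu(chunk, gpu);

    if (status != NV_OK)
        return status;

    dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu->parent);
    UVM_ASSERT(dma_addr != 0);

    // ... use dma_addr for copies or page table entries ...

    uvm_cpu_chunk_unmap_gpu_phys(chunk, gpu->parent);
    return NV_OK;
}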
#if !UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE()
// Split a CPU chunk into a set of CPU chunks of size new_size.
// new_size has to be one of the supported CPU chunk allocation sizes and has to
// be smaller than the current size of chunk.
// Split a CPU chunk into a set of CPU chunks of the next size down from the set
// of enabled CPU chunk sizes.
//
// On success, NV_OK is returned. On failure NV_ERR_NO_MEMORY will be returned.
NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block,
uvm_cpu_chunk_t *chunk,
uvm_chunk_size_t new_size,
uvm_page_index_t page_index,
uvm_cpu_chunk_t **new_chunks);
// This function expects that the chunk to be split is larger than the minimum
// enabled chunk size and that new_chunks has enough space for all chunks
// resulting from the split.
//
// On success, NV_OK is returned and the caller-provided new_chunks array will
// be filled out with the newly-created logical chunks.
//
// After a successful split, the input chunk can no longer be used.
//
// On failure NV_ERR_NO_MEMORY will be returned.
//
// Should never be called for HMM chunks: they don't need splitting (they can
// only be PAGE_SIZE), and even if larger chunks could exist, UVM could not
// split them without kernel interaction, which currently isn't exported. Will
// return NV_ERR_INVALID_ARGUMENT for an HMM chunk.
// TODO: Bug 3368756: add support for transparent huge page (THP)
NV_STATUS uvm_cpu_chunk_split(uvm_cpu_chunk_t *chunk, uvm_cpu_chunk_t **new_chunks);
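A hedged usage sketch of the new split interface: with the UVM_CPU_CHUNK_SIZES mask above, splitting a 2MB chunk produces its 64KB children, so new_chunks needs room for 32 entries. The caller below is illustrative only:

// Illustrative: split a 2MB physical chunk into its 64KB logical children and
// then free them, which also drops the parent's initial reference.
static NV_STATUS split_and_free_2m_chunk(uvm_cpu_chunk_t *chunk)
{
    uvm_cpu_chunk_t *children[UVM_PAGE_SIZE_2M / UVM_PAGE_SIZE_64K];
    NV_STATUS status;
    size_t i;

    UVM_ASSERT(uvm_cpu_chunk_get_size(chunk) == UVM_PAGE_SIZE_2M);

    status = uvm_cpu_chunk_split(chunk, children);
    if (status != NV_OK)
        return status;

    // The input chunk must not be used after a successful split.
    for (i = 0; i < ARRAY_SIZE(children); i++)
        uvm_cpu_chunk_free(children[i]);

    return NV_OK;
}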
// Merge chunks to merge_size.
//
// All input chunks must have the same parent and size. If not,
// NV_ERR_INVALID_ARGUMENT is returned.
//
// If a merge cannot be done, NV_WARN_NOTHING_TO_DO is returned.
//
// On success, NV_OK is returned and merged_chunk is set to point to the
// merged chunk.
NV_STATUS uvm_cpu_chunk_merge(uvm_va_block_t *va_block,
uvm_cpu_chunk_t **chunks,
size_t num_merge_chunks,
uvm_chunk_size_t merge_size,
uvm_cpu_chunk_t **merged_chunk);
// Merge an array of logical chunks into their parent chunk. All chunks have to
// have the same size, parent, and set of mapped GPUs.
uvm_cpu_chunk_t *uvm_cpu_chunk_merge(uvm_cpu_chunk_t **chunks);
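And in the other direction, a hedged sketch of re-merging logical children into their parent; it assumes every child produced by the earlier split is passed in and that they still share the same size, parent, and set of mapped GPUs, as required above:

// Illustrative: merge the children produced by a previous split back into the
// parent chunk. The child pointers must not be used afterwards.
static uvm_cpu_chunk_t *merge_children(uvm_cpu_chunk_t **children)
{
    uvm_chunk_size_t child_size = uvm_cpu_chunk_get_size(children[0]);
    uvm_cpu_chunk_t *merged = uvm_cpu_chunk_merge(children);

    // The merged chunk covers a strictly larger span than each child.
    UVM_ASSERT(uvm_cpu_chunk_get_size(merged) > child_size);
    return merged;
}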
// Mark the CPU sub-page page_index in the CPU chunk as dirty.
// page_index has to be a page within the chunk's region.
// Mark the page_index sub-page of the chunk as dirty.
// page_index is an offset into the chunk.
//
// Note that dirty status for HMM chunks should not be modified directly from
// UVM. Instead the kernel will mark the backing struct pages dirty either on
// fault when written to from the CPU, or when the PTE is mirrored to the GPU
// using hmm_range_fault().
void uvm_cpu_chunk_mark_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
// Mark the CPU sub-pages page_index in the CPU chunk as clean.
// page_index has to be a page within the chunk's region.
// Mark the page_index sub-page of the chunk as clean.
// page_index is an offset into the chunk.
void uvm_cpu_chunk_mark_clean(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
// Return true if the CPU sub-page page_index in the CPU chunk is dirty.
// page_index has to be a page within the chunk's region.
// Return true if the page_index base page of the CPU chunk is dirty.
bool uvm_cpu_chunk_is_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
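A hedged sketch of how a caller might consume the per-base-page dirty interface on a non-HMM chunk, for example to decide which PAGE_SIZE pages actually need to be copied:

// Illustrative: count the dirty PAGE_SIZE pages within a non-HMM chunk.
static size_t count_dirty_pages(uvm_cpu_chunk_t *chunk)
{
    size_t num_pages = uvm_cpu_chunk_num_pages(chunk);
    size_t count = 0;
    uvm_page_index_t page_index;

    UVM_ASSERT(!uvm_cpu_chunk_is_hmm(chunk));

    for (page_index = 0; page_index < num_pages; page_index++) {
        if (uvm_cpu_chunk_is_dirty(chunk, page_index))
            count++;
    }

    return count;
}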
#else // UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE()
static NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block,
uvm_cpu_chunk_t *chunk,
uvm_chunk_size_t new_size,
uvm_page_index_t page_index,
uvm_cpu_chunk_t **new_chunks)
{
return NV_OK;
}
static NV_STATUS uvm_cpu_chunk_merge(uvm_va_block_t *va_block,
uvm_cpu_chunk_t **chunk,
size_t num_merge_chunks,
uvm_chunk_size_t merge_size,
uvm_cpu_chunk_t **merged_chunk)
{
return NV_WARN_NOTHING_TO_DO;
}
static void uvm_cpu_chunk_mark_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index)
{
SetPageDirty(chunk);
}
static void uvm_cpu_chunk_mark_clean(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index)
{
ClearPageDirty(chunk);
}
static bool uvm_cpu_chunk_is_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index)
{
return PageDirty(chunk);
}
#endif // !UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE()
// Return the first CPU chunk in the block. If no CPU chunks have been
// allocated and/or inserted into the block, NULL is returned.
// If not NULL, page_index will be set to the first page of the block covered by
// the returned chunk.
uvm_cpu_chunk_t *uvm_cpu_chunk_first_in_block(uvm_va_block_t *va_block, uvm_page_index_t *out_page_index);
// Return the next CPU chunk in the block owning chunk.
// previous_page_index is the index after which to start searching. Its value
// will be updated with the starting page index of the next chunk in the block.
uvm_cpu_chunk_t *uvm_cpu_chunk_next(uvm_va_block_t *va_block, uvm_page_index_t *previous_page_index);
#define for_each_cpu_chunk_in_block(chunk, page_index, va_block) \
for ((chunk) = uvm_cpu_chunk_first_in_block((va_block), &(page_index)); \
(chunk) != NULL; \
(page_index) += uvm_cpu_chunk_num_pages(chunk) - 1, (chunk) = uvm_cpu_chunk_next((va_block), &(page_index)))
#define for_each_cpu_chunk_in_block_safe(chunk, page_index, next_page_index, va_block) \
for ((chunk) = uvm_cpu_chunk_first_in_block((va_block), &(page_index)), \
(next_page_index) = (page_index) + ((chunk) ? uvm_cpu_chunk_num_pages(chunk) : 0); \
(chunk) != NULL; \
(page_index) = (next_page_index) - 1, (chunk) = uvm_cpu_chunk_next((va_block), &(page_index)), \
(next_page_index) = (page_index) + ((chunk) ? uvm_cpu_chunk_num_pages(chunk) : 0))
// Use a special symbol for the region so it does not replace the chunk's region
// structure member.
#define for_each_cpu_chunk_in_block_region(chunk, page_index, va_block, __region) \
for ((page_index) = uvm_va_block_first_page_in_mask((__region), &(va_block)->cpu.allocated), \
(chunk) = uvm_cpu_chunk_get_chunk_for_page((va_block), (page_index)); \
(chunk) != NULL && page_index < (__region).outer; \
(page_index) += uvm_cpu_chunk_num_pages(chunk) - 1, (chunk) = uvm_cpu_chunk_next((va_block), &(page_index)))
#define for_each_cpu_chunk_in_block_region_safe(chunk, page_index, next_page_index, va_block, __region) \
for ((page_index) = uvm_va_block_first_page_in_mask((__region), &(va_block)->cpu.allocated), \
(chunk) = uvm_cpu_chunk_get_chunk_for_page((va_block), (page_index)), \
(next_page_index) = (page_index) + (chunk ? uvm_cpu_chunk_num_pages(chunk) : 0); \
(chunk) != NULL && page_index < (__region).outer; \
(page_index) = (next_page_index) - 1, (chunk) = uvm_cpu_chunk_next((va_block), &(page_index)), \
(next_page_index) = (page_index) + (chunk ? uvm_cpu_chunk_num_pages(chunk) : 0))
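A hedged usage sketch of the block-level iterator macro above; the va_block lock is assumed to be held by the (hypothetical) caller, and the macro takes care of advancing page_index past each chunk:

// Illustrative: visit every CPU chunk currently inserted into a VA block.
static size_t count_block_cpu_chunks(uvm_va_block_t *va_block)
{
    uvm_cpu_chunk_t *chunk;
    uvm_page_index_t page_index;
    size_t count = 0;

    for_each_cpu_chunk_in_block(chunk, page_index, va_block)
        count++;

    return count;
}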
static NV_STATUS uvm_test_get_cpu_chunk_allocation_sizes(UVM_TEST_GET_CPU_CHUNK_ALLOC_SIZES_PARAMS *params,
struct file *filp)
{