mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2026-01-27 03:29:47 +00:00
282 lines
11 KiB
C
282 lines
11 KiB
C
/*******************************************************************************
|
|
Copyright (c) 2018-2025 NVIDIA Corporation
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to
|
|
deal in the Software without restriction, including without limitation the
|
|
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
sell copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be
|
|
included in all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
DEALINGS IN THE SOFTWARE.
|
|
|
|
*******************************************************************************/
|
|
|
|
#ifndef __UVM_MIGRATE_PAGEABLE_H__
|
|
#define __UVM_MIGRATE_PAGEABLE_H__
|
|
|
|
#include "uvm_common.h"
|
|
#include "uvm_linux.h"
|
|
#include "uvm_populate_pageable.h"
|
|
#include "uvm_forward_decl.h"
|
|
#include "uvm_processors.h"
|
|
#include "uvm_va_block_types.h"
|
|
|
|
#include <linux/nodemask.h>
|
|
|
|
typedef struct
|
|
{
|
|
uvm_va_space_t *va_space;
|
|
struct mm_struct *mm;
|
|
const unsigned long start;
|
|
const unsigned long length;
|
|
uvm_processor_id_t dst_id;
|
|
|
|
// dst_node_id may be clobbered by uvm_migrate_pageable().
|
|
int dst_node_id;
|
|
uvm_populate_permissions_t populate_permissions;
|
|
NvU32 populate_flags;
|
|
bool skip_mapped : 1;
|
|
uvm_make_resident_cause_t cause;
|
|
bool populate_on_cpu_alloc_failures : 1;
|
|
bool populate_on_migrate_vma_failures : 1;
|
|
|
|
// access_counters_buffer_index is only valid if cause is
|
|
// UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER.
|
|
NvU32 access_counters_buffer_index;
|
|
NvU64 *user_space_start;
|
|
NvU64 *user_space_length;
|
|
|
|
uvm_processor_mask_t *gpus_to_check_for_nvlink_errors;
|
|
bool fail_on_unresolved_sto_errors;
|
|
} uvm_migrate_args_t;
|
|
|
|
#if defined(CONFIG_MIGRATE_VMA_HELPER)
|
|
#define UVM_MIGRATE_VMA_SUPPORTED 1
|
|
#else
|
|
#if NV_IS_EXPORT_SYMBOL_PRESENT_migrate_vma_setup
|
|
#define UVM_MIGRATE_VMA_SUPPORTED 1
|
|
#endif
|
|
#endif
|
|
|
|
#ifdef UVM_MIGRATE_VMA_SUPPORTED
|
|
#include <linux/migrate.h>
|
|
|
|
// The calls to migrate_vma are capped at 512 pages to set an upper bound on the
|
|
// amount of metadata that needs to be allocated for the operation. This number
|
|
// was chosen because performance seems to plateau at this size on 64K-pages
|
|
// kernels. On kernels with PAGE_SIZE == 4K, 512 pages correspond to 2M VA block,
|
|
// which is also a standard size for batch operations.
|
|
#define UVM_MIGRATE_VMA_MAX_PAGES (512UL)
|
|
#define UVM_MIGRATE_VMA_MAX_SIZE (UVM_MIGRATE_VMA_MAX_PAGES * PAGE_SIZE)
|
|
|
|
typedef struct
|
|
{
|
|
DECLARE_BITMAP(page_mask, UVM_MIGRATE_VMA_MAX_PAGES);
|
|
} uvm_migrate_vma_page_mask_t;
|
|
|
|
typedef struct
|
|
{
|
|
// Scatter list managing the necessary copying GPU's IOMMU mappings
|
|
// used to copy pages from a source processor.
|
|
struct sg_table sgt_from;
|
|
|
|
// The copying GPUs that performs the copy from the source processors
|
|
// for which IOMMU mappings are created.
|
|
uvm_gpu_t *sgt_from_gpu;
|
|
|
|
// Number of DMA mapped pages in this scatterlist.
|
|
unsigned long dma_count;
|
|
} uvm_sgt_t;
|
|
|
|
typedef struct
|
|
{
|
|
// Input parameters
|
|
uvm_migrate_args_t *uvm_migrate_args;
|
|
|
|
// Output parameters
|
|
//
|
|
// Error code. This only signals errors in internal UVM operations.
|
|
// Pages that failed allocation or could not be populated are communicated
|
|
// using the fields below.
|
|
NV_STATUS status;
|
|
|
|
// Mask of pages that couldn't be made resident on the destination because
|
|
// (a) they are backed with data but pages are not populated (e.g. in swap),
|
|
// (b) pages are not backed with any data yet but were not populated
|
|
// due to the vma not being READ_WRITE, as it would not charge the pages to
|
|
// the process properly.
|
|
// (c) pages are already mapped and such pages were requested to not be
|
|
// migrated via skip_mapped.
|
|
// (d) pages which couldn't be migrated by the kernel.
|
|
uvm_migrate_vma_page_mask_t populate_pages_mask;
|
|
|
|
// Mask of pages that failed allocation on the destination
|
|
uvm_migrate_vma_page_mask_t allocation_failed_mask;
|
|
|
|
// Mask of pages which are already resident at the destination.
|
|
uvm_migrate_vma_page_mask_t dst_resident_pages_mask;
|
|
|
|
// Global state managed by the caller
|
|
//
|
|
// These are scratch masks that can be used by the migrate_vma caller to
|
|
// save output page masks and orchestrate the migrate_vma
|
|
// retries/population calls if needed.
|
|
uvm_migrate_vma_page_mask_t scratch1_mask;
|
|
uvm_migrate_vma_page_mask_t scratch2_mask;
|
|
|
|
// Arrays used by migrate_vma to store the src/dst pfns
|
|
unsigned long dst_pfn_array[UVM_MIGRATE_VMA_MAX_PAGES];
|
|
unsigned long src_pfn_array[UVM_MIGRATE_VMA_MAX_PAGES];
|
|
|
|
// Internal state
|
|
//
|
|
uvm_tracker_t tracker;
|
|
|
|
struct {
|
|
// Scatter table managing the IOMMU mappings for anonymous pages.
|
|
uvm_sgt_t anon_sgt;
|
|
|
|
// Scatter table managing the IOMMU mappings for pages resident on
|
|
// GPUs.
|
|
uvm_sgt_t sgt_gpu[UVM_ID_MAX_GPUS];
|
|
|
|
// Scatter table managing the IOMMU mappings for pages resident on CPU
|
|
// NUMA nodes. The array size is num_possible_nodes() and should be
|
|
// indexed by the position of CPU node ID relative to other set bits
|
|
// in node_possible_map nodemask.
|
|
uvm_sgt_t *sgt_cpu;
|
|
} dma;
|
|
|
|
// GPU processors where pages are resident before calling migrate_vma.
|
|
uvm_processor_mask_t src_gpus;
|
|
|
|
// CPU NUMA nodes where pages are resident before calling migrate_vma.
|
|
nodemask_t src_cpu_nodemask;
|
|
|
|
// Array of per-processor page masks with the pages that are resident
|
|
// before calling migrate_vma.
|
|
struct {
|
|
uvm_migrate_vma_page_mask_t proc_page_mask;
|
|
} processors[UVM_ID_MAX_PROCESSORS];
|
|
|
|
// CPU Processors where pages are resident before calling migrate_vma
|
|
uvm_migrate_vma_page_mask_t *cpu_page_mask;
|
|
|
|
// Number of pages in the migrate_vma call
|
|
unsigned long num_pages;
|
|
|
|
// Number of pages that are directly populated on the destination
|
|
unsigned long num_populate_anon_pages;
|
|
|
|
// Tracks if OOM condition was encountered.
|
|
bool out_of_memory;
|
|
} migrate_vma_state_t;
|
|
|
|
#if defined(CONFIG_MIGRATE_VMA_HELPER)
|
|
struct migrate_vma {
|
|
struct vm_area_struct *vma;
|
|
unsigned long *dst;
|
|
unsigned long *src;
|
|
unsigned long start;
|
|
unsigned long end;
|
|
};
|
|
#endif // CONFIG_MIGRATE_VMA_HELPER
|
|
|
|
// Populates the given VA range and tries to migrate all the pages to dst_id. If
|
|
// the destination processor is the CPU, the NUMA node in dst_node_id is used.
|
|
// The input VA range must be fully backed by VMAs. This function relies on
|
|
// migrate_vma, which was added in Linux 4.14. If skip_mapped is set to true,
|
|
// then already mapped pages will not be migrated. For kernels that do not
|
|
// provide migrate_vma, this function populates the memory using get_user_pages
|
|
// and returns NV_WARN_NOTHING_TO_DO to complete the migration in user space for
|
|
// API calls made from userspace. Kernel callers are expected to handle this
|
|
// error according to their respective usecases. user_space_start and
|
|
// user_space_length will contain the full input range. skip_mapped is ignored
|
|
// for such kernels. If the destination is the CPU and dst_node_id is full,
|
|
// NV_ERR_MORE_PROCESSING_REQUIRED is returned and user-space will call
|
|
// UVM_MIGRATE with the next preferred CPU node (if more are available),
|
|
// starting at the address specified by user_space_start. If the destination is
|
|
// a GPU and a page could not be populated, return NV_ERR_NO_MEMORY. Otherwise,
|
|
// return NV_OK. This is fine because UvmMigrate/UvmMigrateAsync only guarantee
|
|
// that the memory is populated somewhere in the system, not that pages moved to
|
|
// the requested processor.
|
|
//
|
|
// migrate_vma does not support file-backed vmas yet. If a file-backed vma is
|
|
// found, populates the pages corresponding to the vma with get_user_pages() and
|
|
// returns NV_WARN_NOTHING_TO_DO. The caller is expected to handle this error.
|
|
// API calls will fall back to user-mode to complete the migration. Kernel
|
|
// callers are expected to handle this error according to the respective
|
|
// usecases. If NV_WARN_NOTHING_TO_DO is returned, user_space_start and
|
|
// user_space_length will contain the intersection of the vma address range and
|
|
// [start:start + length].
|
|
//
|
|
// If a user-mode fallback is required but current->mm != uvm_migrate_args->mm,
|
|
// NV_ERR_NOT_SUPPORTED is returned since user mode can't perform such a
|
|
// migration. If the caller is a kernel thread, such as the GPU BH, this check
|
|
// is ignored since the caller is not expected to take such a fallback.
|
|
//
|
|
// Also, if no GPUs have been registered in the VA space, populates the pages
|
|
// corresponding to the first vma in the requested region using get_user_pages()
|
|
// and returns NV_WARN_NOTHING_TO_DO to fall back to user space for the
|
|
// userspace API callers to complete the whole migration. Kernel callers are
|
|
// expected to handle this error according to their respective usecases.
|
|
//
|
|
// Locking: mmap_lock must be held in read or write mode
|
|
NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args);
|
|
|
|
NV_STATUS uvm_migrate_pageable_init(void);
|
|
|
|
void uvm_migrate_pageable_exit(void);
|
|
|
|
NV_STATUS uvm_test_skip_migrate_vma(UVM_TEST_SKIP_MIGRATE_VMA_PARAMS *params, struct file *filp);
|
|
|
|
#else // UVM_MIGRATE_VMA_SUPPORTED
|
|
|
|
static NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
|
|
{
|
|
NV_STATUS status;
|
|
|
|
if (current->mm != uvm_migrate_args->mm && !(current->flags & PF_KTHREAD))
|
|
return NV_ERR_NOT_SUPPORTED;
|
|
|
|
status = uvm_populate_pageable(uvm_migrate_args->mm,
|
|
uvm_migrate_args->start,
|
|
uvm_migrate_args->length,
|
|
uvm_migrate_args->populate_permissions,
|
|
uvm_migrate_args->populate_flags);
|
|
if (status != NV_OK)
|
|
return status;
|
|
|
|
*(uvm_migrate_args->user_space_start) = uvm_migrate_args->start;
|
|
*(uvm_migrate_args->user_space_length) = uvm_migrate_args->length;
|
|
|
|
return NV_WARN_NOTHING_TO_DO;
|
|
}
|
|
|
|
static NV_STATUS uvm_migrate_pageable_init(void)
|
|
{
|
|
return NV_OK;
|
|
}
|
|
|
|
static void uvm_migrate_pageable_exit(void)
|
|
{
|
|
}
|
|
|
|
static inline NV_STATUS uvm_test_skip_migrate_vma(UVM_TEST_SKIP_MIGRATE_VMA_PARAMS *params, struct file *filp)
|
|
{
|
|
return NV_OK;
|
|
}
|
|
#endif // UVM_MIGRATE_VMA_SUPPORTED
|
|
|
|
#endif
|