535.171.04

Bernhard Stoeckner
2024-03-21 14:22:31 +01:00
parent 044f70bbb8
commit c042c7903d
36 changed files with 691 additions and 265 deletions

View File

@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.161.08\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.171.04\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
@@ -152,6 +152,8 @@ NV_CONFTEST_CMD := /bin/sh $(NV_CONFTEST_SCRIPT) \
NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_CONFTEST_CMD) build_cflags)
NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie
NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign)
NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,)
NV_CONFTEST_COMPILE_TEST_HEADERS := $(obj)/conftest/macros.h
NV_CONFTEST_COMPILE_TEST_HEADERS += $(obj)/conftest/functions.h

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2001-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2001-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -1982,31 +1982,6 @@ static inline NvBool nv_platform_use_auto_online(nv_linux_state_t *nvl)
return nvl->numa_info.use_auto_online;
}
typedef struct {
NvU64 base;
NvU64 size;
NvU32 nodeId;
int ret;
} remove_numa_memory_info_t;
static void offline_numa_memory_callback
(
void *args
)
{
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->nodeId,
pNumaInfo->base,
pNumaInfo->size);
#else
pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->base,
pNumaInfo->size);
#endif
#endif
}
typedef enum
{
NV_NUMA_STATUS_DISABLED = 0,

View File

@@ -3032,6 +3032,22 @@ compile_test() {
;;
foll_longterm_present)
#
# Determine if FOLL_LONGTERM enum is present or not
#
# Added by commit 932f4a630a69 ("mm/gup: replace
# get_user_pages_longterm() with FOLL_LONGTERM") in
# v5.2
#
CODE="
#include <linux/mm.h>
int foll_longterm = FOLL_LONGTERM;
"
compile_check_conftest "$CODE" "NV_FOLL_LONGTERM_PRESENT" "" "types"
;;
vfio_pin_pages_has_vfio_device_arg)
#
# Determine if vfio_pin_pages() kABI accepts "struct vfio_device *"
@@ -5081,11 +5097,15 @@ compile_test() {
# vmap ops and convert GEM backends") update
# drm_gem_object_funcs::vmap to take 'map' argument.
#
# Note that the 'map' argument type is changed from 'struct dma_buf_map'
# to 'struct iosys_map' by commit 7938f4218168 ("dma-buf-map: Rename
# to iosys-map") in v5.18.
#
CODE="
#include <drm/drm_gem.h>
int conftest_drm_gem_object_vmap_has_map_arg(
struct drm_gem_object *obj, struct dma_buf_map *map) {
return obj->funcs->vmap(obj, map);
struct drm_gem_object *obj) {
return obj->funcs->vmap(obj, NULL);
}"
compile_check_conftest "$CODE" "NV_DRM_GEM_OBJECT_VMAP_HAS_MAP_ARG" "" "types"
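A note on the reworked test body above: the vmap hook's map argument changed struct type across kernel versions, and a NULL argument converts to either pointer type, so a single test now compiles against both. A minimal sketch under that assumption (probe_vmap is an illustrative name, not driver code):

#include <drm/drm_gem.h>

/*
 * Sketch only. Per the commits cited above, the hook is:
 *   v5.11+: int (*vmap)(struct drm_gem_object *obj, struct dma_buf_map *map);
 *   v5.18+: int (*vmap)(struct drm_gem_object *obj, struct iosys_map *map);
 * NULL is assignment-compatible with either pointer type, which is what
 * lets the conftest drop the typed 'map' parameter.
 */
static int probe_vmap(struct drm_gem_object *obj)
{
    return obj->funcs->vmap(obj, NULL);
}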

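Similarly, for the foll_longterm_present test earlier in this file, here is a minimal sketch of how the generated macro is typically consumed, following the same pattern os-mlock.c uses later in this commit (nv_gup_flags is an illustrative helper, not a driver function):

#include <linux/mm.h>

/* Sketch: conftest emits NV_FOLL_LONGTERM_PRESENT into conftest/macros.h,
 * and driver code gates the flag on kernels >= v5.2 (commit 932f4a630a69). */
static unsigned int nv_gup_flags(int write)
{
    unsigned int gup_flags = write ? FOLL_WRITE : 0;
#if defined(NV_FOLL_LONGTERM_PRESENT)
    gup_flags |= FOLL_LONGTERM;
#endif
    return gup_flags;
}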
View File

@@ -54,7 +54,11 @@
#include "nv-time.h"
#include "nv-lock.h"
#if !defined(CONFIG_RETPOLINE)
/*
* Commit aefb2f2e619b ("x86/bugs: Rename CONFIG_RETPOLINE =>
* CONFIG_MITIGATION_RETPOLINE") in v6.8 renamed CONFIG_RETPOLINE.
*/
#if !defined(CONFIG_RETPOLINE) && !defined(CONFIG_MITIGATION_RETPOLINE)
#include "nv-retpoline.h"
#endif

View File

@@ -34,16 +34,6 @@
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
// ATS prefetcher uses hmm_range_fault() to query residency information.
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
// of memory regions while hmm_range_fault() is being called, MMU interval
// notifiers are needed.
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
#define UVM_ATS_PREFETCH_SUPPORTED() 1
#else
#define UVM_ATS_PREFETCH_SUPPORTED() 0
#endif
typedef struct
{
// Mask of gpu_va_spaces which are registered for ATS access. The mask is

View File

@@ -30,23 +30,36 @@
#include <linux/mempolicy.h>
#include <linux/mmu_notifier.h>
#if UVM_ATS_PREFETCH_SUPPORTED()
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
#include <linux/hmm.h>
#endif
static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 start,
size_t length,
uvm_fault_access_type_t access_type,
uvm_ats_fault_context_t *ats_context)
typedef enum
{
UVM_ATS_SERVICE_TYPE_FAULTS = 0,
UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS,
UVM_ATS_SERVICE_TYPE_COUNT
} uvm_ats_service_type_t;
static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 start,
size_t length,
uvm_fault_access_type_t access_type,
uvm_ats_service_type_t service_type,
uvm_ats_fault_context_t *ats_context)
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
struct mm_struct *mm = va_space->va_space_mm.mm;
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
NV_STATUS status;
NvU64 user_space_start;
NvU64 user_space_length;
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
bool fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
uvm_populate_permissions_t populate_permissions = fault_service_type ?
(write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY) :
UVM_POPULATE_PERMISSIONS_INHERIT;
// Request uvm_migrate_pageable() to touch the corresponding page after
// population.
@@ -83,10 +96,10 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
.dst_node_id = ats_context->residency_node,
.start = start,
.length = length,
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
.touch = true,
.skip_mapped = true,
.populate_on_cpu_alloc_failures = true,
.populate_permissions = populate_permissions,
.touch = fault_service_type,
.skip_mapped = fault_service_type,
.populate_on_cpu_alloc_failures = fault_service_type,
.user_space_start = &user_space_start,
.user_space_length = &user_space_length,
};
@@ -233,7 +246,7 @@ static uvm_va_block_region_t uvm_ats_region_from_vma(struct vm_area_struct *vma,
return uvm_ats_region_from_start_end(start, end);
}
#if UVM_ATS_PREFETCH_SUPPORTED()
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
static bool uvm_ats_invalidate_notifier(struct mmu_interval_notifier *mni, unsigned long cur_seq)
{
@@ -271,12 +284,12 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_context_t *ats_context)
{
NV_STATUS status = NV_OK;
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
#if UVM_ATS_PREFETCH_SUPPORTED()
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
int ret;
NvU64 start;
NvU64 end;
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
struct hmm_range range;
uvm_page_index_t page_index;
uvm_va_block_region_t vma_region;
@@ -357,78 +370,83 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
mmu_interval_notifier_remove(range.notifier);
#else
uvm_page_mask_zero(residency_mask);
#endif
return status;
}
static void ats_expand_fault_region(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
uvm_ats_fault_context_t *ats_context,
uvm_va_block_region_t max_prefetch_region,
uvm_page_mask_t *faulted_mask)
static void ats_compute_prefetch_mask(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
uvm_ats_fault_context_t *ats_context,
uvm_va_block_region_t max_prefetch_region)
{
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree = &ats_context->prefetch_state.bitmap_tree;
if (uvm_page_mask_empty(faulted_mask))
if (uvm_page_mask_empty(accessed_mask))
return;
uvm_perf_prefetch_compute_ats(gpu_va_space->va_space,
faulted_mask,
uvm_va_block_region_from_mask(NULL, faulted_mask),
accessed_mask,
uvm_va_block_region_from_mask(NULL, accessed_mask),
max_prefetch_region,
residency_mask,
bitmap_tree,
prefetch_mask);
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
if (vma->vm_flags & VM_WRITE)
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
}
static NV_STATUS ats_fault_prefetch(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_fault_context_t *ats_context)
static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_service_type_t service_type,
uvm_ats_fault_context_t *ats_context)
{
NV_STATUS status = NV_OK;
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
NV_STATUS status;
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
// Residency mask needs to be computed even if prefetching is disabled since
// the residency information is also needed by access counter servicing in
// uvm_ats_service_access_counters()
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
if (status != NV_OK)
return status;
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
return status;
if (uvm_page_mask_empty(faulted_mask))
return status;
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
if (status != NV_OK)
if (uvm_page_mask_empty(accessed_mask))
return status;
// Prefetch the entire region if none of the pages are resident on any node
// and if preferred_location is the faulting GPU.
if (ats_context->prefetch_state.has_preferred_location &&
ats_context->prefetch_state.first_touch &&
uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->parent->id)) {
(ats_context->prefetch_state.first_touch || (service_type == UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS)) &&
uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->id)) {
uvm_page_mask_init_from_region(prefetch_mask, max_prefetch_region, NULL);
}
else {
ats_compute_prefetch_mask(gpu_va_space, vma, ats_context, max_prefetch_region);
}
if (service_type == UVM_ATS_SERVICE_TYPE_FAULTS) {
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
if (vma->vm_flags & VM_WRITE)
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
return status;
}
ats_expand_fault_region(gpu_va_space, vma, ats_context, max_prefetch_region, faulted_mask);
else {
uvm_page_mask_or(accessed_mask, accessed_mask, prefetch_mask);
}
return status;
}
@@ -446,6 +464,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
uvm_fault_client_type_t client_type = ats_context->client_type;
uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_FAULTS;
UVM_ASSERT(vma);
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
@@ -454,6 +473,9 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
UVM_ASSERT(gpu_va_space->ats.enabled);
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
uvm_assert_mmap_lock_locked(vma->vm_mm);
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
uvm_page_mask_zero(faults_serviced_mask);
uvm_page_mask_zero(reads_serviced_mask);
@@ -479,7 +501,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
ats_batch_select_residency(gpu_va_space, vma, ats_context);
ats_fault_prefetch(gpu_va_space, vma, base, ats_context);
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
NvU64 start = base + (subregion.first * PAGE_SIZE);
@@ -491,7 +513,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
UVM_ASSERT(start >= vma->vm_start);
UVM_ASSERT((start + length) <= vma->vm_end);
status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
if (status != NV_OK)
return status;
@@ -526,7 +548,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
UVM_ASSERT(start >= vma->vm_start);
UVM_ASSERT((start + length) <= vma->vm_end);
status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
if (status != NV_OK)
return status;
@@ -598,3 +620,53 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
return status;
}
NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_fault_context_t *ats_context)
{
uvm_va_block_region_t subregion;
uvm_va_block_region_t region = uvm_va_block_region(0, PAGES_PER_UVM_VA_BLOCK);
uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS;
UVM_ASSERT(vma);
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
UVM_ASSERT(g_uvm_global.ats.enabled);
UVM_ASSERT(gpu_va_space);
UVM_ASSERT(gpu_va_space->ats.enabled);
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
uvm_assert_mmap_lock_locked(vma->vm_mm);
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
ats_batch_select_residency(gpu_va_space, vma, ats_context);
// Ignoring the return value of ats_compute_prefetch is ok since prefetching
// is just an optimization and servicing access counter migrations is still
// worthwhile even without any prefetching. So, let servicing continue
// instead of returning early even if the prefetch computation fails.
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
// Remove pages which are already resident at the intended destination from
// the accessed_mask.
uvm_page_mask_andnot(&ats_context->accessed_mask,
&ats_context->accessed_mask,
&ats_context->prefetch_state.residency_mask);
for_each_va_block_subregion_in_mask(subregion, &ats_context->accessed_mask, region) {
NV_STATUS status;
NvU64 start = base + (subregion.first * PAGE_SIZE);
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_COUNT;
UVM_ASSERT(start >= vma->vm_start);
UVM_ASSERT((start + length) <= vma->vm_end);
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
if (status != NV_OK)
return status;
}
return NV_OK;
}

View File

@@ -42,11 +42,31 @@
// corresponding bit in read_fault_mask. These returned masks are only valid if
// the return status is NV_OK. A status other than NV_OK indicates a
// system-global fault servicing failure.
//
// LOCKING: The caller must retain and hold the mmap_lock and hold the va_space
// lock.
NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_fault_context_t *ats_context);
// Service access counter notifications on ATS regions in the range (base, base
// + UVM_VA_BLOCK_SIZE) for the individual pages requested by the page mask
// set in ats_context->accessed_mask. base must be aligned to UVM_VA_BLOCK_SIZE.
// The caller is responsible for ensuring that the addresses in the
// accessed_mask are completely covered by the VMA. The caller is also
// responsible for handling any errors returned by this function.
//
// Returns NV_OK if servicing was successful. Any other value indicates an error
// while servicing the range.
//
// LOCKING: The caller must retain and hold the mmap_lock and hold the va_space
// lock.
NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_fault_context_t *ats_context);
// Return whether there are any VA ranges (and thus GMMU mappings) within the
// UVM_GMMU_ATS_GRANULARITY-aligned region containing address.
bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_va_range_t *next);
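To make the locking contract above concrete, a minimal caller sketch using only the assertion helpers that appear elsewhere in this commit (example_service_counters is an illustrative name, not a UVM function):

static NV_STATUS example_service_counters(uvm_gpu_va_space_t *gpu_va_space,
                                          struct vm_area_struct *vma,
                                          NvU64 base,
                                          uvm_ats_fault_context_t *ats_context)
{
    // Preconditions documented above: both locks held by the caller, base
    // aligned to UVM_VA_BLOCK_SIZE, and accessed_mask populated beforehand.
    uvm_assert_mmap_lock_locked(vma->vm_mm);
    uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
    UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));

    return uvm_ats_service_access_counters(gpu_va_space, vma, base, ats_context);
}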

View File

@@ -181,23 +181,28 @@ struct uvm_service_block_context_struct
typedef struct
{
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
// VMA. Used for batching ATS faults in a vma.
// VMA. Used for batching ATS faults in a vma. This is unused for access
// counter service requests.
uvm_page_mask_t read_fault_mask;
// Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
// SAM VMA. Used for batching ATS faults in a vma.
// SAM VMA. Used for batching ATS faults in a vma. This is unused for access
// counter service requests.
uvm_page_mask_t write_fault_mask;
// Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE aligned region
// of a SAM VMA. Used to return ATS fault status.
// of a SAM VMA. Used to return ATS fault status. This is unused for access
// counter service requests.
uvm_page_mask_t faults_serviced_mask;
// Mask of successfully serviced read faults on pages in write_fault_mask.
// This is unused for access counter service requests.
uvm_page_mask_t reads_serviced_mask;
// Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
// SAM VMA. This is used as input to the prefetcher.
uvm_page_mask_t faulted_mask;
// Mask of all accessed pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
// VMA. This is used as input for access counter service requests and output
// of fault service requests.
uvm_page_mask_t accessed_mask;
// Client type of the service requestor.
uvm_fault_client_type_t client_type;
@@ -466,6 +471,9 @@ struct uvm_access_counter_service_batch_context_struct
// Structure used to coalesce access counter servicing in a VA block
uvm_service_block_context_t block_service_context;
// Structure used to service access counter migrations in an ATS block.
uvm_ats_fault_context_t ats_context;
// Unique id (per-GPU) generated for tools events recording
NvU32 batch_id;
};
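Since accessed_mask now serves both request types, a short sketch of how the fault path derives it, mirroring service_fault_batch_ats_sub_vma() later in this commit (example_fill_accessed_mask is an illustrative name):

static void example_fill_accessed_mask(uvm_ats_fault_context_t *ats_context)
{
    // Fault servicing folds the read and write fault masks into the common
    // accessed_mask consumed by the prefetcher; access counter servicing
    // instead sets accessed_mask bits directly from notification addresses.
    uvm_page_mask_or(&ats_context->accessed_mask,
                     &ats_context->write_fault_mask,
                     &ats_context->read_fault_mask);
}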

View File

@@ -33,7 +33,8 @@
#include "uvm_va_space_mm.h"
#include "uvm_pmm_sysmem.h"
#include "uvm_perf_module.h"
#include "uvm_ats_ibm.h"
#include "uvm_ats.h"
#include "uvm_ats_faults.h"
#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN 1
#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_DEFAULT 256
@@ -125,7 +126,7 @@ static va_space_access_counters_info_t *va_space_access_counters_info_get(uvm_va
// Whether access counter migrations are enabled or not. The policy is as
// follows:
// - MIMC migrations are disabled by default on all systems except P9.
// - MIMC migrations are disabled by default on all non-ATS systems.
// - MOMC migrations are disabled by default on all systems
// - Users can override this policy by specifying on/off
static bool is_migration_enabled(uvm_access_counter_type_t type)
@@ -148,7 +149,7 @@ static bool is_migration_enabled(uvm_access_counter_type_t type)
if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
return false;
if (UVM_ATS_IBM_SUPPORTED())
if (UVM_ATS_SUPPORTED())
return g_uvm_global.ats.supported;
return false;
@@ -1507,8 +1508,7 @@ static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
accessed_pages));
}
static void expand_notification_block(struct mm_struct *mm,
uvm_gpu_va_space_t *gpu_va_space,
static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
uvm_va_block_t *va_block,
uvm_page_mask_t *accessed_pages,
const uvm_access_counter_buffer_entry_t *current_entry)
@@ -1543,7 +1543,7 @@ static void expand_notification_block(struct mm_struct *mm,
// which received the notification if the memory was already migrated before
// acquiring the locks either during the servicing of previous notifications
// or during faults or because of explicit migrations or if the VA range was
// freed after receving the notification. Return NV_OK in such cases.
// freed after receiving the notification. Return NV_OK in such cases.
if (!UVM_ID_IS_VALID(resident_id) || uvm_id_equal(resident_id, gpu->id))
return;
@@ -1578,14 +1578,14 @@ static void expand_notification_block(struct mm_struct *mm,
}
}
static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
uvm_gpu_va_space_t *gpu_va_space,
static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_space,
struct mm_struct *mm,
uvm_va_block_t *va_block,
uvm_access_counter_service_batch_context_t *batch_context,
NvU32 index,
NvU32 *out_index)
{
NvU32 i = index;
NvU32 i;
NvU32 flags = 0;
NV_STATUS status = NV_OK;
NV_STATUS flags_status;
@@ -1595,7 +1595,7 @@ static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
UVM_ASSERT(va_block);
UVM_ASSERT(i < batch_context->virt.num_notifications);
UVM_ASSERT(index < batch_context->virt.num_notifications);
uvm_assert_rwsem_locked(&va_space->lock);
@@ -1603,28 +1603,25 @@ static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
uvm_mutex_lock(&va_block->lock);
while (i < batch_context->virt.num_notifications) {
for (i = index; i < batch_context->virt.num_notifications; i++) {
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
NvU64 address = current_entry->address.address;
if ((current_entry->virtual_info.va_space != va_space) || (address > va_block->end)) {
*out_index = i;
if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end))
expand_notification_block(gpu_va_space, va_block, accessed_pages, current_entry);
else
break;
}
expand_notification_block(mm, gpu_va_space, va_block, accessed_pages, current_entry);
i++;
*out_index = i;
}
*out_index = i;
// At least one notification should have been processed.
UVM_ASSERT(index < *out_index);
status = service_notification_va_block_helper(mm, va_block, gpu->id, batch_context);
uvm_mutex_unlock(&va_block->lock);
// At least one notification should have been processed.
UVM_ASSERT(index < *out_index);
if (status == NV_OK)
flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
@@ -1636,62 +1633,154 @@ static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
return status;
}
static NV_STATUS service_virt_notifications_batch(struct mm_struct *mm,
uvm_gpu_va_space_t *gpu_va_space,
static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
struct mm_struct *mm,
uvm_access_counter_service_batch_context_t *batch_context,
NvU32 index,
NvU32 *out_index)
{
NvU32 i;
NvU64 base;
NvU64 end;
NvU64 address;
NvU32 flags = UVM_ACCESS_COUNTER_ACTION_CLEAR;
NV_STATUS status = NV_OK;
NV_STATUS flags_status;
struct vm_area_struct *vma = NULL;
uvm_gpu_t *gpu = gpu_va_space->gpu;
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
UVM_ASSERT(index < batch_context->virt.num_notifications);
uvm_assert_mmap_lock_locked(mm);
uvm_assert_rwsem_locked(&va_space->lock);
address = notifications[index]->address.address;
vma = find_vma_intersection(mm, address, address + 1);
if (!vma) {
// Clear the notification entry to continue receiving access counter
// notifications when a new VMA is allocated in this range.
status = notify_tools_and_process_flags(gpu, &notifications[index], 1, flags);
*out_index = index + 1;
return status;
}
base = UVM_VA_BLOCK_ALIGN_DOWN(address);
end = min(base + UVM_VA_BLOCK_SIZE, (NvU64)vma->vm_end);
uvm_page_mask_zero(&ats_context->accessed_mask);
for (i = index; i < batch_context->virt.num_notifications; i++) {
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
address = current_entry->address.address;
if ((current_entry->virtual_info.va_space == va_space) && (address < end))
uvm_page_mask_set(&ats_context->accessed_mask, (address - base) / PAGE_SIZE);
else
break;
}
*out_index = i;
// At least one notification should have been processed.
UVM_ASSERT(index < *out_index);
// TODO: Bug 2113632: [UVM] Don't clear access counters when the preferred
// location is set
// If no pages were actually migrated, don't clear the access counters.
status = uvm_ats_service_access_counters(gpu_va_space, vma, base, ats_context);
if (status != NV_OK)
flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
flags_status = notify_tools_and_process_flags(gpu, &notifications[index], *out_index - index, flags);
if ((status == NV_OK) && (flags_status != NV_OK))
status = flags_status;
return status;
}
static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_space,
struct mm_struct *mm,
uvm_access_counter_service_batch_context_t *batch_context,
NvU32 index,
NvU32 *out_index)
{
NV_STATUS status;
uvm_va_block_t *va_block;
uvm_va_range_t *va_range;
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[index];
NvU64 address = current_entry->address.address;
UVM_ASSERT(va_space);
if (mm)
uvm_assert_mmap_lock_locked(mm);
uvm_assert_rwsem_locked(&va_space->lock);
// Virtual address notifications are always 64K aligned
UVM_ASSERT(IS_ALIGNED(address, UVM_PAGE_SIZE_64K));
// TODO: Bug 4309292: [UVM][HMM] Re-enable access counter HMM block
// migrations for virtual notifications on configs with
// 4KB page size
status = uvm_va_block_find(va_space, address, &va_block);
if ((status == NV_OK) && !uvm_va_block_is_hmm(va_block)) {
va_range = uvm_va_range_find(va_space, address);
if (va_range) {
// Avoid clearing the entry by default.
NvU32 flags = 0;
uvm_va_block_t *va_block = NULL;
UVM_ASSERT(va_block);
if (va_range->type == UVM_VA_RANGE_TYPE_MANAGED) {
size_t index = uvm_va_range_block_index(va_range, address);
status = service_virt_notifications_in_block(mm, gpu_va_space, va_block, batch_context, index, out_index);
va_block = uvm_va_range_block(va_range, index);
// If the va_range is a managed range, the notification belongs to a
// recently freed va_range if va_block is NULL. If va_block is not
// NULL, service_virt_notifications_in_block will process flags.
// Clear the notification entry to continue receiving notifications
// when a new va_range is allocated in that region.
flags = UVM_ACCESS_COUNTER_ACTION_CLEAR;
}
if (va_block) {
status = service_virt_notifications_in_block(gpu_va_space, mm, va_block, batch_context, index, out_index);
}
else {
status = notify_tools_and_process_flags(gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
*out_index = index + 1;
}
}
else if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
status = service_virt_notification_ats(gpu_va_space, mm, batch_context, index, out_index);
}
else {
NvU32 flags = 0;
NvU32 flags;
uvm_va_block_t *va_block = NULL;
status = uvm_hmm_va_block_find(va_space, address, &va_block);
// TODO: Bug 4309292: [UVM][HMM] Re-enable access counter HMM block
// migrations for virtual notifications
//
// - If the va_block is HMM, don't clear the notification since HMM
// migrations are currently disabled.
//
// - If the va_block isn't HMM, the notification belongs to a recently
// freed va_range. Clear the notification entry to continue receiving
// notifications when a new va_range is allocated in this region.
flags = va_block ? 0 : UVM_ACCESS_COUNTER_ACTION_CLEAR;
UVM_ASSERT((status == NV_ERR_OBJECT_NOT_FOUND) ||
(status == NV_ERR_INVALID_ADDRESS) ||
uvm_va_block_is_hmm(va_block));
// NV_ERR_OBJECT_NOT_FOUND is returned if the VA range is valid but no
// VA block has been allocated yet. This can happen if there are stale
// notifications in the batch. A new VA range may have been allocated in
// that range. So, clear the notification entry to continue getting
// notifications for the new VA range.
if (status == NV_ERR_OBJECT_NOT_FOUND)
flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
// Clobber status to continue processing the rest of the notifications
// in the batch.
status = notify_tools_and_process_flags(gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
// NV_ERR_INVALID_ADDRESS is returned if the corresponding VA range
// doesn't exist or it's not a managed range. Access counter migrations
// are not currently supported on such ranges.
//
// TODO: Bug 1990466: [uvm] Use access counters to trigger migrations
// When support for SAM migrations is added, clear the notification
// entry if the VA range doesn't exist in order to receive notifications
// when a new VA range is allocated in that region.
status = notify_tools_and_process_flags(gpu_va_space->gpu, &batch_context->virt.notifications[index], 1, flags);
*out_index = index + 1;
status = NV_OK;
}
return status;
@@ -1745,7 +1834,7 @@ static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
}
if (va_space && gpu_va_space && uvm_va_space_has_access_counter_migrations(va_space)) {
status = service_virt_notifications_batch(mm, gpu_va_space, batch_context, i, &i);
status = service_virt_notifications_batch(gpu_va_space, mm, batch_context, i, &i);
}
else {
status = notify_tools_and_process_flags(gpu, &batch_context->virt.notifications[i], 1, 0);

View File

@@ -1632,23 +1632,23 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
const uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
const uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
UVM_ASSERT(vma);
ats_context->client_type = UVM_FAULT_CLIENT_TYPE_GPC;
uvm_page_mask_or(faulted_mask, write_fault_mask, read_fault_mask);
uvm_page_mask_or(accessed_mask, write_fault_mask, read_fault_mask);
status = uvm_ats_service_faults(gpu_va_space, vma, base, &batch_context->ats_context);
// Remove prefetched pages from the serviced mask since fault servicing
// failures belonging to prefetch pages need to be ignored.
uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, faulted_mask);
uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, accessed_mask);
UVM_ASSERT(uvm_page_mask_subset(faults_serviced_mask, faulted_mask));
UVM_ASSERT(uvm_page_mask_subset(faults_serviced_mask, accessed_mask));
if ((status != NV_OK) || uvm_page_mask_equal(faults_serviced_mask, faulted_mask)) {
if ((status != NV_OK) || uvm_page_mask_equal(faults_serviced_mask, accessed_mask)) {
(*block_faults) += (fault_index_end - fault_index_start);
return status;
}

View File

@@ -114,6 +114,16 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
#define UVM_IS_CONFIG_HMM() 0
#endif
// ATS prefetcher uses hmm_range_fault() to query residency information.
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
// of memory regions while hmm_range_fault() is being called, MMU interval
// notifiers are needed.
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
#define UVM_HMM_RANGE_FAULT_SUPPORTED() 1
#else
#define UVM_HMM_RANGE_FAULT_SUPPORTED() 0
#endif
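A compact sketch of how consumers gate on this macro, mirroring the uvm_ats_faults.c changes in this commit (example_residency is an illustrative name; the hmm_range_fault() walk itself is elided):

#if UVM_HMM_RANGE_FAULT_SUPPORTED()
#include <linux/hmm.h>  /* hmm_range_fault() */
#endif

static void example_residency(uvm_page_mask_t *residency_mask)
{
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
    // Walk the range with hmm_range_fault() under an MMU interval
    // notifier, as ats_compute_residency_mask() does (elided here).
    (void)residency_mask;
#else
    // Without hmm_range_fault(), conservatively report nothing resident.
    uvm_page_mask_zero(residency_mask);
#endif
}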
// Various issues prevent us from using mmu_notifiers in older kernels. These
// include:
// - ->release being called under RCU instead of SRCU: fixed by commit

View File

@@ -280,7 +280,9 @@ NV_STATUS uvm_va_space_mm_register(uvm_va_space_t *va_space)
}
}
if ((UVM_IS_CONFIG_HMM() || UVM_ATS_PREFETCH_SUPPORTED()) && uvm_va_space_pageable_mem_access_supported(va_space)) {
if ((UVM_IS_CONFIG_HMM() || UVM_HMM_RANGE_FAULT_SUPPORTED()) &&
uvm_va_space_pageable_mem_access_supported(va_space)) {
#if UVM_CAN_USE_MMU_NOTIFIERS()
// Initialize MMU interval notifiers for this process. This allows
// mmu_interval_notifier_insert() to be called without holding the

View File

@@ -56,7 +56,11 @@
#include "nv-pat.h"
#include "nv-dmabuf.h"
#if !defined(CONFIG_RETPOLINE)
/*
* Commit aefb2f2e619b ("x86/bugs: Rename CONFIG_RETPOLINE =>
* CONFIG_MITIGATION_RETPOLINE") in v6.8 renamed CONFIG_RETPOLINE.
*/
#if !defined(CONFIG_RETPOLINE) && !defined(CONFIG_MITIGATION_RETPOLINE)
#include "nv-retpoline.h"
#endif

View File

@@ -250,6 +250,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += num_registered_fb
NV_CONFTEST_TYPE_COMPILE_TESTS += pci_driver_has_driver_managed_dma
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += memory_failure_has_trapno_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += foll_longterm_present
NV_CONFTEST_GENERIC_COMPILE_TESTS += dom0_kernel_present
NV_CONFTEST_GENERIC_COMPILE_TESTS += nvidia_vgpu_kvm_build

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -2130,6 +2130,8 @@ static int os_numa_verify_gpu_memory_zone(struct notifier_block *nb,
return NOTIFY_OK;
}
#define ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS 4
NV_STATUS NV_API_CALL os_numa_add_gpu_memory
(
void *handle,
@@ -2143,7 +2145,12 @@ NV_STATUS NV_API_CALL os_numa_add_gpu_memory
nv_linux_state_t *nvl = pci_get_drvdata(handle);
nv_state_t *nv = NV_STATE_PTR(nvl);
NvU64 base = offset + nvl->coherent_link_info.gpu_mem_pa;
int ret;
int ret = 0;
NvU64 memblock_size;
NvU64 size_remaining;
NvU64 calculated_segment_size;
NvU64 segment_size;
NvU64 segment_base;
os_numa_gpu_mem_hotplug_notifier_t notifier =
{
.start_pa = base,
@@ -2176,11 +2183,49 @@ NV_STATUS NV_API_CALL os_numa_add_gpu_memory
goto failed;
}
//
// Adding all memory at once can take a long time. Split up memory into segments
// with schedule() in between to prevent soft lockups. Memory segments for
// add_memory_driver_managed() need to be aligned to memblock size.
//
// If there are any issues splitting into segments, then add all memory at once.
//
if (os_numa_memblock_size(&memblock_size) == NV_OK)
{
calculated_segment_size = NV_ALIGN_UP(size / ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS, memblock_size);
}
else
{
// Don't split into segments, add all memory at once
calculated_segment_size = size;
}
segment_size = calculated_segment_size;
segment_base = base;
size_remaining = size;
while ((size_remaining > 0) &&
(ret == 0))
{
if (segment_size > size_remaining)
{
segment_size = size_remaining;
}
#ifdef NV_ADD_MEMORY_DRIVER_MANAGED_HAS_MHP_FLAGS_ARG
ret = add_memory_driver_managed(node, base, size, "System RAM (NVIDIA)", MHP_NONE);
ret = add_memory_driver_managed(node, segment_base, segment_size, "System RAM (NVIDIA)", MHP_NONE);
#else
ret = add_memory_driver_managed(node, base, size, "System RAM (NVIDIA)");
ret = add_memory_driver_managed(node, segment_base, segment_size, "System RAM (NVIDIA)");
#endif
nv_printf(NV_DBG_SETUP, "NVRM: add_memory_driver_managed() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
ret, segment_base, segment_size);
segment_base += segment_size;
size_remaining -= segment_size;
// Yield CPU to prevent soft lockups
schedule();
}
unregister_memory_notifier(&notifier.memory_notifier);
if (ret == 0)
@@ -2194,14 +2239,33 @@ NV_STATUS NV_API_CALL os_numa_add_gpu_memory
zone_end_pfn(zone) != end_pfn)
{
nv_printf(NV_DBG_ERRORS, "NVRM: GPU memory zone movable auto onlining failed!\n");
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
if (offline_and_remove_memory(node, base, size) != 0)
#else
if (offline_and_remove_memory(base, size) != 0)
#endif
// Since zone movable auto onlining failed, need to remove the added memory.
segment_size = calculated_segment_size;
segment_base = base;
size_remaining = size;
while (size_remaining > 0)
{
nv_printf(NV_DBG_ERRORS, "NVRM: offline_and_remove_memory failed\n");
if (segment_size > size_remaining)
{
segment_size = size_remaining;
}
#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
ret = offline_and_remove_memory(node, segment_base, segment_size);
#else
ret = offline_and_remove_memory(segment_base, segment_size);
#endif
nv_printf(NV_DBG_SETUP, "NVRM: offline_and_remove_memory() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
ret, segment_base, segment_size);
segment_base += segment_size;
size_remaining -= segment_size;
// Yield CPU to prevent soft lockups
schedule();
}
#endif
goto failed;
@@ -2221,6 +2285,77 @@ failed:
return NV_ERR_NOT_SUPPORTED;
}
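The segmenting arithmetic above is easy to check in isolation. A standalone userspace sketch of the same loop, where ALIGN_UP stands in for NV_ALIGN_UP and is assumed to round up to a multiple of a power-of-two alignment:

#include <stdio.h>
#include <stdint.h>

#define ALIGN_UP(v, a) (((v) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
    uint64_t size           = 30ULL << 30;   /* 30 GiB of GPU memory     */
    uint64_t memblock_size  = 1ULL  << 30;   /* 1 GiB memblock alignment */
    /* Aim for ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS (4) segments:          */
    uint64_t segment_size   = ALIGN_UP(size / 4, memblock_size); /* 8 GiB */
    uint64_t segment_base   = 0;
    uint64_t size_remaining = size;

    while (size_remaining > 0)
    {
        if (segment_size > size_remaining)
            segment_size = size_remaining;   /* final segment: 6 GiB */
        printf("segment_base=0x%llx segment_size=0x%llx\n",
               (unsigned long long)segment_base,
               (unsigned long long)segment_size);
        segment_base   += segment_size;
        size_remaining -= segment_size;      /* schedule() would go here */
    }
    return 0;
}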
typedef struct {
NvU64 base;
NvU64 size;
NvU32 nodeId;
int ret;
} remove_numa_memory_info_t;
static void offline_numa_memory_callback
(
void *args
)
{
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
int ret = 0;
NvU64 memblock_size;
NvU64 size_remaining;
NvU64 calculated_segment_size;
NvU64 segment_size;
NvU64 segment_base;
//
// Removing all memory at once can take a long time. Split up memory into segments
// with schedule() in between to prevent soft lockups. Memory segments for
// offline_and_remove_memory() need to be aligned to memblock size.
//
// If there are any issues splitting into segments, then remove all memory at once.
//
if (os_numa_memblock_size(&memblock_size) == NV_OK)
{
calculated_segment_size = NV_ALIGN_UP(pNumaInfo->size / ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS, memblock_size);
}
else
{
// Don't split into segments, remove all memory at once
calculated_segment_size = pNumaInfo->size;
}
segment_size = calculated_segment_size;
segment_base = pNumaInfo->base;
size_remaining = pNumaInfo->size;
while (size_remaining > 0)
{
if (segment_size > size_remaining)
{
segment_size = size_remaining;
}
#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
ret = offline_and_remove_memory(pNumaInfo->nodeId,
segment_base,
segment_size);
#else
ret = offline_and_remove_memory(segment_base,
segment_size);
#endif
nv_printf(NV_DBG_SETUP, "NVRM: offline_and_remove_memory() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
ret, segment_base, segment_size);
pNumaInfo->ret |= ret;
segment_base += segment_size;
size_remaining -= segment_size;
// Yield CPU to prevent soft lockups
schedule();
}
#endif
}
NV_STATUS NV_API_CALL os_numa_remove_gpu_memory
(
void *handle,

View File

@@ -26,6 +26,12 @@
#include "os-interface.h"
#include "nv-linux.h"
#if defined(NVCPU_FAMILY_X86) && defined(NV_FOLL_LONGTERM_PRESENT) && \
(defined(NV_PIN_USER_PAGES_HAS_ARGS_VMAS) || \
defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS_VMAS))
#define NV_NUM_PIN_PAGES_PER_ITERATION 0x80000
#endif
static inline int nv_follow_pfn(struct vm_area_struct *vma,
unsigned long address,
unsigned long *pfn)
@@ -163,9 +169,15 @@ NV_STATUS NV_API_CALL os_lock_user_pages(
NV_STATUS rmStatus;
struct mm_struct *mm = current->mm;
struct page **user_pages;
NvU64 i, pinned;
NvU64 i;
NvU64 npages = page_count;
NvU64 pinned = 0;
unsigned int gup_flags = DRF_VAL(_LOCK_USER_PAGES, _FLAGS, _WRITE, flags) ? FOLL_WRITE : 0;
int ret;
long ret;
#if defined(NVCPU_FAMILY_X86) && defined(NV_FOLL_LONGTERM_PRESENT)
gup_flags |= FOLL_LONGTERM;
#endif
if (!NV_MAY_SLEEP())
{
@@ -185,16 +197,51 @@ NV_STATUS NV_API_CALL os_lock_user_pages(
nv_mmap_read_lock(mm);
ret = NV_PIN_USER_PAGES((unsigned long)address,
page_count, gup_flags, user_pages, NULL);
nv_mmap_read_unlock(mm);
pinned = ret;
if (ret < 0)
npages, gup_flags, user_pages, NULL);
if (ret > 0)
{
os_free_mem(user_pages);
return NV_ERR_INVALID_ADDRESS;
pinned = ret;
}
else if (pinned < page_count)
#if defined(NVCPU_FAMILY_X86) && defined(NV_FOLL_LONGTERM_PRESENT) && \
(defined(NV_PIN_USER_PAGES_HAS_ARGS_VMAS) || \
defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS_VMAS))
//
// NV_PIN_USER_PAGES() passes in NULL for the vmas parameter (if required)
// in pin_user_pages() (or get_user_pages() if pin_user_pages() does not
// exist). For kernels which do not contain the commit 52650c8b466b
// (mm/gup: remove the vma allocation from gup_longterm_locked()), if
// FOLL_LONGTERM is passed in, this results in the kernel trying to kcalloc
// the vmas array, and since the limit for kcalloc is 4 MB, it results in
// NV_PIN_USER_PAGES() failing with ENOMEM if more than
// NV_NUM_PIN_PAGES_PER_ITERATION pages are requested on 64-bit systems.
//
// As a workaround, if we requested more than
// NV_NUM_PIN_PAGES_PER_ITERATION pages and failed with ENOMEM, try again
// with multiple calls of NV_NUM_PIN_PAGES_PER_ITERATION pages at a time.
//
else if ((ret == -ENOMEM) &&
(page_count > NV_NUM_PIN_PAGES_PER_ITERATION))
{
for (pinned = 0; pinned < page_count; pinned += ret)
{
npages = page_count - pinned;
if (npages > NV_NUM_PIN_PAGES_PER_ITERATION)
{
npages = NV_NUM_PIN_PAGES_PER_ITERATION;
}
ret = NV_PIN_USER_PAGES(((unsigned long) address) + (pinned * PAGE_SIZE),
npages, gup_flags, &user_pages[pinned], NULL);
if (ret <= 0)
{
break;
}
}
}
#endif
nv_mmap_read_unlock(mm);
if (pinned < page_count)
{
for (i = 0; i < pinned; i++)
NV_UNPIN_USER_PAGE(user_pages[i]);