570.124.04

Bernhard Stoeckner
2025-02-27 17:32:23 +01:00
parent 81fe4fb417
commit 129479b1b7
141 changed files with 102245 additions and 100070 deletions

View File

@@ -86,7 +86,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"570.86.16\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"570.124.04\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2014-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -756,6 +756,8 @@ typedef struct UvmGpuFbInfo_tag
NvBool bStaticBar1Enabled; // Static BAR1 mode is enabled
NvU64 staticBar1StartOffset; // The start offset of the static mapping
NvU64 staticBar1Size; // The size of the static mapping
NvU32 heapStart; // The start offset of heap in KB, helpful for MIG
// systems
} UvmGpuFbInfo;
typedef struct UvmGpuEccInfo_tag
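
The new heapStart field is reported in kilobytes; later in this commit UVM turns it into a byte offset (gpu->mem_info.phys_start = (NvU64)fb_info.heapStart * 1024). A minimal standalone sketch of that conversion, using an invented value rather than real RM output:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative only: mirrors the heapStart (KB) -> phys_start (bytes)
     * conversion used by this commit; the value below is made up. */
    int main(void)
    {
        uint32_t heapStartKb = 4u * 1024 * 1024;              /* 4 GiB, expressed in KB */
        uint64_t phys_start  = (uint64_t)heapStartKb * 1024;  /* byte offset of the heap */

        printf("heap starts at 0x%llx\n", (unsigned long long)phys_start);
        return 0;
    }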

View File

@@ -6307,6 +6307,32 @@ compile_test() {
compile_check_conftest "$CODE" "NV_NUM_REGISTERED_FB_PRESENT" "" "types"
;;
acpi_video_register_backlight)
#
# Determine if acpi_video_register_backlight() function is present
#
# acpi_video_register_backlight was added by commit 3dbc80a3e4c55c
# (ACPI: video: Make backlight class device registration a separate
# step (v2)) for v6.0 (2022-09-02).
# Note: the include directive for <linux/types.h> in this conftest is
# necessary in order to support kernels between commit 0b9f7d93ca61
# ("ACPI / i915: ignore firmware requests backlight change") for
# v3.16 (2014-07-07) and commit 3bd6bce369f5 ("ACPI / video: Port
# to new backlight interface selection API") for v4.2 (2015-07-16).
# Kernels within this range use the 'bool' type and the related
# 'false' value in <acpi/video.h> without first including the
# definitions of that type and value.
#
CODE="
#include <linux/types.h>
#include <acpi/video.h>
void conftest_acpi_video_register_backlight(void) {
acpi_video_register_backlight(0);
}"
compile_check_conftest "$CODE" "NV_ACPI_VIDEO_REGISTER_BACKLIGHT" "" "functions"
;;
acpi_video_backlight_use_native)
#
# Determine if acpi_video_backlight_use_native() function is present
@@ -6690,13 +6716,18 @@ compile_test() {
#
# Determine whether drm_client_setup is present.
#
# Added by commit d07fdf922592 ("drm/fbdev-ttm:
# Convert to client-setup") in v6.13.
# Added by commit d07fdf922592 ("drm/fbdev-ttm: Convert to
# client-setup") in v6.13 in drm/drm_client_setup.h, but then moved
# to drm/clients/drm_client_setup.h by commit b86711c6d6e2
# ("drm/client: Move public client header to clients/ subdirectory")
# in linux-next.
#
CODE="
#include <drm/drm_fb_helper.h>
#if defined(NV_DRM_DRM_CLIENT_SETUP_H_PRESENT)
#include <drm/drm_client_setup.h>
#elif defined(NV_DRM_CLIENTS_DRM_CLIENT_SETUP_H_PRESENT)
#include <drm/clients/drm_client_setup.h>
#endif
void conftest_drm_client_setup(void) {
drm_client_setup();
@@ -7509,6 +7540,31 @@ compile_test() {
compile_check_conftest "$CODE" "NV_MODULE_IMPORT_NS_TAKES_CONSTANT" "" "generic"
;;
drm_driver_has_date)
#
# Determine if the 'drm_driver' structure has a 'date' field.
#
# Removed by commit cb2e1c2136f7 ("drm: remove driver date from
# struct drm_driver and all drivers") in linux-next, expected in
# v6.14.
#
CODE="
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
#include <drm/drm_drv.h>
#endif
int conftest_drm_driver_has_date(void) {
return offsetof(struct drm_driver, date);
}"
compile_check_conftest "$CODE" "NV_DRM_DRIVER_HAS_DATE" "" "types"
;;
# When adding a new conftest entry, please use the correct format for
# specifying the relevant upstream Linux kernel commit. Please
# avoid specifying -rc kernels, and only use SHAs that actually exist

View File

@@ -31,6 +31,7 @@ NV_HEADER_PRESENCE_TESTS = \
drm/drm_mode_config.h \
drm/drm_modeset_lock.h \
drm/drm_property.h \
drm/clients/drm_client_setup.h \
dt-bindings/interconnect/tegra_icc_id.h \
generated/autoconf.h \
generated/compile.h \

View File

@@ -65,9 +65,13 @@
#if defined(NV_DRM_CLIENT_SETUP_PRESENT) && \
(defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT) || \
defined(NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT))
// XXX remove dependency on DRM_TTM_HELPER by implementing nvidia-drm's own
// .fbdev_probe callback that uses NVKMS kapi
#if IS_ENABLED(CONFIG_DRM_TTM_HELPER)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_CLIENT_AVAILABLE
#endif
#endif
/*
* We can support color management if either drm_helper_crtc_enable_color_mgmt()

View File

@@ -78,6 +78,8 @@
#if defined(NV_DRM_DRM_CLIENT_SETUP_H_PRESENT)
#include <drm/drm_client_setup.h>
#elif defined(NV_DRM_CLIENTS_DRM_CLIENT_SETUP_H_PRESENT)
#include <drm/clients/drm_client_setup.h>
#endif
#if defined(NV_DRM_DRM_FBDEV_TTM_H_PRESENT)
@@ -1915,14 +1917,18 @@ static struct drm_driver nv_drm_driver = {
.name = "nvidia-drm",
.desc = "NVIDIA DRM driver",
#if defined(NV_DRM_DRIVER_HAS_DATE)
.date = "20160202",
#endif
#if defined(NV_DRM_DRIVER_HAS_DEVICE_LIST)
.device_list = LIST_HEAD_INIT(nv_drm_driver.device_list),
#elif defined(NV_DRM_DRIVER_HAS_LEGACY_DEV_LIST)
.legacy_dev_list = LIST_HEAD_INIT(nv_drm_driver.legacy_dev_list),
#endif
#if defined(DRM_FBDEV_TTM_DRIVER_OPS)
// XXX implement nvidia-drm's own .fbdev_probe callback that uses NVKMS kapi directly
#if defined(NV_DRM_FBDEV_AVAILABLE) && defined(DRM_FBDEV_TTM_DRIVER_OPS)
DRM_FBDEV_TTM_DRIVER_OPS,
#endif
};

View File

@@ -143,4 +143,5 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_lut
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_property_blob_put
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_prime_mmap
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_date
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations_fop_unsigned_offset_present

View File

@@ -1050,6 +1050,11 @@ nvkms_register_backlight(NvU32 gpu_id, NvU32 display_id, void *drv_priv,
#if defined(NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE)
if (!acpi_video_backlight_use_native()) {
#if defined(NV_ACPI_VIDEO_REGISTER_BACKLIGHT)
nvkms_log(NVKMS_LOG_LEVEL_INFO, NVKMS_LOG_PREFIX,
"ACPI reported no NVIDIA native backlight available; attempting to use ACPI backlight.");
acpi_video_register_backlight();
#endif
return NULL;
}
#endif

View File

@@ -102,4 +102,5 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_register_backlight
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kernel_read_has_pointer_pos_arg

View File

@@ -29,6 +29,7 @@
#include <linux/nodemask.h>
#include <linux/mempolicy.h>
#include <linux/mmu_notifier.h>
#include <linux/topology.h>
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
#include <linux/hmm.h>
@@ -291,6 +292,27 @@ static const struct mmu_interval_notifier_ops uvm_ats_notifier_ops =
#endif
static bool resident_policy_match(struct vm_area_struct *vma, int dst_nid, int src_nid)
{
#if defined(NV_MEMPOLICY_HAS_UNIFIED_NODES)
struct mempolicy *vma_policy = vma_policy(vma);
// TODO: Bug 4981209: When migrations between CPU numa nodes are supported,
// add (dst_nid != closest_cpu_numa_node) to allow migrations between CPU
// NUMA nodes when destination is the closest_cpu_numa_node.
if (vma_policy &&
node_isset(src_nid, vma_policy->nodes) &&
node_isset(dst_nid, vma_policy->nodes) &&
!cpumask_empty(cpumask_of_node(src_nid)) &&
!cpumask_empty(cpumask_of_node(dst_nid))) {
return true;
}
#endif
return false;
}
static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
@@ -370,9 +392,23 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
if (pfn & HMM_PFN_VALID) {
struct page *page = hmm_pfn_to_page(pfn);
int resident_node = page_to_nid(page);
if (page_to_nid(page) == ats_context->residency_node)
// Set the residency_mask if:
// - The page is already resident at the intended destination.
// or
// - If both the source and destination nodes are CPU nodes and
// source node is already in the list of preferred nodes for
// the vma. On multi-CPU NUMA node architectures, this avoids
// unnecessary migrations between CPU nodes. Since the
// specific ats_context->residency_node selected by
// ats_batch_select_residency() is just a guess among the list
// of preferred nodes, paying the cost of migration across the
// CPU preferred nodes in this case can't be justified.
if ((resident_node == ats_context->residency_node) ||
resident_policy_match(vma, ats_context->residency_node, resident_node)) {
uvm_page_mask_set(residency_mask, page_index);
}
ats_context->prefetch_state.first_touch = false;
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -35,6 +35,7 @@
#include "uvm_mmu.h"
#include "uvm_perf_heuristics.h"
#include "uvm_pmm_sysmem.h"
#include "uvm_pmm_gpu.h"
#include "uvm_migrate.h"
#include "uvm_gpu_access_counters.h"
#include "uvm_va_space_mm.h"
@@ -90,6 +91,8 @@ NV_STATUS uvm_global_init(void)
uvm_spin_lock_irqsave_init(&g_uvm_global.gpu_table_lock, UVM_LOCK_ORDER_LEAF);
uvm_mutex_init(&g_uvm_global.va_spaces.lock, UVM_LOCK_ORDER_VA_SPACES_LIST);
INIT_LIST_HEAD(&g_uvm_global.va_spaces.list);
uvm_mutex_init(&g_uvm_global.devmem_ranges.lock, UVM_LOCK_ORDER_LEAF);
INIT_LIST_HEAD(&g_uvm_global.devmem_ranges.list);
status = uvm_kvmalloc_init();
if (status != NV_OK) {
@@ -231,6 +234,7 @@ void uvm_global_exit(void)
uvm_va_policy_exit();
uvm_mem_global_exit();
uvm_pmm_sysmem_exit();
uvm_pmm_devmem_exit();
uvm_gpu_exit();
uvm_processor_mask_cache_exit();

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -157,6 +157,12 @@ struct uvm_global_struct
// This field is set once during global initialization (uvm_global_init),
// and can be read afterwards without acquiring any locks.
bool conf_computing_enabled;
// List of all devmem ranges allocated in the system
struct {
uvm_mutex_t lock;
struct list_head list;
} devmem_ranges;
};
// Initialize global uvm state

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -109,8 +109,10 @@ static void fill_parent_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo
// nvswitch is routed via physical pages, where the upper 13-bits of the
// 47-bit address space holds the routing information for each peer.
// Currently, this is limited to a 16GB framebuffer window size.
if (parent_gpu->nvswitch_info.is_nvswitch_connected)
if (parent_gpu->nvswitch_info.is_nvswitch_connected) {
parent_gpu->nvswitch_info.fabric_memory_window_start = gpu_info->nvswitchMemoryWindowStart;
parent_gpu->nvswitch_info.egm_fabric_memory_window_start = gpu_info->nvswitchEgmMemoryWindowStart;
}
uvm_uuid_string(uuid_buffer, &parent_gpu->uuid);
snprintf(parent_gpu->name,
@@ -244,6 +246,7 @@ static NV_STATUS get_gpu_fb_info(uvm_gpu_t *gpu)
if (!fb_info.bZeroFb) {
gpu->mem_info.size = ((NvU64)fb_info.heapSize + fb_info.reservedHeapSize) * 1024;
gpu->mem_info.max_allocatable_address = fb_info.maxAllocatableAddress;
gpu->mem_info.phys_start = (NvU64)fb_info.heapStart * 1024;
}
gpu->mem_info.max_vidmem_page_size = fb_info.maxVidmemPageSize;
@@ -568,6 +571,9 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
UVM_SEQ_OR_DBG_PRINT(s, "big_page_size %u\n", gpu->big_page.internal_size);
UVM_SEQ_OR_DBG_PRINT(s, "rm_va_base 0x%llx\n", gpu->parent->rm_va_base);
UVM_SEQ_OR_DBG_PRINT(s, "rm_va_size 0x%llx\n", gpu->parent->rm_va_size);
UVM_SEQ_OR_DBG_PRINT(s, "vidmem_start %llu (%llu MBs)\n",
gpu->mem_info.phys_start,
gpu->mem_info.phys_start / (1024 * 1024));
UVM_SEQ_OR_DBG_PRINT(s, "vidmem_size %llu (%llu MBs)\n",
gpu->mem_info.size,
gpu->mem_info.size / (1024 * 1024));
@@ -1361,6 +1367,7 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
const UvmGpuPlatformInfo *gpu_platform_info)
{
NV_STATUS status;
UvmGpuFbInfo fb_info = {0};
status = uvm_rm_locked_call(nvUvmInterfaceDeviceCreate(uvm_global_session_handle(),
gpu_info,
@@ -1384,8 +1391,15 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
parent_gpu->egm.local_peer_id = gpu_info->egmPeerId;
parent_gpu->egm.base_address = gpu_info->egmBaseAddr;
status = uvm_rm_locked_call(nvUvmInterfaceGetFbInfo(parent_gpu->rm_device, &fb_info));
if (status != NV_OK)
return status;
parent_gpu->sli_enabled = (gpu_info->subdeviceCount > 1);
if (!fb_info.bZeroFb)
parent_gpu->max_allocatable_address = fb_info.maxAllocatableAddress;
parent_gpu->virt_mode = gpu_info->virtMode;
if (parent_gpu->virt_mode == UVM_VIRT_MODE_LEGACY) {
UVM_ERR_PRINT("Failed to init GPU %s. UVM is not supported in legacy virtualization mode\n",
@@ -1419,6 +1433,14 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
uvm_mmu_init_gpu_chunk_sizes(parent_gpu);
status = uvm_pmm_devmem_init(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("failed to intialize device private memory: %s, GPU %s\n",
nvstatusToString(status),
uvm_parent_gpu_name(parent_gpu));
return status;
}
status = uvm_ats_add_gpu(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_ats_add_gpu failed: %s, GPU %s\n",
@@ -1667,6 +1689,7 @@ static void deinit_parent_gpu(uvm_parent_gpu_t *parent_gpu)
deinit_parent_procfs_files(parent_gpu);
uvm_pmm_devmem_deinit(parent_gpu);
uvm_ats_remove_gpu(parent_gpu);
UVM_ASSERT(atomic64_read(&parent_gpu->mapped_cpu_pages_size) == 0);

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -696,6 +696,11 @@ struct uvm_gpu_struct
// ZeroFB testing mode, this will be 0.
NvU64 size;
// Physical start of heap, for SMC enabled GPUs, this is useful to
// partition PMM, it is used by HMM to figure out the right translation
// between HMM ranges and PMM offsets.
NvU64 phys_start;
// Max (inclusive) physical address of this GPU's memory that the driver
// can allocate through PMM (PMA).
NvU64 max_allocatable_address;
@@ -1015,6 +1020,13 @@ struct uvm_parent_gpu_struct
// Do not read this field directly, use uvm_gpu_device_handle instead.
uvmGpuDeviceHandle rm_device;
// Total amount of physical memory available on the parent GPU.
NvU64 max_allocatable_address;
#if UVM_IS_CONFIG_HMM()
uvm_pmm_gpu_devmem_t *devmem;
#endif
// The physical address range addressable by the GPU
//
// The GPU has its NV_PFB_XV_UPPER_ADDR register set by RM to
@@ -1288,6 +1300,10 @@ struct uvm_parent_gpu_struct
// 47-bit fabric memory physical offset that peer gpus need to access
// to read a peer's memory
NvU64 fabric_memory_window_start;
// 47-bit fabric memory physical offset that peer gpus need to access
// to read remote EGM memory.
NvU64 egm_fabric_memory_window_start;
} nvswitch_info;
struct

View File

@@ -321,13 +321,17 @@ void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_
{
uvm_range_tree_node_t *node;
uvm_va_block_t *va_block;
struct range range = gpu->pmm.devmem.pagemap.range;
unsigned long devmem_start;
unsigned long devmem_end;
unsigned long pfn;
bool retry;
if (!uvm_hmm_is_enabled(va_space))
return;
devmem_start = gpu->parent->devmem->pagemap.range.start + gpu->mem_info.phys_start;
devmem_end = devmem_start + gpu->mem_info.size;
if (mm)
uvm_assert_mmap_lock_locked(mm);
uvm_assert_rwsem_locked_write(&va_space->lock);
@@ -341,7 +345,7 @@ void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_
do {
retry = false;
for (pfn = __phys_to_pfn(range.start); pfn <= __phys_to_pfn(range.end); pfn++) {
for (pfn = __phys_to_pfn(devmem_start); pfn <= __phys_to_pfn(devmem_end); pfn++) {
struct page *page = pfn_to_page(pfn);
UVM_ASSERT(is_device_private_page(page));
@@ -349,7 +353,7 @@ void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_
// This check is racy because nothing stops the page being freed and
// even reused. That doesn't matter though - worst case the
// migration fails, we retry and find the va_space doesn't match.
if (page->zone_device_data == va_space)
if (uvm_pmm_devmem_page_to_va_space(page) == va_space)
if (uvm_hmm_pmm_gpu_evict_pfn(pfn) != NV_OK)
retry = true;
}
@@ -1713,7 +1717,7 @@ static void gpu_chunk_remove(uvm_va_block_t *va_block,
uvm_gpu_chunk_t *gpu_chunk;
uvm_gpu_id_t id;
id = uvm_pmm_devmem_page_to_gpu_id(page);
id = uvm_gpu_chunk_get_gpu(uvm_pmm_devmem_page_to_chunk(page))->id;
gpu_state = uvm_va_block_gpu_state_get(va_block, id);
UVM_ASSERT(gpu_state);
@@ -1743,7 +1747,7 @@ static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block,
uvm_gpu_id_t id;
NV_STATUS status;
id = uvm_pmm_devmem_page_to_gpu_id(page);
id = uvm_gpu_chunk_get_gpu(uvm_pmm_devmem_page_to_chunk(page))->id;
gpu_state = uvm_va_block_gpu_state_get(va_block, id);
// It's possible that this is a fresh va_block we're trying to add an
@@ -1765,7 +1769,7 @@ static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block,
gpu_chunk = uvm_pmm_devmem_page_to_chunk(page);
UVM_ASSERT(gpu_chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
UVM_ASSERT(gpu_chunk->is_referenced);
UVM_ASSERT(page->zone_device_data == va_block->hmm.va_space);
UVM_ASSERT(uvm_pmm_devmem_page_to_va_space(page) == va_block->hmm.va_space);
if (gpu_state->chunks[page_index] == gpu_chunk)
return NV_OK;
@@ -1992,7 +1996,7 @@ static void fill_dst_pfn(uvm_va_block_t *va_block,
hmm_mark_gpu_chunk_referenced(va_block, gpu, gpu_chunk);
UVM_ASSERT(!page_count(dpage));
zone_device_page_init(dpage);
dpage->zone_device_data = va_block->hmm.va_space;
dpage->zone_device_data = gpu_chunk;
dst_pfns[page_index] = migrate_pfn(pfn);
}
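
With the pagemap now owned by the parent GPU, the scan above bounds its PFN walk to this GPU's slice of the range: the pagemap start plus the heap offset, for mem_info.size bytes. A standalone sketch of that bound computation, assuming 4 KiB pages and using invented addresses:

    #include <stdint.h>
    #include <stdio.h>

    #define SKETCH_PAGE_SHIFT 12  /* assumed page size; the kernel value is arch-specific */

    int main(void)
    {
        /* Invented stand-ins for pagemap.range.start, mem_info.phys_start and size. */
        uint64_t pagemap_start = 0x600000000000ull;
        uint64_t phys_start    = 0x100000000ull;    /* heapStart * 1024 */
        uint64_t size          = 0x200000000ull;    /* 8 GiB of vidmem */

        uint64_t devmem_start = pagemap_start + phys_start;
        uint64_t devmem_end   = devmem_start + size;

        printf("scan PFNs 0x%llx..0x%llx\n",
               (unsigned long long)(devmem_start >> SKETCH_PAGE_SHIFT),
               (unsigned long long)(devmem_end >> SKETCH_PAGE_SHIFT));
        return 0;
    }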

View File

@@ -130,27 +130,12 @@ static NV_STATUS block_migrate_map_unmapped_pages(uvm_va_block_t *va_block,
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
NV_STATUS status = NV_OK;
NV_STATUS tracker_status;
uvm_prot_t prot = UVM_PROT_READ_WRITE_ATOMIC;
// Get the mask of unmapped pages because it will change after the
// first map operation
uvm_va_block_unmapped_pages_get(va_block, region, &va_block_context->caller_page_mask);
if (uvm_va_block_is_hmm(va_block) && !UVM_ID_IS_CPU(dest_id)) {
// Do not map pages that are already resident on the CPU. This is in
// order to avoid breaking system-wide atomic operations on HMM. HMM's
// implementation of system-side atomic operations involves restricting
// mappings to one processor (CPU or a GPU) at a time. If we were to
// grant a GPU a mapping to system memory, this gets into trouble
// because, on the CPU side, Linux can silently upgrade PTE permissions
// (move from read-only, to read-write, without any MMU notifiers
// firing), thus breaking the model by allowing simultaneous read-write
// access from two separate processors. To avoid that, just don't map
// such pages at all, when migrating.
uvm_page_mask_andnot(&va_block_context->caller_page_mask,
&va_block_context->caller_page_mask,
uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU, NUMA_NO_NODE));
}
// Only map those pages that are not mapped anywhere else (likely due
// to a first touch or a migration). We pass
// UvmEventMapRemoteCauseInvalid since the destination processor of a
@@ -166,6 +151,31 @@ static NV_STATUS block_migrate_map_unmapped_pages(uvm_va_block_t *va_block,
if (status != NV_OK)
goto out;
if (uvm_va_block_is_hmm(va_block) && UVM_ID_IS_CPU(dest_id)) {
uvm_processor_id_t id;
// Do not atomically map pages that are resident on the CPU. This is in
// order to avoid breaking system-wide atomic operations on HMM. HMM's
// implementation of system-side atomic operations involves restricting
// mappings to one processor (CPU or a GPU) at a time. If we were to
// grant a GPU a mapping to system memory, this gets into trouble
// because, on the CPU side, Linux can silently upgrade PTE permissions
// (move from read-only, to read-write, without any MMU notifiers
// firing), thus breaking the model by allowing simultaneous read-write
// access from two separate processors. To avoid that, don't remote map
// such pages atomically, after migrating.
// Also note that HMM sets CPU mapping for resident pages so the mask
// of pages to be mapped needs to be recomputed without including the
// CPU mapping.
prot = UVM_PROT_READ_WRITE;
uvm_page_mask_region_fill(&va_block_context->caller_page_mask, region);
for_each_gpu_id_in_mask(id, &va_block->mapped) {
uvm_page_mask_andnot(&va_block_context->caller_page_mask,
&va_block_context->caller_page_mask,
uvm_va_block_map_mask_get(va_block, id));
}
}
// Add mappings for AccessedBy processors
//
// No mappings within this call will operate on dest_id, so we don't
@@ -176,7 +186,7 @@ static NV_STATUS block_migrate_map_unmapped_pages(uvm_va_block_t *va_block,
dest_id,
region,
&va_block_context->caller_page_mask,
UVM_PROT_READ_WRITE_ATOMIC,
prot,
NULL);
out:
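
The recomputed mask above is plain set algebra: fill the whole region, then clear every page that some GPU still maps, and add the accessed-by mappings for what remains at READ_WRITE instead of READ_WRITE_ATOMIC. A tiny standalone illustration of the andnot accumulation over an invented 8-page region:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Bit i == page i of the region; all masks below are invented. */
        uint8_t caller_page_mask = 0xFF;       /* region_fill: pages 0..7 */
        uint8_t gpu_map_masks[]  = { 0x0A,     /* one GPU maps pages 1 and 3 */
                                     0x28 };   /* another GPU maps pages 3 and 5 */

        /* andnot accumulation: drop every page some GPU already maps. */
        for (unsigned i = 0; i < sizeof(gpu_map_masks) / sizeof(gpu_map_masks[0]); i++)
            caller_page_mask &= (uint8_t)~gpu_map_masks[i];

        printf("pages to map at RW: 0x%02x\n", caller_page_mask);  /* 0xd5: pages 0,2,4,6,7 */
        return 0;
    }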

View File

@@ -1409,11 +1409,13 @@ static bool thrashing_processors_have_fast_access_to(uvm_va_space_t *va_space,
uvm_processor_mask_set(fast_to, to);
}
else {
// Include registered SMC peers and the processor 'to'.
// Include all SMC peers and the processor 'to'.
// This includes SMC peers that are not registered.
// Since unregistered peers cannot be in page_thrashing->processors,
// the value of their respective bits in "fast_to" doesn't matter.
uvm_processor_mask_range_fill(fast_to,
uvm_gpu_id_from_sub_processor(uvm_parent_gpu_id_from_gpu_id(to), 0),
UVM_PARENT_ID_MAX_SUB_PROCESSORS);
uvm_processor_mask_and(fast_to, fast_to, &va_space->registered_gpu_va_spaces);
}
return uvm_processor_mask_subset(&page_thrashing->processors, fast_to);

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -3030,69 +3030,23 @@ NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region
#if UVM_IS_CONFIG_HMM()
static uvm_pmm_gpu_t *devmem_page_to_pmm(struct page *page)
{
return container_of(page->pgmap, uvm_pmm_gpu_t, devmem.pagemap);
}
static uvm_gpu_chunk_t *devmem_page_to_chunk_locked(struct page *page)
{
uvm_pmm_gpu_t *pmm = devmem_page_to_pmm(page);
NvU64 chunk_addr = ((NvU64)page_to_pfn(page) << PAGE_SHIFT) - pmm->devmem.pagemap.range.start;
size_t index = chunk_addr / UVM_CHUNK_SIZE_MAX;
uvm_gpu_chunk_t *root_chunk;
uvm_gpu_chunk_t *chunk;
uvm_gpu_chunk_t *parent;
uvm_chunk_size_t chunk_size;
UVM_ASSERT(index < pmm->root_chunks.count);
root_chunk = &pmm->root_chunks.array[index].chunk;
UVM_ASSERT(root_chunk->address == UVM_ALIGN_DOWN(chunk_addr, UVM_CHUNK_SIZE_MAX));
// Find the uvm_gpu_chunk_t that corresponds to the device private struct
// page's PFN. The loop is only 0, 1, or 2 iterations.
for (chunk = root_chunk;
uvm_gpu_chunk_get_size(chunk) != page_size(page);
chunk = parent->suballoc->subchunks[index]) {
parent = chunk;
UVM_ASSERT(parent->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT);
UVM_ASSERT(parent->suballoc);
chunk_size = uvm_gpu_chunk_get_size(parent->suballoc->subchunks[0]);
index = (size_t)uvm_div_pow2_64(chunk_addr - parent->address, chunk_size);
UVM_ASSERT(index < num_subchunks(parent));
}
UVM_ASSERT(chunk->address = chunk_addr);
UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
UVM_ASSERT(chunk->is_referenced);
return chunk;
}
uvm_gpu_chunk_t *uvm_pmm_devmem_page_to_chunk(struct page *page)
{
uvm_pmm_gpu_t *pmm = devmem_page_to_pmm(page);
uvm_gpu_chunk_t *chunk;
UVM_ASSERT(is_device_private_page(page));
uvm_spin_lock(&pmm->list_lock);
chunk = devmem_page_to_chunk_locked(page);
uvm_spin_unlock(&pmm->list_lock);
return chunk;
return page->zone_device_data;
}
uvm_gpu_id_t uvm_pmm_devmem_page_to_gpu_id(struct page *page)
uvm_va_space_t *uvm_pmm_devmem_page_to_va_space(struct page *page)
{
uvm_pmm_gpu_t *pmm = devmem_page_to_pmm(page);
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
uvm_gpu_chunk_t *gpu_chunk = uvm_pmm_devmem_page_to_chunk(page);
UVM_ASSERT(is_device_private_page(page));
// uvm_hmm_unregister_gpu() needs to do a racy check here so
// page->zone_device_data might be NULL.
if (!gpu_chunk || !gpu_chunk->va_block)
return NULL;
return gpu->id;
return gpu_chunk->va_block->hmm.va_space;
}
// Check there are no orphan pages. This should be only called as part of
@@ -3104,12 +3058,17 @@ static bool uvm_pmm_gpu_check_orphan_pages(uvm_pmm_gpu_t *pmm)
{
size_t i;
bool ret = true;
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
unsigned long devmem_start;
unsigned long devmem_end;
unsigned long pfn;
struct range range = pmm->devmem.pagemap.range;
if (!pmm->initialized || !uvm_hmm_is_enabled_system_wide())
return ret;
devmem_start = gpu->parent->devmem->pagemap.range.start + gpu->mem_info.phys_start;
devmem_end = devmem_start + gpu->mem_info.size;
// Scan all the root chunks looking for subchunks which are still
// referenced.
for (i = 0; i < pmm->root_chunks.count; i++) {
@@ -3121,7 +3080,7 @@ static bool uvm_pmm_gpu_check_orphan_pages(uvm_pmm_gpu_t *pmm)
root_chunk_unlock(pmm, root_chunk);
}
for (pfn = __phys_to_pfn(range.start); pfn <= __phys_to_pfn(range.end); pfn++) {
for (pfn = __phys_to_pfn(devmem_start); pfn <= __phys_to_pfn(devmem_end); pfn++) {
struct page *page = pfn_to_page(pfn);
if (!is_device_private_page(page)) {
@@ -3140,9 +3099,8 @@ static bool uvm_pmm_gpu_check_orphan_pages(uvm_pmm_gpu_t *pmm)
static void devmem_page_free(struct page *page)
{
uvm_pmm_gpu_t *pmm = devmem_page_to_pmm(page);
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
uvm_gpu_chunk_t *chunk;
uvm_gpu_chunk_t *chunk = uvm_pmm_devmem_page_to_chunk(page);
uvm_gpu_t *gpu = uvm_gpu_chunk_get_gpu(chunk);
page->zone_device_data = NULL;
@@ -3150,23 +3108,22 @@ static void devmem_page_free(struct page *page)
// we may be in an interrupt context where we can't do that. Instead,
// do a lazy free. Note that we have to use a "normal" spin lock because
// the UVM context is not available.
spin_lock(&pmm->list_lock.lock);
spin_lock(&gpu->pmm.list_lock.lock);
chunk = devmem_page_to_chunk_locked(page);
UVM_ASSERT(chunk->is_referenced);
chunk->is_referenced = false;
list_add_tail(&chunk->list, &pmm->root_chunks.va_block_lazy_free);
list_add_tail(&chunk->list, &gpu->pmm.root_chunks.va_block_lazy_free);
spin_unlock(&pmm->list_lock.lock);
spin_unlock(&gpu->pmm.list_lock.lock);
nv_kthread_q_schedule_q_item(&gpu->parent->lazy_free_q,
&pmm->root_chunks.va_block_lazy_free_q_item);
&gpu->pmm.root_chunks.va_block_lazy_free_q_item);
}
// This is called by HMM when the CPU faults on a ZONE_DEVICE private entry.
static vm_fault_t devmem_fault(struct vm_fault *vmf)
{
uvm_va_space_t *va_space = vmf->page->zone_device_data;
uvm_va_space_t *va_space = uvm_pmm_devmem_page_to_va_space(vmf->page);
if (!va_space)
return VM_FAULT_SIGBUS;
@@ -3185,26 +3142,46 @@ static const struct dev_pagemap_ops uvm_pmm_devmem_ops =
.migrate_to_ram = devmem_fault_entry,
};
static NV_STATUS devmem_init(uvm_pmm_gpu_t *pmm)
// Allocating and initialising device private pages takes a significant amount
// of time on very large systems. So rather than do that every time a GPU is
// registered we do it once and keep track of the range when the GPU is
// unregistered for later reuse.
//
// This function tries to find an existing range of device private pages and,
// if one is available, removes it from the list and returns it for reuse.
static uvm_pmm_gpu_devmem_t *devmem_reuse_pagemap(unsigned long size)
{
unsigned long size = pmm->root_chunks.count * UVM_CHUNK_SIZE_MAX;
uvm_pmm_gpu_devmem_t *devmem = &pmm->devmem;
uvm_pmm_gpu_devmem_t *devmem;
list_for_each_entry(devmem, &g_uvm_global.devmem_ranges.list, list_node) {
if (devmem->size == size) {
list_del(&devmem->list_node);
return devmem;
}
}
return NULL;
}
static uvm_pmm_gpu_devmem_t *devmem_alloc_pagemap(unsigned long size)
{
uvm_pmm_gpu_devmem_t *devmem;
struct resource *res;
void *ptr;
NV_STATUS status;
if (!uvm_hmm_is_enabled_system_wide()) {
devmem->pagemap.owner = NULL;
return NV_OK;
}
res = request_free_mem_region(&iomem_resource, size, "nvidia-uvm-hmm");
if (IS_ERR(res)) {
UVM_ERR_PRINT("request_free_mem_region() err %ld\n", PTR_ERR(res));
status = errno_to_nv_status(PTR_ERR(res));
goto err;
return NULL;
}
devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
if (!devmem)
goto err;
devmem->size = size;
devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
devmem->pagemap.range.start = res->start;
devmem->pagemap.range.end = res->end;
@@ -3217,43 +3194,77 @@ static NV_STATUS devmem_init(uvm_pmm_gpu_t *pmm)
if (IS_ERR(ptr)) {
UVM_ERR_PRINT("memremap_pages() err %ld\n", PTR_ERR(ptr));
status = errno_to_nv_status(PTR_ERR(ptr));
goto err_release;
goto err_free;
}
return NV_OK;
return devmem;
err_free:
kfree(devmem);
err_release:
release_mem_region(res->start, resource_size(res));
err:
devmem->pagemap.owner = NULL;
return status;
release_mem_region(res->start, resource_size(res));
return NULL;
}
static void devmem_deinit(uvm_pmm_gpu_t *pmm)
NV_STATUS uvm_pmm_devmem_init(uvm_parent_gpu_t *gpu)
{
uvm_pmm_gpu_devmem_t *devmem = &pmm->devmem;
// Create a DEVICE_PRIVATE page for every GPU page available on the parent.
unsigned long size = gpu->max_allocatable_address;
if (!devmem->pagemap.owner)
if (!uvm_hmm_is_enabled_system_wide()) {
gpu->devmem = NULL;
return NV_OK;
}
gpu->devmem = devmem_reuse_pagemap(size);
if (!gpu->devmem)
gpu->devmem = devmem_alloc_pagemap(size);
if (!gpu->devmem)
return NV_ERR_NO_MEMORY;
return NV_OK;
}
void uvm_pmm_devmem_deinit(uvm_parent_gpu_t *gpu)
{
if (!gpu->devmem)
return;
memunmap_pages(&devmem->pagemap);
release_mem_region(devmem->pagemap.range.start, range_len(&devmem->pagemap.range));
list_add_tail(&gpu->devmem->list_node, &g_uvm_global.devmem_ranges.list);
gpu->devmem = NULL;
}
void uvm_pmm_devmem_exit(void)
{
uvm_pmm_gpu_devmem_t *devmem, *devmem_next;
list_for_each_entry_safe(devmem, devmem_next, &g_uvm_global.devmem_ranges.list, list_node) {
list_del(&devmem->list_node);
memunmap_pages(&devmem->pagemap);
release_mem_region(devmem->pagemap.range.start, range_len(&devmem->pagemap.range));
kfree(devmem);
}
}
unsigned long uvm_pmm_gpu_devmem_get_pfn(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
{
return (pmm->devmem.pagemap.range.start + chunk->address) >> PAGE_SHIFT;
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
unsigned long devmem_start = gpu->parent->devmem->pagemap.range.start;
return (devmem_start + chunk->address) >> PAGE_SHIFT;
}
#endif // UVM_IS_CONFIG_HMM()
#if !UVM_IS_CONFIG_HMM()
static NV_STATUS devmem_init(uvm_pmm_gpu_t *pmm)
NV_STATUS uvm_pmm_devmem_init(uvm_parent_gpu_t *gpu)
{
return NV_OK;
}
static void devmem_deinit(uvm_pmm_gpu_t *pmm)
void uvm_pmm_devmem_deinit(uvm_parent_gpu_t *gpu)
{
}
@@ -3469,10 +3480,6 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
}
}
status = devmem_init(pmm);
if (status != NV_OK)
goto cleanup;
return NV_OK;
cleanup:
uvm_pmm_gpu_deinit(pmm);
@@ -3543,8 +3550,6 @@ void uvm_pmm_gpu_deinit(uvm_pmm_gpu_t *pmm)
deinit_caches(pmm);
devmem_deinit(pmm);
pmm->initialized = false;
}
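
Taken together, the pagemap changes amount to a size-keyed free list: uvm_pmm_devmem_deinit() parks the memremapped range on g_uvm_global.devmem_ranges instead of tearing it down, uvm_pmm_devmem_init() checks that list before paying for memremap_pages() again, and only uvm_pmm_devmem_exit() releases everything. A standalone sketch of the same caching pattern with a toy structure; every name below is invented, not driver API:

    #include <stdio.h>
    #include <stdlib.h>

    /* Toy stand-in for uvm_pmm_gpu_devmem_t: an expensive resource keyed by size. */
    struct range_cache_entry {
        unsigned long size;
        struct range_cache_entry *next;
    };

    static struct range_cache_entry *g_cache;  /* stand-in for the global devmem list */

    static struct range_cache_entry *range_get(unsigned long size)
    {
        /* First try to reuse a parked entry of the same size... */
        for (struct range_cache_entry **p = &g_cache; *p; p = &(*p)->next) {
            if ((*p)->size == size) {
                struct range_cache_entry *e = *p;
                *p = e->next;
                return e;
            }
        }

        /* ...otherwise create a fresh one (the expensive path). */
        struct range_cache_entry *e = calloc(1, sizeof(*e));
        if (e)
            e->size = size;
        return e;
    }

    static void range_put(struct range_cache_entry *e)
    {
        /* Park for later reuse instead of freeing. */
        e->next = g_cache;
        g_cache = e;
    }

    int main(void)
    {
        struct range_cache_entry *a = range_get(1ul << 20);
        range_put(a);                                         /* "GPU unregistered" */
        struct range_cache_entry *b = range_get(1ul << 20);   /* reuses 'a' */
        printf("reused: %s\n", a == b ? "yes" : "no");
        free(b);
        return 0;
    }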

View File

@@ -192,22 +192,41 @@ typedef struct uvm_pmm_gpu_chunk_suballoc_struct uvm_pmm_gpu_chunk_suballoc_t;
#if UVM_IS_CONFIG_HMM()
typedef struct uvm_pmm_gpu_struct uvm_pmm_gpu_t;
typedef struct
{
// For g_uvm_global.devmem_ranges
struct list_head list_node;
// Size that was requested when creating this region. This may be less than
// the size actually allocated by the kernel due to alignment constraints.
// Figuring out the required alignment at compile time is difficult due to
// unexported macros, so just use the requested size as the search key.
unsigned long size;
struct dev_pagemap pagemap;
} uvm_pmm_gpu_devmem_t;
typedef struct uvm_pmm_gpu_struct uvm_pmm_gpu_t;
// Return the GPU chunk for a given device private struct page.
uvm_gpu_chunk_t *uvm_pmm_devmem_page_to_chunk(struct page *page);
// Return the va_space for a given device private struct page.
uvm_va_space_t *uvm_pmm_devmem_page_to_va_space(struct page *page);
// Return the GPU id for a given device private struct page.
uvm_gpu_id_t uvm_pmm_devmem_page_to_gpu_id(struct page *page);
// Return the PFN of the device private struct page for the given GPU chunk.
unsigned long uvm_pmm_gpu_devmem_get_pfn(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);
// Free unused ZONE_DEVICE pages.
void uvm_pmm_devmem_exit(void);
#else
static inline void uvm_pmm_devmem_exit(void)
{
}
#endif
#if defined(CONFIG_PCI_P2PDMA) && defined(NV_STRUCT_PAGE_HAS_ZONE_DEVICE_DATA)
@@ -349,10 +368,6 @@ typedef struct uvm_pmm_gpu_struct
nv_kthread_q_item_t va_block_lazy_free_q_item;
} root_chunks;
#if UVM_IS_CONFIG_HMM()
uvm_pmm_gpu_devmem_t devmem;
#endif
// Lock protecting PMA allocation, freeing and eviction
uvm_rw_semaphore_t pma_lock;
@@ -604,6 +619,10 @@ static uvm_chunk_size_t uvm_chunk_find_prev_size(uvm_chunk_sizes_mask_t chunk_si
// retained, and it's up to the caller to release them.
NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region_size, uvm_reverse_map_t *out_mappings);
// Allocate and initialise struct page data in the kernel to support HMM.
NV_STATUS uvm_pmm_devmem_init(uvm_parent_gpu_t *gpu);
void uvm_pmm_devmem_deinit(uvm_parent_gpu_t *parent_gpu);
// Iterates over every size in the input mask from smallest to largest
#define for_each_chunk_size(__size, __chunk_sizes) \
for ((__size) = (__chunk_sizes) ? uvm_chunk_find_first_size(__chunk_sizes) : \

View File

@@ -2839,10 +2839,14 @@ static bool block_check_egm_peer(uvm_va_space_t *va_space, uvm_gpu_t *gpu, int n
remote_node_info = uvm_va_space_get_egm_numa_node_info(va_space, nid);
UVM_ASSERT(!uvm_parent_processor_mask_empty(&remote_node_info->parent_gpus));
for_each_parent_gpu_in_mask(parent_gpu, &remote_node_info->parent_gpus) {
UVM_ASSERT(parent_gpu->egm.enabled);
NvU64 page_addr = phys_addr.address;
if (phys_addr.address + parent_gpu->egm.base_address >= remote_node_info->node_start &&
phys_addr.address + parent_gpu->egm.base_address < remote_node_info->node_end &&
UVM_ASSERT(parent_gpu->egm.enabled);
page_addr += parent_gpu->egm.base_address;
if (parent_gpu->nvswitch_info.is_nvswitch_connected && gpu->parent != parent_gpu)
page_addr -= parent_gpu->nvswitch_info.egm_fabric_memory_window_start;
if (page_addr >= remote_node_info->node_start && page_addr < remote_node_info->node_end &&
remote_node_info->routing_table[uvm_parent_id_gpu_index(gpu->parent->id)] == parent_gpu) {
return true;
}
@@ -3229,8 +3233,15 @@ static uvm_gpu_phys_address_t block_phys_page_address(uvm_va_block_t *block,
if (routing_gpu) {
struct page *page = uvm_cpu_chunk_get_cpu_page(block, chunk, block_page.page_index);
phys_addr = page_to_phys(page);
aperture = uvm_gpu_egm_peer_aperture(gpu->parent, routing_gpu);
// Remote EGM routing is based on both the EGM base address and EGM
// fabric memory window.
if (routing_gpu->nvswitch_info.is_nvswitch_connected && routing_gpu != gpu->parent)
phys_addr += routing_gpu->nvswitch_info.egm_fabric_memory_window_start;
uvm_page_mask_set(&accessing_gpu_state->egm_pages, block_page.page_index);
return uvm_gpu_phys_address(aperture, phys_addr - routing_gpu->egm.base_address);
}
@@ -13575,6 +13586,9 @@ NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params,
struct page *page = block_page_get(block, block_page);
phys_addr = page_to_phys(page) - egm_routing_gpu->egm.base_address;
if (egm_routing_gpu->nvswitch_info.is_nvswitch_connected && egm_routing_gpu != gpu->parent)
phys_addr += egm_routing_gpu->nvswitch_info.egm_fabric_memory_window_start;
params->is_egm_mapping[count] = true;
}
}
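
The EGM hunks above apply two offsets: the routing GPU's egm.base_address, plus (for a peer reached through nvswitch) its egm_fabric_memory_window_start. A worked standalone version of that address arithmetic, with invented base and window values:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Invented example values; the real ones come from the routing parent GPU. */
    #define EGM_BASE_ADDRESS  0x080000000000ull  /* egm.base_address */
    #define EGM_FABRIC_WINDOW 0x400000000000ull  /* egm_fabric_memory_window_start */

    /* System physical address of a CPU page -> address used within the EGM aperture. */
    static uint64_t egm_peer_address(uint64_t sys_phys, bool remote_over_nvswitch)
    {
        uint64_t addr = sys_phys;

        if (remote_over_nvswitch)
            addr += EGM_FABRIC_WINDOW;   /* route through the peer's fabric window */

        return addr - EGM_BASE_ADDRESS;  /* make it relative to the EGM base */
    }

    int main(void)
    {
        uint64_t sys_phys = 0x080123456000ull;

        printf("local  EGM address: 0x%llx\n",
               (unsigned long long)egm_peer_address(sys_phys, false));
        printf("remote EGM address: 0x%llx\n",
               (unsigned long long)egm_peer_address(sys_phys, true));
        return 0;
    }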

View File

@@ -612,6 +612,42 @@ nv_dma_buf_unmap_pfns(
}
}
static NvU32
nv_dma_buf_get_sg_count (
struct device *dev,
nv_dma_buf_file_private_t *priv,
NvU32 *max_seg_size
)
{
NvU32 dma_max_seg_size, i;
NvU32 nents = 0;
dma_max_seg_size = NV_ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE);
if (dma_max_seg_size < PAGE_SIZE)
{
return 0;
}
// Calculate nents needed to allocate sg_table
for (i = 0; i < priv->num_objects; i++)
{
NvU32 range_count = priv->handles[i].memArea.numRanges;
NvU32 index;
for (index = 0; index < range_count; index++)
{
NvU64 length = priv->handles[i].memArea.pRanges[index].size;
NvU64 count = length + dma_max_seg_size - 1;
do_div(count, dma_max_seg_size);
nents += count;
}
}
*max_seg_size = dma_max_seg_size;
return nents;
}
static struct sg_table*
nv_dma_buf_map_pages (
struct device *dev,
@@ -620,15 +656,11 @@ nv_dma_buf_map_pages (
{
struct sg_table *sgt = NULL;
struct scatterlist *sg;
NvU32 nents = 0;
NvU32 i;
NvU32 dma_max_seg_size = 0;
NvU32 i, nents;
int rc;
// Calculate nents needed to allocate sg_table
for (i = 0; i < priv->num_objects; i++)
{
nents += priv->handles[i].memArea.numRanges;
}
nents = nv_dma_buf_get_sg_count(dev, priv, &dma_max_seg_size);
NV_KZALLOC(sgt, sizeof(struct sg_table));
if (sgt == NULL)
@@ -650,20 +682,30 @@ nv_dma_buf_map_pages (
NvU32 index = 0;
for (index = 0; index < range_count; index++)
{
NvU64 addr = priv->handles[i].memArea.pRanges[index].start;
NvU64 len = priv->handles[i].memArea.pRanges[index].size;
struct page *page = NV_GET_PAGE_STRUCT(addr);
NvU64 dma_addr = priv->handles[i].memArea.pRanges[index].start;
NvU64 dma_len = priv->handles[i].memArea.pRanges[index].size;
if ((page == NULL) || (sg == NULL))
// Split each range into dma_max_seg_size chunks
while(dma_len != 0)
{
goto free_table;
}
NvU32 sg_len = NV_MIN(dma_len, dma_max_seg_size);
struct page *page = NV_GET_PAGE_STRUCT(dma_addr);
sg_set_page(sg, page, len, NV_GET_OFFSET_IN_PAGE(addr));
sg = sg_next(sg);
if ((page == NULL) || (sg == NULL))
{
goto free_table;
}
sg_set_page(sg, page, sg_len, NV_GET_OFFSET_IN_PAGE(dma_addr));
dma_addr += sg_len;
dma_len -= sg_len;
sg = sg_next(sg);
}
}
}
WARN_ON(sg != NULL);
// DMA map the sg_table
rc = dma_map_sg(dev, sgt->sgl, sgt->orig_nents, DMA_BIDIRECTIONAL);
if (rc <= 0)
@@ -693,36 +735,16 @@ nv_dma_buf_map_pfns (
struct sg_table *sgt = NULL;
struct scatterlist *sg;
nv_dma_device_t peer_dma_dev = {{ 0 }};
NvU32 dma_max_seg_size;
NvU32 nents = 0;
NvU32 dma_max_seg_size = 0;
NvU32 mapped_nents = 0;
NvU32 i = 0;
NvU32 nents;
int rc = 0;
peer_dma_dev.dev = dev;
peer_dma_dev.addressable_range.limit = (NvU64)dev->dma_mask;
dma_max_seg_size = NV_ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE);
if (dma_max_seg_size < PAGE_SIZE)
{
return NULL;
}
// Calculate nents needed to allocate sg_table
for (i = 0; i < priv->num_objects; i++)
{
NvU32 range_count = priv->handles[i].memArea.numRanges;
NvU32 index;
for (index = 0; index < range_count; index++)
{
NvU64 length = priv->handles[i].memArea.pRanges[index].size;
NvU64 count = length + dma_max_seg_size - 1;
do_div(count, dma_max_seg_size);
nents += count;
}
}
nents = nv_dma_buf_get_sg_count(dev, priv, &dma_max_seg_size);
NV_KZALLOC(sgt, sizeof(struct sg_table));
if (sgt == NULL)
@@ -777,6 +799,9 @@ nv_dma_buf_map_pfns (
}
}
}
WARN_ON(sg != NULL);
sgt->nents = mapped_nents;
WARN_ON(sgt->nents != sgt->orig_nents);
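
nv_dma_buf_get_sg_count() is a per-range ceiling division: a range of size bytes contributes ceil(size / dma_max_seg_size) scatterlist entries, which is why the later per-segment splitting cannot run out of entries. A standalone illustration with made-up range sizes:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Invented ranges; dma_max_seg_size is page-aligned as in the driver. */
        uint64_t ranges[]         = { 0x100000, 0x3000, 0x10000 };  /* 1 MiB, 12 KiB, 64 KiB */
        uint64_t dma_max_seg_size = 0x10000;                        /* 64 KiB */
        uint32_t nents            = 0;

        for (unsigned i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++)
            nents += (uint32_t)((ranges[i] + dma_max_seg_size - 1) / dma_max_seg_size);

        printf("sg_table needs %u entries\n", nents);  /* 16 + 1 + 1 = 18 */
        return 0;
    }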

View File

@@ -445,7 +445,9 @@ static int nvidia_mmap_sysmem(
}
else
{
vma->vm_page_prot = nv_adjust_pgprot(vma->vm_page_prot, 0);
if (at->flags.unencrypted)
vma->vm_page_prot = nv_adjust_pgprot(vma->vm_page_prot, 0);
ret = vm_insert_page(vma, start,
NV_GET_PAGE_STRUCT(at->page_table[j]->phys_addr));
}

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -308,6 +308,15 @@ static NvU32 find_gpu_numa_nodes_in_srat(nv_linux_state_t *nvl)
struct acpi_srat_generic_affinity *gi;
NvU32 numa_node = NUMA_NO_NODE;
if (NV_PCI_DEVFN(nvl->pci_dev) != 0)
{
nv_printf(NV_DBG_ERRORS, "NVRM: Failing to parse SRAT GI for %04x:%02x:%02x.%x "
"since non-zero device function is not supported.\n",
NV_PCI_DOMAIN_NUMBER(nvl->pci_dev), NV_PCI_BUS_NUMBER(nvl->pci_dev),
NV_PCI_SLOT_NUMBER(nvl->pci_dev), PCI_FUNC(nvl->pci_dev->devfn));
return 0;
}
if (acpi_get_table(ACPI_SIG_SRAT, 0, &table_header)) {
nv_printf(NV_DBG_INFO, "NVRM: Failed to parse the SRAT table.\n");
return 0;
@@ -331,9 +340,14 @@ static NvU32 find_gpu_numa_nodes_in_srat(nv_linux_state_t *nvl)
(((unsigned long)subtable_header) + subtable_header_length < table_end)) {
if (subtable_header->type == ACPI_SRAT_TYPE_GENERIC_AFFINITY) {
NvU8 busAtByte2, busAtByte3;
gi = (struct acpi_srat_generic_affinity *) subtable_header;
busAtByte2 = gi->device_handle[2];
busAtByte3 = gi->device_handle[3];
// Device and function should be zero, as enforced by the check above
gi_dbdf = *((NvU16 *)(&gi->device_handle[0])) << 16 |
*((NvU16 *)(&gi->device_handle[2]));
(busAtByte2 != 0 ? busAtByte2 : busAtByte3) << 8;
if (gi_dbdf == dev_dbdf) {
numa_node = pxm_to_node(gi->proximity_domain);
@@ -347,6 +361,31 @@ static NvU32 find_gpu_numa_nodes_in_srat(nv_linux_state_t *nvl)
pxm_count = 0;
goto exit;
}
nv_printf(NV_DBG_INFO,
"NVRM: matching SRAT GI entry: 0x%x 0x%x 0x%x 0x%x PXM: %d\n",
gi->device_handle[3],
gi->device_handle[2],
gi->device_handle[1],
gi->device_handle[0],
gi->proximity_domain);
if ((busAtByte2) == 0 &&
(busAtByte3) != 0)
{
/*
* TODO: Remove this WAR once Hypervisor stack is updated
* to fix this bug and after all CSPs have moved to using
* the updated Hypervisor stack with fix.
*/
nv_printf(NV_DBG_WARNINGS,
"NVRM: PCIe bus value picked from byte 3 offset in SRAT GI entry: 0x%x 0x%x 0x%x 0x%x PXM: %d\n"
"NVRM: Hypervisor stack is old and not following ACPI spec defined offset.\n"
"NVRM: Please consider upgrading the Hypervisor stack as this workaround will be removed in future release.\n",
gi->device_handle[3],
gi->device_handle[2],
gi->device_handle[1],
gi->device_handle[0],
gi->proximity_domain);
}
}
}
@@ -792,7 +831,10 @@ next_bar:
NV_ATOMIC_SET(nvl->numa_info.status, NV_IOCTL_NUMA_STATUS_DISABLED);
nvl->numa_info.node_id = NUMA_NO_NODE;
nv_init_coherent_link_info(nv);
if (pci_devid_is_self_hosted(pci_dev->device))
{
nv_init_coherent_link_info(nv);
}
#if defined(NVCPU_PPC64LE)
// Use HW NUMA support as a proxy for ATS support. This is true in the only
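
The SRAT parsing above builds a domain/bus/device/function key from the generic affinity (GI) device handle: bytes 0-1 hold the PCI segment, byte 2 the bus (with a workaround for hypervisors that wrongly put the bus in byte 3), and device/function must be zero. A standalone sketch of that computation with an invented device handle, assuming the same little-endian layout the driver reads:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Invented SRAT GI device handle: segment 0x0000 in bytes 0-1, bus 0x41 in byte 2. */
        uint8_t  device_handle[16] = { 0x00, 0x00, 0x41, 0x00 };

        uint16_t segment    = (uint16_t)(device_handle[0] | (device_handle[1] << 8));
        uint8_t  busAtByte2 = device_handle[2];
        uint8_t  busAtByte3 = device_handle[3];

        /* Device and function are required to be zero, so the key is segment:bus:00.0. */
        uint32_t gi_dbdf = ((uint32_t)segment << 16) |
                           ((uint32_t)(busAtByte2 != 0 ? busAtByte2 : busAtByte3) << 8);

        printf("GI entry DBDF: %08x\n", gi_dbdf);  /* 00004100 -> 0000:41:00.0 */
        return 0;
    }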

View File

@@ -1630,17 +1630,25 @@ static void nv_init_mapping_revocation(nv_linux_state_t *nvl,
nv_linux_file_private_t *nvlfp,
struct inode *inode)
{
down(&nvl->mmap_lock);
/* Set up struct address_space for use with unmap_mapping_range() */
address_space_init_once(&nvlfp->mapping);
nvlfp->mapping.host = inode;
nvlfp->mapping.a_ops = inode->i_mapping->a_ops;
file->f_mapping = &nvlfp->mapping;
}
/* Add nvlfp to list of open files in nvl for mapping revocation */
/* Adds nvlfp to list of open files for mapping revocation */
static void nv_add_open_file(nv_linux_state_t *nvl,
nv_linux_file_private_t *nvlfp)
{
nvlfp->nvptr = nvl;
/*
* nvl->open_files and other mapping revocation members in nv_linux_state_t
* are protected by nvl->mmap_lock instead of nvl->ldata_lock.
*/
down(&nvl->mmap_lock);
list_add(&nvlfp->entry, &nvl->open_files);
up(&nvl->mmap_lock);
}
@@ -1690,11 +1698,12 @@ static void nvidia_open_deferred(void *nvlfp_raw)
*/
down(&nvl->ldata_lock);
rc = nv_open_device_for_nvlfp(NV_STATE_PTR(nvl), nvlfp->sp, nvlfp);
up(&nvl->ldata_lock);
/* Set nvptr only upon success (where nvl->usage_count is incremented) */
/* Only add open file tracking where nvl->usage_count is incremented */
if (rc == 0)
nvlfp->nvptr = nvl;
nv_add_open_file(nvl, nvlfp);
up(&nvl->ldata_lock);
complete_all(&nvlfp->open_complete);
}
@@ -1813,6 +1822,7 @@ nvidia_open(
}
nv = NV_STATE_PTR(nvl);
nv_init_mapping_revocation(nvl, file, nvlfp, inode);
if (nv_try_lock_foreground_open(file, nvl) == 0)
{
@@ -1823,11 +1833,11 @@ nvidia_open(
rc = nv_open_device_for_nvlfp(nv, nvlfp->sp, nvlfp);
up(&nvl->ldata_lock);
/* Set nvptr only upon success (where nvl->usage_count is incremented) */
/* Only add open file tracking where nvl->usage_count is incremented */
if (rc == 0)
nvlfp->nvptr = nvl;
nv_add_open_file(nvl, nvlfp);
up(&nvl->ldata_lock);
complete_all(&nvlfp->open_complete);
}
@@ -1882,10 +1892,6 @@ failed:
NV_SET_FILE_PRIVATE(file, NULL);
}
}
else
{
nv_init_mapping_revocation(nvl, file, nvlfp, inode);
}
return rc;
}

View File

@@ -1672,7 +1672,7 @@ NV_STATUS NV_API_CALL os_alloc_pages_node
NV_STATUS status = NV_ERR_NOT_SUPPORTED;
#if defined(__GFP_THISNODE) && defined(GFP_HIGHUSER_MOVABLE) && \
defined(__GFP_COMP) && defined(__GFP_NORETRY) && defined(__GFP_NOWARN)
defined(__GFP_COMP) && defined(__GFP_NOWARN)
gfp_t gfp_mask;
struct page *alloc_addr;
unsigned int order = get_order(size);
@@ -1689,13 +1689,11 @@ NV_STATUS NV_API_CALL os_alloc_pages_node
* pages, which is needed in order to use
* vm_insert_page API.
*
* 4. __GFP_NORETRY: Used to avoid the Linux kernel OOM killer.
*
* 5. __GFP_NOWARN: Used to avoid a WARN_ON in the slowpath if
* 4. __GFP_NOWARN: Used to avoid a WARN_ON in the slowpath if
* the requested order is too large (just fail
* instead).
*
* 6. (Optional) __GFP_RECLAIM: Used to allow/forbid reclaim.
* 5. (Optional) __GFP_RECLAIM: Used to allow/forbid reclaim.
* This is part of GFP_USER and consequently
* GFP_HIGHUSER_MOVABLE.
*
@@ -1709,7 +1707,30 @@ NV_STATUS NV_API_CALL os_alloc_pages_node
*/
gfp_mask = __GFP_THISNODE | GFP_HIGHUSER_MOVABLE | __GFP_COMP |
__GFP_NORETRY | __GFP_NOWARN;
__GFP_NOWARN;
#if defined(__GFP_RETRY_MAYFAIL)
/*
* __GFP_RETRY_MAYFAIL : Used to avoid the Linux kernel OOM killer.
* To help PMA on paths where UVM might be
* in memory over subscription. This gives UVM
* a chance to free memory before invoking any
* action from the OOM killer.
* Freeing non-essential memory will also benefit
* the system as a whole.
*/
gfp_mask |= __GFP_RETRY_MAYFAIL;
#elif defined(__GFP_NORETRY)
/*
* __GFP_NORETRY : Use __GFP_NORETRY on older kernels where
* __GFP_RETRY_MAYFAIL is not present.
*/
gfp_mask |= __GFP_NORETRY;
#endif
#if defined(__GFP_RECLAIM)
if (flag & NV_ALLOC_PAGES_NODE_SKIP_RECLAIM)