Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git
Commit: 570.124.04
@@ -86,7 +86,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"570.86.16\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"570.124.04\"

ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2014-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -756,6 +756,8 @@ typedef struct UvmGpuFbInfo_tag
NvBool bStaticBar1Enabled; // Static BAR1 mode is enabled
NvU64 staticBar1StartOffset; // The start offset of the the static mapping
NvU64 staticBar1Size; // The size of the static mapping
NvU32 heapStart; // The start offset of heap in KB, helpful for MIG
// systems
} UvmGpuFbInfo;

typedef struct UvmGpuEccInfo_tag
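For illustration, a minimal standalone C sketch of the KB-to-bytes conversion that the new heapStart field implies; get_gpu_fb_info() later in this change applies the same 1024x scaling. The struct and helper names below are illustrative, not part of the patch.

#include <stdint.h>

typedef uint32_t NvU32;
typedef uint64_t NvU64;

struct fb_heap_view {
    NvU64 phys_start;   /* byte offset of the heap within FB */
    NvU64 size;         /* usable FB size in bytes */
};

/* heapStart, heapSize and reservedHeapSize are reported in KB, as in UvmGpuFbInfo. */
static struct fb_heap_view fb_heap_view_from_info(NvU32 heapStartKb,
                                                  NvU64 heapSizeKb,
                                                  NvU64 reservedHeapSizeKb)
{
    struct fb_heap_view v;

    v.phys_start = (NvU64)heapStartKb * 1024;                 /* KB -> bytes */
    v.size       = (heapSizeKb + reservedHeapSizeKb) * 1024;  /* KB -> bytes */
    return v;
}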
@@ -6307,6 +6307,32 @@ compile_test() {
compile_check_conftest "$CODE" "NV_NUM_REGISTERED_FB_PRESENT" "" "types"
;;

acpi_video_register_backlight)
#
# Determine if acpi_video_register_backlight() function is present
#
# acpi_video_register_backlight was added by commit 3dbc80a3e4c55c
# (ACPI: video: Make backlight class device registration a separate
# step (v2)) for v6.0 (2022-09-02).
# Note: the include directive for <linux/types> in this conftest is
# necessary in order to support kernels between commit 0b9f7d93ca61
# ("ACPI / i915: ignore firmware requests backlight change") for
# v3.16 (2014-07-07) and commit 3bd6bce369f5 ("ACPI / video: Port
# to new backlight interface selection API") for v4.2 (2015-07-16).
# Kernels within this range use the 'bool' type and the related
# 'false' value in <acpi/video.h> without first including the
# definitions of that type and value.
#
CODE="
#include <linux/types.h>
#include <acpi/video.h>
void conftest_acpi_video_register_backlight(void) {
acpi_video_register_backlight(0);
}"

compile_check_conftest "$CODE" "NV_ACPI_VIDEO_REGISTER_BACKLIGHT" "" "functions"
;;

acpi_video_backlight_use_native)
#
# Determine if acpi_video_backlight_use_native() function is present
@@ -6690,13 +6716,18 @@ compile_test() {
#
# Determine whether drm_client_setup is present.
#
# Added by commit d07fdf922592 ("drm/fbdev-ttm:
# Convert to client-setup") in v6.13.
# Added by commit d07fdf922592 ("drm/fbdev-ttm: Convert to
# client-setup") in v6.13 in drm/drm_client_setup.h, but then moved
# to drm/clients/drm_client_setup.h by commit b86711c6d6e2
# ("drm/client: Move public client header to clients/ subdirectory")
# in linux-next b86711c6d6e2.
#
CODE="
#include <drm/drm_fb_helper.h>
#if defined(NV_DRM_DRM_CLIENT_SETUP_H_PRESENT)
#include <drm/drm_client_setup.h>
#elif defined(NV_DRM_CLIENTS_DRM_CLIENT_SETUP_H_PRESENT)
#include <drm/clients/drm_client_setup.h>
#endif
void conftest_drm_client_setup(void) {
drm_client_setup();
@@ -7509,6 +7540,31 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_MODULE_IMPORT_NS_TAKES_CONSTANT" "" "generic"
|
||||
;;
|
||||
|
||||
|
||||
drm_driver_has_date)
|
||||
#
|
||||
# Determine if the 'drm_driver' structure has a 'date' field.
|
||||
#
|
||||
# Removed by commit cb2e1c2136f7 ("drm: remove driver date from
|
||||
# struct drm_driver and all drivers") in linux-next, expected in
|
||||
# v6.14.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRMP_H_PRESENT)
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
|
||||
#include <drm/drm_drv.h>
|
||||
#endif
|
||||
|
||||
int conftest_drm_driver_has_date(void) {
|
||||
return offsetof(struct drm_driver, date);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_DRIVER_HAS_DATE" "" "types"
|
||||
;;
|
||||
|
||||
# When adding a new conftest entry, please use the correct format for
|
||||
# specifying the relevant upstream Linux kernel commit. Please
|
||||
# avoid specifying -rc kernels, and only use SHAs that actually exist
|
||||
|
||||
@@ -31,6 +31,7 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
drm/drm_mode_config.h \
|
||||
drm/drm_modeset_lock.h \
|
||||
drm/drm_property.h \
|
||||
drm/clients/drm_client_setup.h \
|
||||
dt-bindings/interconnect/tegra_icc_id.h \
|
||||
generated/autoconf.h \
|
||||
generated/compile.h \
|
||||
|
||||
@@ -65,9 +65,13 @@
|
||||
#if defined(NV_DRM_CLIENT_SETUP_PRESENT) && \
|
||||
(defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT) || \
|
||||
defined(NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT))
|
||||
// XXX remove dependency on DRM_TTM_HELPER by implementing nvidia-drm's own
|
||||
// .fbdev_probe callback that uses NVKMS kapi
|
||||
#if IS_ENABLED(CONFIG_DRM_TTM_HELPER)
|
||||
#define NV_DRM_FBDEV_AVAILABLE
|
||||
#define NV_DRM_CLIENT_AVAILABLE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We can support color management if either drm_helper_crtc_enable_color_mgmt()
|
||||
|
||||
@@ -78,6 +78,8 @@
|
||||
|
||||
#if defined(NV_DRM_DRM_CLIENT_SETUP_H_PRESENT)
|
||||
#include <drm/drm_client_setup.h>
|
||||
#elif defined(NV_DRM_CLIENTS_DRM_CLIENT_SETUP_H_PRESENT)
|
||||
#include <drm/clients/drm_client_setup.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DRM_FBDEV_TTM_H_PRESENT)
|
||||
@@ -1915,14 +1917,18 @@ static struct drm_driver nv_drm_driver = {
|
||||
.name = "nvidia-drm",
|
||||
|
||||
.desc = "NVIDIA DRM driver",
|
||||
|
||||
#if defined(NV_DRM_DRIVER_HAS_DATE)
|
||||
.date = "20160202",
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DRIVER_HAS_DEVICE_LIST)
|
||||
.device_list = LIST_HEAD_INIT(nv_drm_driver.device_list),
|
||||
#elif defined(NV_DRM_DRIVER_HAS_LEGACY_DEV_LIST)
|
||||
.legacy_dev_list = LIST_HEAD_INIT(nv_drm_driver.legacy_dev_list),
|
||||
#endif
|
||||
#if defined(DRM_FBDEV_TTM_DRIVER_OPS)
|
||||
// XXX implement nvidia-drm's own .fbdev_probe callback that uses NVKMS kapi directly
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE) && defined(DRM_FBDEV_TTM_DRIVER_OPS)
|
||||
DRM_FBDEV_TTM_DRIVER_OPS,
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -143,4 +143,5 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_lut
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_property_blob_put
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_prime_mmap
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_date
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations_fop_unsigned_offset_present
|
||||
|
||||
@@ -1050,6 +1050,11 @@ nvkms_register_backlight(NvU32 gpu_id, NvU32 display_id, void *drv_priv,
|
||||
|
||||
#if defined(NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE)
|
||||
if (!acpi_video_backlight_use_native()) {
|
||||
#if defined(NV_ACPI_VIDEO_REGISTER_BACKLIGHT)
|
||||
nvkms_log(NVKMS_LOG_LEVEL_INFO, NVKMS_LOG_PREFIX,
|
||||
"ACPI reported no NVIDIA native backlight available; attempting to use ACPI backlight.");
|
||||
acpi_video_register_backlight();
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -102,4 +102,5 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_register_backlight
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kernel_read_has_pointer_pos_arg
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
#include <linux/nodemask.h>
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <linux/topology.h>
|
||||
|
||||
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
|
||||
#include <linux/hmm.h>
|
||||
@@ -291,6 +292,27 @@ static const struct mmu_interval_notifier_ops uvm_ats_notifier_ops =
|
||||
|
||||
#endif
|
||||
|
||||
static bool resident_policy_match(struct vm_area_struct *vma, int dst_nid, int src_nid)
|
||||
{
|
||||
#if defined(NV_MEMPOLICY_HAS_UNIFIED_NODES)
|
||||
struct mempolicy *vma_policy = vma_policy(vma);
|
||||
|
||||
// TODO: Bug 4981209: When migrations between CPU numa nodes are supported,
|
||||
// add (dst_nid != closest_cpu_numa_node) to allow migrations between CPU
|
||||
// NUMA nodes when destination is the closest_cpu_numa_node.
|
||||
if (vma_policy &&
|
||||
node_isset(src_nid, vma_policy->nodes) &&
|
||||
node_isset(dst_nid, vma_policy->nodes) &&
|
||||
!cpumask_empty(cpumask_of_node(src_nid)) &&
|
||||
!cpumask_empty(cpumask_of_node(dst_nid))) {
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
@@ -370,9 +392,23 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
if (pfn & HMM_PFN_VALID) {
|
||||
struct page *page = hmm_pfn_to_page(pfn);
|
||||
int resident_node = page_to_nid(page);
|
||||
|
||||
if (page_to_nid(page) == ats_context->residency_node)
|
||||
// Set the residency_mask if:
|
||||
// - The page is already resident at the intended destination.
|
||||
// or
|
||||
// - If both the source and destination nodes are CPU nodes and
|
||||
// source node is already in the list of preferred nodes for
|
||||
// the vma. On multi-CPU NUMA node architectures, this avoids
|
||||
// unnecessary migrations between CPU nodes. Since the
|
||||
// specific ats_context->residency_node selected by
|
||||
// ats_batch_select_residency() is just a guess among the list
|
||||
// of preferred nodes, paying the cost of migration across the
|
||||
// CPU preferred nodes in this case can't be justified.
|
||||
if ((resident_node == ats_context->residency_node) ||
|
||||
resident_policy_match(vma, ats_context->residency_node, resident_node)) {
|
||||
uvm_page_mask_set(residency_mask, page_index);
|
||||
}
|
||||
|
||||
ats_context->prefetch_state.first_touch = false;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
Copyright (c) 2015-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -35,6 +35,7 @@
|
||||
#include "uvm_mmu.h"
|
||||
#include "uvm_perf_heuristics.h"
|
||||
#include "uvm_pmm_sysmem.h"
|
||||
#include "uvm_pmm_gpu.h"
|
||||
#include "uvm_migrate.h"
|
||||
#include "uvm_gpu_access_counters.h"
|
||||
#include "uvm_va_space_mm.h"
|
||||
@@ -90,6 +91,8 @@ NV_STATUS uvm_global_init(void)
|
||||
uvm_spin_lock_irqsave_init(&g_uvm_global.gpu_table_lock, UVM_LOCK_ORDER_LEAF);
|
||||
uvm_mutex_init(&g_uvm_global.va_spaces.lock, UVM_LOCK_ORDER_VA_SPACES_LIST);
|
||||
INIT_LIST_HEAD(&g_uvm_global.va_spaces.list);
|
||||
uvm_mutex_init(&g_uvm_global.devmem_ranges.lock, UVM_LOCK_ORDER_LEAF);
|
||||
INIT_LIST_HEAD(&g_uvm_global.devmem_ranges.list);
|
||||
|
||||
status = uvm_kvmalloc_init();
|
||||
if (status != NV_OK) {
|
||||
@@ -231,6 +234,7 @@ void uvm_global_exit(void)
|
||||
uvm_va_policy_exit();
|
||||
uvm_mem_global_exit();
|
||||
uvm_pmm_sysmem_exit();
|
||||
uvm_pmm_devmem_exit();
|
||||
uvm_gpu_exit();
|
||||
uvm_processor_mask_cache_exit();
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
Copyright (c) 2015-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -157,6 +157,12 @@ struct uvm_global_struct
|
||||
// This field is set once during global initialization (uvm_global_init),
|
||||
// and can be read afterwards without acquiring any locks.
|
||||
bool conf_computing_enabled;
|
||||
|
||||
// List of all devmem ranges allocted on this GPU
|
||||
struct {
|
||||
uvm_mutex_t lock;
|
||||
struct list_head list;
|
||||
} devmem_ranges;
|
||||
};
|
||||
|
||||
// Initialize global uvm state
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
Copyright (c) 2015-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -109,8 +109,10 @@ static void fill_parent_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo
// nvswitch is routed via physical pages, where the upper 13-bits of the
// 47-bit address space holds the routing information for each peer.
// Currently, this is limited to a 16GB framebuffer window size.
if (parent_gpu->nvswitch_info.is_nvswitch_connected)
if (parent_gpu->nvswitch_info.is_nvswitch_connected) {
parent_gpu->nvswitch_info.fabric_memory_window_start = gpu_info->nvswitchMemoryWindowStart;
parent_gpu->nvswitch_info.egm_fabric_memory_window_start = gpu_info->nvswitchEgmMemoryWindowStart;
}

uvm_uuid_string(uuid_buffer, &parent_gpu->uuid);
snprintf(parent_gpu->name,
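As a rough aside on the comment above: with 13 of the 47 physical address bits used for peer routing, each peer window spans the remaining 34 bits, i.e. 2^34 bytes = 16 GB, matching the stated framebuffer window limit. A hedged C sketch of the implied address math follows; the helper name and layout are assumptions, not patch code.

#include <stdint.h>

typedef uint64_t NvU64;

#define FABRIC_ROUTING_BITS 13ull
#define FABRIC_ADDR_BITS    47ull
#define FABRIC_WINDOW_SIZE  (1ull << (FABRIC_ADDR_BITS - FABRIC_ROUTING_BITS)) /* 16 GB */

/* fabric_window_start corresponds to nvswitch_info.fabric_memory_window_start. */
static inline NvU64 peer_visible_addr(NvU64 fabric_window_start, NvU64 local_fb_offset)
{
    /* Offsets past the 16 GB window cannot be routed; flag them. */
    if (local_fb_offset >= FABRIC_WINDOW_SIZE)
        return (NvU64)-1;
    return fabric_window_start + local_fb_offset;
}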
@@ -244,6 +246,7 @@ static NV_STATUS get_gpu_fb_info(uvm_gpu_t *gpu)
if (!fb_info.bZeroFb) {
gpu->mem_info.size = ((NvU64)fb_info.heapSize + fb_info.reservedHeapSize) * 1024;
gpu->mem_info.max_allocatable_address = fb_info.maxAllocatableAddress;
gpu->mem_info.phys_start = (NvU64)fb_info.heapStart * 1024;
}

gpu->mem_info.max_vidmem_page_size = fb_info.maxVidmemPageSize;
@@ -568,6 +571,9 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "big_page_size %u\n", gpu->big_page.internal_size);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "rm_va_base 0x%llx\n", gpu->parent->rm_va_base);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "rm_va_size 0x%llx\n", gpu->parent->rm_va_size);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "vidmem_start %llu (%llu MBs)\n",
|
||||
gpu->mem_info.phys_start,
|
||||
gpu->mem_info.phys_start / (1024 * 1024));
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "vidmem_size %llu (%llu MBs)\n",
|
||||
gpu->mem_info.size,
|
||||
gpu->mem_info.size / (1024 * 1024));
|
||||
@@ -1361,6 +1367,7 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
|
||||
const UvmGpuPlatformInfo *gpu_platform_info)
|
||||
{
|
||||
NV_STATUS status;
|
||||
UvmGpuFbInfo fb_info = {0};
|
||||
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceDeviceCreate(uvm_global_session_handle(),
|
||||
gpu_info,
|
||||
@@ -1384,8 +1391,15 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
|
||||
parent_gpu->egm.local_peer_id = gpu_info->egmPeerId;
|
||||
parent_gpu->egm.base_address = gpu_info->egmBaseAddr;
|
||||
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceGetFbInfo(parent_gpu->rm_device, &fb_info));
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
parent_gpu->sli_enabled = (gpu_info->subdeviceCount > 1);
|
||||
|
||||
if (!fb_info.bZeroFb)
|
||||
parent_gpu->max_allocatable_address = fb_info.maxAllocatableAddress;
|
||||
|
||||
parent_gpu->virt_mode = gpu_info->virtMode;
|
||||
if (parent_gpu->virt_mode == UVM_VIRT_MODE_LEGACY) {
|
||||
UVM_ERR_PRINT("Failed to init GPU %s. UVM is not supported in legacy virtualization mode\n",
|
||||
@@ -1419,6 +1433,14 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
|
||||
|
||||
uvm_mmu_init_gpu_chunk_sizes(parent_gpu);
|
||||
|
||||
status = uvm_pmm_devmem_init(parent_gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("failed to intialize device private memory: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
status = uvm_ats_add_gpu(parent_gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("uvm_ats_add_gpu failed: %s, GPU %s\n",
|
||||
@@ -1667,6 +1689,7 @@ static void deinit_parent_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
deinit_parent_procfs_files(parent_gpu);
|
||||
|
||||
uvm_pmm_devmem_deinit(parent_gpu);
|
||||
uvm_ats_remove_gpu(parent_gpu);
|
||||
|
||||
UVM_ASSERT(atomic64_read(&parent_gpu->mapped_cpu_pages_size) == 0);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
Copyright (c) 2015-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -696,6 +696,11 @@ struct uvm_gpu_struct
// ZeroFB testing mode, this will be 0.
NvU64 size;

// Physical start of heap, for SMC enabled GPUs, this is useful to
// partition PMM, it is used by HMM to figure out the right translation
// between HMM ranges and PMM offsets.
NvU64 phys_start;

// Max (inclusive) physical address of this GPU's memory that the driver
// can allocate through PMM (PMA).
NvU64 max_allocatable_address;
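A short sketch of the translation the comment refers to, mirroring the arithmetic used later in this change in uvm_hmm_unregister_gpu() and uvm_pmm_gpu_devmem_get_pfn(); the page shift and helper names are assumptions for illustration, not patch code.

#include <stdint.h>

typedef uint64_t NvU64;

#define PAGE_SHIFT_ASSUMED 12  /* assumes 4K pages */

/* Mirrors uvm_pmm_gpu_devmem_get_pfn(): PFN = (pagemap range start + FB address) >> PAGE_SHIFT. */
static inline unsigned long fb_addr_to_devmem_pfn(NvU64 pagemap_range_start, NvU64 fb_addr)
{
    return (unsigned long)((pagemap_range_start + fb_addr) >> PAGE_SHIFT_ASSUMED);
}

/* Mirrors uvm_hmm_unregister_gpu(): a partition's device-private pages span
 * [range.start + phys_start, range.start + phys_start + size). */
static inline int devmem_pfn_belongs_to_gpu(NvU64 pagemap_range_start,
                                            NvU64 phys_start,
                                            NvU64 size,
                                            unsigned long pfn)
{
    NvU64 addr = (NvU64)pfn << PAGE_SHIFT_ASSUMED;

    return addr >= pagemap_range_start + phys_start &&
           addr <  pagemap_range_start + phys_start + size;
}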
@@ -1015,6 +1020,13 @@ struct uvm_parent_gpu_struct
|
||||
// Do not read this field directly, use uvm_gpu_device_handle instead.
|
||||
uvmGpuDeviceHandle rm_device;
|
||||
|
||||
// Total amount of physical memory available on the parent GPU.
|
||||
NvU64 max_allocatable_address;
|
||||
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
uvm_pmm_gpu_devmem_t *devmem;
|
||||
#endif
|
||||
|
||||
// The physical address range addressable by the GPU
|
||||
//
|
||||
// The GPU has its NV_PFB_XV_UPPER_ADDR register set by RM to
|
||||
@@ -1288,6 +1300,10 @@ struct uvm_parent_gpu_struct
// 47-bit fabric memory physical offset that peer gpus need to access
// to read a peer's memory
NvU64 fabric_memory_window_start;

// 47-bit fabric memory physical offset that peer gpus need to access
// to read remote EGM memory.
NvU64 egm_fabric_memory_window_start;
} nvswitch_info;

struct
@@ -321,13 +321,17 @@ void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_
|
||||
{
|
||||
uvm_range_tree_node_t *node;
|
||||
uvm_va_block_t *va_block;
|
||||
struct range range = gpu->pmm.devmem.pagemap.range;
|
||||
unsigned long devmem_start;
|
||||
unsigned long devmem_end;
|
||||
unsigned long pfn;
|
||||
bool retry;
|
||||
|
||||
if (!uvm_hmm_is_enabled(va_space))
|
||||
return;
|
||||
|
||||
devmem_start = gpu->parent->devmem->pagemap.range.start + gpu->mem_info.phys_start;
|
||||
devmem_end = devmem_start + gpu->mem_info.size;
|
||||
|
||||
if (mm)
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
uvm_assert_rwsem_locked_write(&va_space->lock);
|
||||
@@ -341,7 +345,7 @@ void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_
|
||||
do {
|
||||
retry = false;
|
||||
|
||||
for (pfn = __phys_to_pfn(range.start); pfn <= __phys_to_pfn(range.end); pfn++) {
|
||||
for (pfn = __phys_to_pfn(devmem_start); pfn <= __phys_to_pfn(devmem_end); pfn++) {
|
||||
struct page *page = pfn_to_page(pfn);
|
||||
|
||||
UVM_ASSERT(is_device_private_page(page));
|
||||
@@ -349,7 +353,7 @@ void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_
|
||||
// This check is racy because nothing stops the page being freed and
|
||||
// even reused. That doesn't matter though - worst case the
|
||||
// migration fails, we retry and find the va_space doesn't match.
|
||||
if (page->zone_device_data == va_space)
|
||||
if (uvm_pmm_devmem_page_to_va_space(page) == va_space)
|
||||
if (uvm_hmm_pmm_gpu_evict_pfn(pfn) != NV_OK)
|
||||
retry = true;
|
||||
}
|
||||
@@ -1713,7 +1717,7 @@ static void gpu_chunk_remove(uvm_va_block_t *va_block,
|
||||
uvm_gpu_chunk_t *gpu_chunk;
|
||||
uvm_gpu_id_t id;
|
||||
|
||||
id = uvm_pmm_devmem_page_to_gpu_id(page);
|
||||
id = uvm_gpu_chunk_get_gpu(uvm_pmm_devmem_page_to_chunk(page))->id;
|
||||
gpu_state = uvm_va_block_gpu_state_get(va_block, id);
|
||||
UVM_ASSERT(gpu_state);
|
||||
|
||||
@@ -1743,7 +1747,7 @@ static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block,
|
||||
uvm_gpu_id_t id;
|
||||
NV_STATUS status;
|
||||
|
||||
id = uvm_pmm_devmem_page_to_gpu_id(page);
|
||||
id = uvm_gpu_chunk_get_gpu(uvm_pmm_devmem_page_to_chunk(page))->id;
|
||||
gpu_state = uvm_va_block_gpu_state_get(va_block, id);
|
||||
|
||||
// It's possible that this is a fresh va_block we're trying to add an
|
||||
@@ -1765,7 +1769,7 @@ static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block,
|
||||
gpu_chunk = uvm_pmm_devmem_page_to_chunk(page);
|
||||
UVM_ASSERT(gpu_chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
|
||||
UVM_ASSERT(gpu_chunk->is_referenced);
|
||||
UVM_ASSERT(page->zone_device_data == va_block->hmm.va_space);
|
||||
UVM_ASSERT(uvm_pmm_devmem_page_to_va_space(page) == va_block->hmm.va_space);
|
||||
|
||||
if (gpu_state->chunks[page_index] == gpu_chunk)
|
||||
return NV_OK;
|
||||
@@ -1992,7 +1996,7 @@ static void fill_dst_pfn(uvm_va_block_t *va_block,
|
||||
hmm_mark_gpu_chunk_referenced(va_block, gpu, gpu_chunk);
|
||||
UVM_ASSERT(!page_count(dpage));
|
||||
zone_device_page_init(dpage);
|
||||
dpage->zone_device_data = va_block->hmm.va_space;
|
||||
dpage->zone_device_data = gpu_chunk;
|
||||
|
||||
dst_pfns[page_index] = migrate_pfn(pfn);
|
||||
}
|
||||
|
||||
@@ -130,27 +130,12 @@ static NV_STATUS block_migrate_map_unmapped_pages(uvm_va_block_t *va_block,
|
||||
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS tracker_status;
|
||||
uvm_prot_t prot = UVM_PROT_READ_WRITE_ATOMIC;
|
||||
|
||||
// Get the mask of unmapped pages because it will change after the
|
||||
// first map operation
|
||||
uvm_va_block_unmapped_pages_get(va_block, region, &va_block_context->caller_page_mask);
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block) && !UVM_ID_IS_CPU(dest_id)) {
|
||||
// Do not map pages that are already resident on the CPU. This is in
|
||||
// order to avoid breaking system-wide atomic operations on HMM. HMM's
|
||||
// implementation of system-side atomic operations involves restricting
|
||||
// mappings to one processor (CPU or a GPU) at a time. If we were to
|
||||
// grant a GPU a mapping to system memory, this gets into trouble
|
||||
// because, on the CPU side, Linux can silently upgrade PTE permissions
|
||||
// (move from read-only, to read-write, without any MMU notifiers
|
||||
// firing), thus breaking the model by allowing simultaneous read-write
|
||||
// access from two separate processors. To avoid that, just don't map
|
||||
// such pages at all, when migrating.
|
||||
uvm_page_mask_andnot(&va_block_context->caller_page_mask,
|
||||
&va_block_context->caller_page_mask,
|
||||
uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU, NUMA_NO_NODE));
|
||||
}
|
||||
|
||||
// Only map those pages that are not mapped anywhere else (likely due
|
||||
// to a first touch or a migration). We pass
|
||||
// UvmEventMapRemoteCauseInvalid since the destination processor of a
|
||||
@@ -166,6 +151,31 @@ static NV_STATUS block_migrate_map_unmapped_pages(uvm_va_block_t *va_block,
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block) && UVM_ID_IS_CPU(dest_id)) {
|
||||
uvm_processor_id_t id;
|
||||
|
||||
// Do not atomically map pages that are resident on the CPU. This is in
|
||||
// order to avoid breaking system-wide atomic operations on HMM. HMM's
|
||||
// implementation of system-side atomic operations involves restricting
|
||||
// mappings to one processor (CPU or a GPU) at a time. If we were to
|
||||
// grant a GPU a mapping to system memory, this gets into trouble
|
||||
// because, on the CPU side, Linux can silently upgrade PTE permissions
|
||||
// (move from read-only, to read-write, without any MMU notifiers
|
||||
// firing), thus breaking the model by allowing simultaneous read-write
|
||||
// access from two separate processors. To avoid that, don't remote map
|
||||
// such pages atomically, after migrating.
|
||||
// Also note that HMM sets CPU mapping for resident pages so the mask
|
||||
// of pages to be mapped needs to be recomputed without including the
|
||||
// CPU mapping.
|
||||
prot = UVM_PROT_READ_WRITE;
|
||||
uvm_page_mask_region_fill(&va_block_context->caller_page_mask, region);
|
||||
for_each_gpu_id_in_mask(id, &va_block->mapped) {
|
||||
uvm_page_mask_andnot(&va_block_context->caller_page_mask,
|
||||
&va_block_context->caller_page_mask,
|
||||
uvm_va_block_map_mask_get(va_block, id));
|
||||
}
|
||||
}
|
||||
|
||||
// Add mappings for AccessedBy processors
|
||||
//
|
||||
// No mappings within this call will operate on dest_id, so we don't
|
||||
@@ -176,7 +186,7 @@ static NV_STATUS block_migrate_map_unmapped_pages(uvm_va_block_t *va_block,
|
||||
dest_id,
|
||||
region,
|
||||
&va_block_context->caller_page_mask,
|
||||
UVM_PROT_READ_WRITE_ATOMIC,
|
||||
prot,
|
||||
NULL);
|
||||
|
||||
out:
|
||||
|
||||
@@ -1409,11 +1409,13 @@ static bool thrashing_processors_have_fast_access_to(uvm_va_space_t *va_space,
|
||||
uvm_processor_mask_set(fast_to, to);
|
||||
}
|
||||
else {
|
||||
// Include registered SMC peers and the processor 'to'.
|
||||
// Include all SMC peers and the processor 'to'.
|
||||
// This includes SMC peers that are not registered.
|
||||
// Since not-registered peers cannot be in page_thrashing->processors,
|
||||
// the value of their respective bits in "fast_to" doesn't matter.
|
||||
uvm_processor_mask_range_fill(fast_to,
|
||||
uvm_gpu_id_from_sub_processor(uvm_parent_gpu_id_from_gpu_id(to), 0),
|
||||
UVM_PARENT_ID_MAX_SUB_PROCESSORS);
|
||||
uvm_processor_mask_and(fast_to, fast_to, &va_space->registered_gpu_va_spaces);
|
||||
}
|
||||
|
||||
return uvm_processor_mask_subset(&page_thrashing->processors, fast_to);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
Copyright (c) 2015-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -3030,69 +3030,23 @@ NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region
|
||||
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
|
||||
static uvm_pmm_gpu_t *devmem_page_to_pmm(struct page *page)
|
||||
{
|
||||
return container_of(page->pgmap, uvm_pmm_gpu_t, devmem.pagemap);
|
||||
}
|
||||
|
||||
static uvm_gpu_chunk_t *devmem_page_to_chunk_locked(struct page *page)
|
||||
{
|
||||
uvm_pmm_gpu_t *pmm = devmem_page_to_pmm(page);
|
||||
NvU64 chunk_addr = ((NvU64)page_to_pfn(page) << PAGE_SHIFT) - pmm->devmem.pagemap.range.start;
|
||||
size_t index = chunk_addr / UVM_CHUNK_SIZE_MAX;
|
||||
uvm_gpu_chunk_t *root_chunk;
|
||||
uvm_gpu_chunk_t *chunk;
|
||||
uvm_gpu_chunk_t *parent;
|
||||
uvm_chunk_size_t chunk_size;
|
||||
|
||||
UVM_ASSERT(index < pmm->root_chunks.count);
|
||||
root_chunk = &pmm->root_chunks.array[index].chunk;
|
||||
UVM_ASSERT(root_chunk->address == UVM_ALIGN_DOWN(chunk_addr, UVM_CHUNK_SIZE_MAX));
|
||||
|
||||
// Find the uvm_gpu_chunk_t that corresponds to the device private struct
|
||||
// page's PFN. The loop is only 0, 1, or 2 iterations.
|
||||
for (chunk = root_chunk;
|
||||
uvm_gpu_chunk_get_size(chunk) != page_size(page);
|
||||
chunk = parent->suballoc->subchunks[index]) {
|
||||
|
||||
parent = chunk;
|
||||
UVM_ASSERT(parent->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT);
|
||||
UVM_ASSERT(parent->suballoc);
|
||||
|
||||
chunk_size = uvm_gpu_chunk_get_size(parent->suballoc->subchunks[0]);
|
||||
index = (size_t)uvm_div_pow2_64(chunk_addr - parent->address, chunk_size);
|
||||
UVM_ASSERT(index < num_subchunks(parent));
|
||||
}
|
||||
|
||||
UVM_ASSERT(chunk->address = chunk_addr);
|
||||
UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
|
||||
UVM_ASSERT(chunk->is_referenced);
|
||||
|
||||
return chunk;
|
||||
}
|
||||
|
||||
uvm_gpu_chunk_t *uvm_pmm_devmem_page_to_chunk(struct page *page)
|
||||
{
|
||||
uvm_pmm_gpu_t *pmm = devmem_page_to_pmm(page);
|
||||
uvm_gpu_chunk_t *chunk;
|
||||
|
||||
UVM_ASSERT(is_device_private_page(page));
|
||||
|
||||
uvm_spin_lock(&pmm->list_lock);
|
||||
chunk = devmem_page_to_chunk_locked(page);
|
||||
uvm_spin_unlock(&pmm->list_lock);
|
||||
|
||||
return chunk;
|
||||
return page->zone_device_data;
|
||||
}
|
||||
|
||||
uvm_gpu_id_t uvm_pmm_devmem_page_to_gpu_id(struct page *page)
|
||||
uvm_va_space_t *uvm_pmm_devmem_page_to_va_space(struct page *page)
|
||||
{
|
||||
uvm_pmm_gpu_t *pmm = devmem_page_to_pmm(page);
|
||||
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
|
||||
uvm_gpu_chunk_t *gpu_chunk = uvm_pmm_devmem_page_to_chunk(page);
|
||||
|
||||
UVM_ASSERT(is_device_private_page(page));
|
||||
// uvm_hmm_unregister_gpu() needs to do a racy check here so
|
||||
// page->zone_device_data might be NULL.
|
||||
if (!gpu_chunk || !gpu_chunk->va_block)
|
||||
return NULL;
|
||||
|
||||
return gpu->id;
|
||||
return gpu_chunk->va_block->hmm.va_space;
|
||||
}
|
||||
|
||||
// Check there are no orphan pages. This should be only called as part of
|
||||
@@ -3104,12 +3058,17 @@ static bool uvm_pmm_gpu_check_orphan_pages(uvm_pmm_gpu_t *pmm)
|
||||
{
|
||||
size_t i;
|
||||
bool ret = true;
|
||||
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
|
||||
unsigned long devmem_start;
|
||||
unsigned long devmem_end;
|
||||
unsigned long pfn;
|
||||
struct range range = pmm->devmem.pagemap.range;
|
||||
|
||||
if (!pmm->initialized || !uvm_hmm_is_enabled_system_wide())
|
||||
return ret;
|
||||
|
||||
devmem_start = gpu->parent->devmem->pagemap.range.start + gpu->mem_info.phys_start;
|
||||
devmem_end = devmem_start + gpu->mem_info.size;
|
||||
|
||||
// Scan all the root chunks looking for subchunks which are still
|
||||
// referenced.
|
||||
for (i = 0; i < pmm->root_chunks.count; i++) {
|
||||
@@ -3121,7 +3080,7 @@ static bool uvm_pmm_gpu_check_orphan_pages(uvm_pmm_gpu_t *pmm)
|
||||
root_chunk_unlock(pmm, root_chunk);
|
||||
}
|
||||
|
||||
for (pfn = __phys_to_pfn(range.start); pfn <= __phys_to_pfn(range.end); pfn++) {
|
||||
for (pfn = __phys_to_pfn(devmem_start); pfn <= __phys_to_pfn(devmem_end); pfn++) {
|
||||
struct page *page = pfn_to_page(pfn);
|
||||
|
||||
if (!is_device_private_page(page)) {
|
||||
@@ -3140,9 +3099,8 @@ static bool uvm_pmm_gpu_check_orphan_pages(uvm_pmm_gpu_t *pmm)
|
||||
|
||||
static void devmem_page_free(struct page *page)
|
||||
{
|
||||
uvm_pmm_gpu_t *pmm = devmem_page_to_pmm(page);
|
||||
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
|
||||
uvm_gpu_chunk_t *chunk;
|
||||
uvm_gpu_chunk_t *chunk = uvm_pmm_devmem_page_to_chunk(page);
|
||||
uvm_gpu_t *gpu = uvm_gpu_chunk_get_gpu(chunk);
|
||||
|
||||
page->zone_device_data = NULL;
|
||||
|
||||
@@ -3150,23 +3108,22 @@ static void devmem_page_free(struct page *page)
|
||||
// we may be in an interrupt context where we can't do that. Instead,
|
||||
// do a lazy free. Note that we have to use a "normal" spin lock because
|
||||
// the UVM context is not available.
|
||||
spin_lock(&pmm->list_lock.lock);
|
||||
spin_lock(&gpu->pmm.list_lock.lock);
|
||||
|
||||
chunk = devmem_page_to_chunk_locked(page);
|
||||
UVM_ASSERT(chunk->is_referenced);
|
||||
chunk->is_referenced = false;
|
||||
list_add_tail(&chunk->list, &pmm->root_chunks.va_block_lazy_free);
|
||||
list_add_tail(&chunk->list, &gpu->pmm.root_chunks.va_block_lazy_free);
|
||||
|
||||
spin_unlock(&pmm->list_lock.lock);
|
||||
spin_unlock(&gpu->pmm.list_lock.lock);
|
||||
|
||||
nv_kthread_q_schedule_q_item(&gpu->parent->lazy_free_q,
|
||||
&pmm->root_chunks.va_block_lazy_free_q_item);
|
||||
&gpu->pmm.root_chunks.va_block_lazy_free_q_item);
|
||||
}
|
||||
|
||||
// This is called by HMM when the CPU faults on a ZONE_DEVICE private entry.
|
||||
static vm_fault_t devmem_fault(struct vm_fault *vmf)
|
||||
{
|
||||
uvm_va_space_t *va_space = vmf->page->zone_device_data;
|
||||
uvm_va_space_t *va_space = uvm_pmm_devmem_page_to_va_space(vmf->page);
|
||||
|
||||
if (!va_space)
|
||||
return VM_FAULT_SIGBUS;
|
||||
@@ -3185,26 +3142,46 @@ static const struct dev_pagemap_ops uvm_pmm_devmem_ops =
|
||||
.migrate_to_ram = devmem_fault_entry,
|
||||
};
|
||||
|
||||
static NV_STATUS devmem_init(uvm_pmm_gpu_t *pmm)
|
||||
// Allocating and initialising device private pages takes a significant amount
|
||||
// of time on very large systems. So rather than do that everytime a GPU is
|
||||
// registered we do it once and keep track of the range when the GPU is
|
||||
// unregistered for later reuse.
|
||||
//
|
||||
// This function tries to find an exsiting range of device private pages and if
|
||||
// available allocates and returns it for reuse.
|
||||
static uvm_pmm_gpu_devmem_t *devmem_reuse_pagemap(unsigned long size)
|
||||
{
|
||||
unsigned long size = pmm->root_chunks.count * UVM_CHUNK_SIZE_MAX;
|
||||
uvm_pmm_gpu_devmem_t *devmem = &pmm->devmem;
|
||||
uvm_pmm_gpu_devmem_t *devmem;
|
||||
|
||||
list_for_each_entry(devmem, &g_uvm_global.devmem_ranges.list, list_node) {
|
||||
if (devmem->size == size) {
|
||||
list_del(&devmem->list_node);
|
||||
return devmem;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static uvm_pmm_gpu_devmem_t *devmem_alloc_pagemap(unsigned long size)
|
||||
{
|
||||
uvm_pmm_gpu_devmem_t *devmem;
|
||||
struct resource *res;
|
||||
void *ptr;
|
||||
NV_STATUS status;
|
||||
|
||||
if (!uvm_hmm_is_enabled_system_wide()) {
|
||||
devmem->pagemap.owner = NULL;
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
res = request_free_mem_region(&iomem_resource, size, "nvidia-uvm-hmm");
|
||||
if (IS_ERR(res)) {
|
||||
UVM_ERR_PRINT("request_free_mem_region() err %ld\n", PTR_ERR(res));
|
||||
status = errno_to_nv_status(PTR_ERR(res));
|
||||
goto err;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
|
||||
if (!devmem)
|
||||
goto err;
|
||||
|
||||
devmem->size = size;
|
||||
devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
|
||||
devmem->pagemap.range.start = res->start;
|
||||
devmem->pagemap.range.end = res->end;
|
||||
@@ -3217,43 +3194,77 @@ static NV_STATUS devmem_init(uvm_pmm_gpu_t *pmm)
|
||||
if (IS_ERR(ptr)) {
|
||||
UVM_ERR_PRINT("memremap_pages() err %ld\n", PTR_ERR(ptr));
|
||||
status = errno_to_nv_status(PTR_ERR(ptr));
|
||||
goto err_release;
|
||||
goto err_free;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
return devmem;
|
||||
|
||||
err_free:
|
||||
kfree(devmem);
|
||||
|
||||
err_release:
|
||||
release_mem_region(res->start, resource_size(res));
|
||||
err:
|
||||
devmem->pagemap.owner = NULL;
|
||||
return status;
|
||||
release_mem_region(res->start, resource_size(res));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void devmem_deinit(uvm_pmm_gpu_t *pmm)
|
||||
NV_STATUS uvm_pmm_devmem_init(uvm_parent_gpu_t *gpu)
|
||||
{
|
||||
uvm_pmm_gpu_devmem_t *devmem = &pmm->devmem;
|
||||
// Create a DEVICE_PRIVATE page for every GPU page available on the parent.
|
||||
unsigned long size = gpu->max_allocatable_address;
|
||||
|
||||
if (!devmem->pagemap.owner)
|
||||
if (!uvm_hmm_is_enabled_system_wide()) {
|
||||
gpu->devmem = NULL;
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
gpu->devmem = devmem_reuse_pagemap(size);
|
||||
if (!gpu->devmem)
|
||||
gpu->devmem = devmem_alloc_pagemap(size);
|
||||
|
||||
if (!gpu->devmem)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_pmm_devmem_deinit(uvm_parent_gpu_t *gpu)
|
||||
{
|
||||
if (!gpu->devmem)
|
||||
return;
|
||||
|
||||
memunmap_pages(&devmem->pagemap);
|
||||
release_mem_region(devmem->pagemap.range.start, range_len(&devmem->pagemap.range));
|
||||
list_add_tail(&gpu->devmem->list_node, &g_uvm_global.devmem_ranges.list);
|
||||
gpu->devmem = NULL;
|
||||
}
|
||||
|
||||
void uvm_pmm_devmem_exit(void)
|
||||
{
|
||||
uvm_pmm_gpu_devmem_t *devmem, *devmem_next;
|
||||
|
||||
list_for_each_entry_safe(devmem, devmem_next, &g_uvm_global.devmem_ranges.list, list_node) {
|
||||
list_del(&devmem->list_node);
|
||||
memunmap_pages(&devmem->pagemap);
|
||||
release_mem_region(devmem->pagemap.range.start, range_len(&devmem->pagemap.range));
|
||||
kfree(devmem);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned long uvm_pmm_gpu_devmem_get_pfn(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
{
|
||||
return (pmm->devmem.pagemap.range.start + chunk->address) >> PAGE_SHIFT;
|
||||
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
|
||||
unsigned long devmem_start = gpu->parent->devmem->pagemap.range.start;
|
||||
|
||||
return (devmem_start + chunk->address) >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
#endif // UVM_IS_CONFIG_HMM()
|
||||
|
||||
#if !UVM_IS_CONFIG_HMM()
|
||||
static NV_STATUS devmem_init(uvm_pmm_gpu_t *pmm)
|
||||
NV_STATUS uvm_pmm_devmem_init(uvm_parent_gpu_t *gpu)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void devmem_deinit(uvm_pmm_gpu_t *pmm)
|
||||
void uvm_pmm_devmem_deinit(uvm_parent_gpu_t *gpu)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -3469,10 +3480,6 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
|
||||
}
|
||||
}
|
||||
|
||||
status = devmem_init(pmm);
|
||||
if (status != NV_OK)
|
||||
goto cleanup;
|
||||
|
||||
return NV_OK;
|
||||
cleanup:
|
||||
uvm_pmm_gpu_deinit(pmm);
|
||||
@@ -3543,8 +3550,6 @@ void uvm_pmm_gpu_deinit(uvm_pmm_gpu_t *pmm)
|
||||
|
||||
deinit_caches(pmm);
|
||||
|
||||
devmem_deinit(pmm);
|
||||
|
||||
pmm->initialized = false;
|
||||
}
|
||||
|
||||
|
||||
@@ -192,22 +192,41 @@ typedef struct uvm_pmm_gpu_chunk_suballoc_struct uvm_pmm_gpu_chunk_suballoc_t;

#if UVM_IS_CONFIG_HMM()

typedef struct uvm_pmm_gpu_struct uvm_pmm_gpu_t;

typedef struct
{
// For g_uvm_global.devmem_ranges
struct list_head list_node;

// Size that was requested when created this region. This may be less than
// the size actually allocated by the kernel due to alignment contraints.
// Figuring out the required alignment at compile time is difficult due to
// unexported macros, so just use the requested size as the search key.
unsigned long size;

struct dev_pagemap pagemap;
} uvm_pmm_gpu_devmem_t;

typedef struct uvm_pmm_gpu_struct uvm_pmm_gpu_t;

// Return the GPU chunk for a given device private struct page.
uvm_gpu_chunk_t *uvm_pmm_devmem_page_to_chunk(struct page *page);

// Return the va_space for a given device private struct page.
uvm_va_space_t *uvm_pmm_devmem_page_to_va_space(struct page *page);

// Return the GPU id for a given device private struct page.
uvm_gpu_id_t uvm_pmm_devmem_page_to_gpu_id(struct page *page);

// Return the PFN of the device private struct page for the given GPU chunk.
unsigned long uvm_pmm_gpu_devmem_get_pfn(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);

// Free unused ZONE_DEVICE pages.
void uvm_pmm_devmem_exit(void);

#else
static inline void uvm_pmm_devmem_exit(void)
{
}
#endif

#if defined(CONFIG_PCI_P2PDMA) && defined(NV_STRUCT_PAGE_HAS_ZONE_DEVICE_DATA)
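A sketch of how these helpers combine elsewhere in this change (gpu_chunk_add(), gpu_chunk_remove() and devmem_fault()); it assumes the UVM headers and uvm_gpu_chunk_get_gpu() as used in the patch, and is not itself patch code.

static void describe_devmem_page(struct page *page)
{
    uvm_gpu_chunk_t *chunk = uvm_pmm_devmem_page_to_chunk(page);
    uvm_gpu_id_t id = uvm_gpu_chunk_get_gpu(chunk)->id;              /* as in gpu_chunk_add() */
    uvm_va_space_t *va_space = uvm_pmm_devmem_page_to_va_space(page);

    /* va_space can be NULL: uvm_hmm_unregister_gpu() does a racy lookup and the
     * chunk may not be bound to a va_block yet. */
    (void)id;
    (void)va_space;
}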
@@ -349,10 +368,6 @@ typedef struct uvm_pmm_gpu_struct
|
||||
nv_kthread_q_item_t va_block_lazy_free_q_item;
|
||||
} root_chunks;
|
||||
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
uvm_pmm_gpu_devmem_t devmem;
|
||||
#endif
|
||||
|
||||
// Lock protecting PMA allocation, freeing and eviction
|
||||
uvm_rw_semaphore_t pma_lock;
|
||||
|
||||
@@ -604,6 +619,10 @@ static uvm_chunk_size_t uvm_chunk_find_prev_size(uvm_chunk_sizes_mask_t chunk_si
|
||||
// retained, and it's up to the caller to release them.
|
||||
NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region_size, uvm_reverse_map_t *out_mappings);
|
||||
|
||||
// Allocate and initialise struct page data in the kernel to support HMM.
|
||||
NV_STATUS uvm_pmm_devmem_init(uvm_parent_gpu_t *gpu);
|
||||
void uvm_pmm_devmem_deinit(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Iterates over every size in the input mask from smallest to largest
|
||||
#define for_each_chunk_size(__size, __chunk_sizes) \
|
||||
for ((__size) = (__chunk_sizes) ? uvm_chunk_find_first_size(__chunk_sizes) : \
|
||||
|
||||
@@ -2839,10 +2839,14 @@ static bool block_check_egm_peer(uvm_va_space_t *va_space, uvm_gpu_t *gpu, int n
|
||||
remote_node_info = uvm_va_space_get_egm_numa_node_info(va_space, nid);
|
||||
UVM_ASSERT(!uvm_parent_processor_mask_empty(&remote_node_info->parent_gpus));
|
||||
for_each_parent_gpu_in_mask(parent_gpu, &remote_node_info->parent_gpus) {
|
||||
UVM_ASSERT(parent_gpu->egm.enabled);
|
||||
NvU64 page_addr = phys_addr.address;
|
||||
|
||||
if (phys_addr.address + parent_gpu->egm.base_address >= remote_node_info->node_start &&
|
||||
phys_addr.address + parent_gpu->egm.base_address < remote_node_info->node_end &&
|
||||
UVM_ASSERT(parent_gpu->egm.enabled);
|
||||
page_addr += parent_gpu->egm.base_address;
|
||||
if (parent_gpu->nvswitch_info.is_nvswitch_connected && gpu->parent != parent_gpu)
|
||||
page_addr -= parent_gpu->nvswitch_info.egm_fabric_memory_window_start;
|
||||
|
||||
if (page_addr >= remote_node_info->node_start && page_addr < remote_node_info->node_end &&
|
||||
remote_node_info->routing_table[uvm_parent_id_gpu_index(gpu->parent->id)] == parent_gpu) {
|
||||
return true;
|
||||
}
|
||||
@@ -3229,8 +3233,15 @@ static uvm_gpu_phys_address_t block_phys_page_address(uvm_va_block_t *block,

if (routing_gpu) {
struct page *page = uvm_cpu_chunk_get_cpu_page(block, chunk, block_page.page_index);

phys_addr = page_to_phys(page);
aperture = uvm_gpu_egm_peer_aperture(gpu->parent, routing_gpu);

// Remote EGM routing is based on both the EGM base address and EGM
// fabric memory window.
if (routing_gpu->nvswitch_info.is_nvswitch_connected && routing_gpu != gpu->parent)
phys_addr += routing_gpu->nvswitch_info.egm_fabric_memory_window_start;

uvm_page_mask_set(&accessing_gpu_state->egm_pages, block_page.page_index);
return uvm_gpu_phys_address(aperture, phys_addr - routing_gpu->egm.base_address);
}
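A compact sketch of the EGM address math introduced here: when the routing GPU is reached over NVSwitch, the CPU physical address is offset by egm_fabric_memory_window_start before the EGM base address is subtracted. The parameter names mirror the fields above; the helper itself is illustrative, not patch code.

#include <stdbool.h>
#include <stdint.h>

typedef uint64_t NvU64;

static NvU64 egm_peer_offset(NvU64 cpu_phys,          /* page_to_phys(page) */
                             NvU64 egm_base_address,  /* routing_gpu->egm.base_address */
                             NvU64 egm_fabric_window, /* egm_fabric_memory_window_start */
                             bool routed_over_nvswitch)
{
    NvU64 addr = cpu_phys;

    if (routed_over_nvswitch)
        addr += egm_fabric_window;   /* route through the 47-bit fabric window */

    return addr - egm_base_address;  /* offset within the EGM peer aperture */
}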
@@ -13575,6 +13586,9 @@ NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params,
|
||||
struct page *page = block_page_get(block, block_page);
|
||||
|
||||
phys_addr = page_to_phys(page) - egm_routing_gpu->egm.base_address;
|
||||
if (egm_routing_gpu->nvswitch_info.is_nvswitch_connected && egm_routing_gpu != gpu->parent)
|
||||
phys_addr += egm_routing_gpu->nvswitch_info.egm_fabric_memory_window_start;
|
||||
|
||||
params->is_egm_mapping[count] = true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -612,6 +612,42 @@ nv_dma_buf_unmap_pfns(
}
}

static NvU32
nv_dma_buf_get_sg_count (
struct device *dev,
nv_dma_buf_file_private_t *priv,
NvU32 *max_seg_size
)
{
NvU32 dma_max_seg_size, i;
NvU32 nents = 0;

dma_max_seg_size = NV_ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE);
if (dma_max_seg_size < PAGE_SIZE)
{
return 0;
}

// Calculate nents needed to allocate sg_table
for (i = 0; i < priv->num_objects; i++)
{
NvU32 range_count = priv->handles[i].memArea.numRanges;
NvU32 index;

for (index = 0; index < range_count; index++)
{
NvU64 length = priv->handles[i].memArea.pRanges[index].size;
NvU64 count = length + dma_max_seg_size - 1;
do_div(count, dma_max_seg_size);
nents += count;
}
}

*max_seg_size = dma_max_seg_size;

return nents;
}

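A worked example of the nents calculation above, with made-up sizes: for dma_get_max_seg_size(dev) = 4 MB and two ranges of 6 MB and 3 MB, the function returns ceil(6M/4M) + ceil(3M/4M) = 2 + 1 = 3 scatterlist entries. A standalone C check of that arithmetic:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t max_seg = 4ull << 20;                   /* 4 MB, PAGE_SIZE aligned */
    uint64_t ranges[] = { 6ull << 20, 3ull << 20 };  /* two memArea ranges */
    uint32_t nents = 0;
    size_t i;

    for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++)
        nents += (uint32_t)((ranges[i] + max_seg - 1) / max_seg);  /* same ceil as the do_div() loop */

    printf("nents = %u\n", nents);                   /* prints: nents = 3 */
    return 0;
}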
static struct sg_table*
|
||||
nv_dma_buf_map_pages (
|
||||
struct device *dev,
|
||||
@@ -620,15 +656,11 @@ nv_dma_buf_map_pages (
|
||||
{
|
||||
struct sg_table *sgt = NULL;
|
||||
struct scatterlist *sg;
|
||||
NvU32 nents = 0;
|
||||
NvU32 i;
|
||||
NvU32 dma_max_seg_size = 0;
|
||||
NvU32 i, nents;
|
||||
int rc;
|
||||
|
||||
// Calculate nents needed to allocate sg_table
|
||||
for (i = 0; i < priv->num_objects; i++)
|
||||
{
|
||||
nents += priv->handles[i].memArea.numRanges;
|
||||
}
|
||||
nents = nv_dma_buf_get_sg_count(dev, priv, &dma_max_seg_size);
|
||||
|
||||
NV_KZALLOC(sgt, sizeof(struct sg_table));
|
||||
if (sgt == NULL)
|
||||
@@ -650,20 +682,30 @@ nv_dma_buf_map_pages (
|
||||
NvU32 index = 0;
|
||||
for (index = 0; index < range_count; index++)
|
||||
{
|
||||
NvU64 addr = priv->handles[i].memArea.pRanges[index].start;
|
||||
NvU64 len = priv->handles[i].memArea.pRanges[index].size;
|
||||
struct page *page = NV_GET_PAGE_STRUCT(addr);
|
||||
NvU64 dma_addr = priv->handles[i].memArea.pRanges[index].start;
|
||||
NvU64 dma_len = priv->handles[i].memArea.pRanges[index].size;
|
||||
|
||||
if ((page == NULL) || (sg == NULL))
|
||||
// Split each range into dma_max_seg_size chunks
|
||||
while(dma_len != 0)
|
||||
{
|
||||
goto free_table;
|
||||
}
|
||||
NvU32 sg_len = NV_MIN(dma_len, dma_max_seg_size);
|
||||
struct page *page = NV_GET_PAGE_STRUCT(dma_addr);
|
||||
|
||||
sg_set_page(sg, page, len, NV_GET_OFFSET_IN_PAGE(addr));
|
||||
sg = sg_next(sg);
|
||||
if ((page == NULL) || (sg == NULL))
|
||||
{
|
||||
goto free_table;
|
||||
}
|
||||
|
||||
sg_set_page(sg, page, sg_len, NV_GET_OFFSET_IN_PAGE(dma_addr));
|
||||
dma_addr += sg_len;
|
||||
dma_len -= sg_len;
|
||||
sg = sg_next(sg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
WARN_ON(sg != NULL);
|
||||
|
||||
// DMA map the sg_table
|
||||
rc = dma_map_sg(dev, sgt->sgl, sgt->orig_nents, DMA_BIDIRECTIONAL);
|
||||
if (rc <= 0)
|
||||
@@ -693,36 +735,16 @@ nv_dma_buf_map_pfns (
|
||||
struct sg_table *sgt = NULL;
|
||||
struct scatterlist *sg;
|
||||
nv_dma_device_t peer_dma_dev = {{ 0 }};
|
||||
NvU32 dma_max_seg_size;
|
||||
NvU32 nents = 0;
|
||||
NvU32 dma_max_seg_size = 0;
|
||||
NvU32 mapped_nents = 0;
|
||||
NvU32 i = 0;
|
||||
NvU32 nents;
|
||||
int rc = 0;
|
||||
|
||||
peer_dma_dev.dev = dev;
|
||||
peer_dma_dev.addressable_range.limit = (NvU64)dev->dma_mask;
|
||||
|
||||
dma_max_seg_size = NV_ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE);
|
||||
|
||||
if (dma_max_seg_size < PAGE_SIZE)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Calculate nents needed to allocate sg_table
|
||||
for (i = 0; i < priv->num_objects; i++)
|
||||
{
|
||||
NvU32 range_count = priv->handles[i].memArea.numRanges;
|
||||
NvU32 index;
|
||||
|
||||
for (index = 0; index < range_count; index++)
|
||||
{
|
||||
NvU64 length = priv->handles[i].memArea.pRanges[index].size;
|
||||
NvU64 count = length + dma_max_seg_size - 1;
|
||||
do_div(count, dma_max_seg_size);
|
||||
nents += count;
|
||||
}
|
||||
}
|
||||
nents = nv_dma_buf_get_sg_count(dev, priv, &dma_max_seg_size);
|
||||
|
||||
NV_KZALLOC(sgt, sizeof(struct sg_table));
|
||||
if (sgt == NULL)
|
||||
@@ -777,6 +799,9 @@ nv_dma_buf_map_pfns (
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
WARN_ON(sg != NULL);
|
||||
|
||||
sgt->nents = mapped_nents;
|
||||
|
||||
WARN_ON(sgt->nents != sgt->orig_nents);
|
||||
|
||||
@@ -445,7 +445,9 @@ static int nvidia_mmap_sysmem(
|
||||
}
|
||||
else
|
||||
{
|
||||
vma->vm_page_prot = nv_adjust_pgprot(vma->vm_page_prot, 0);
|
||||
if (at->flags.unencrypted)
|
||||
vma->vm_page_prot = nv_adjust_pgprot(vma->vm_page_prot, 0);
|
||||
|
||||
ret = vm_insert_page(vma, start,
|
||||
NV_GET_PAGE_STRUCT(at->page_table[j]->phys_addr));
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@@ -308,6 +308,15 @@ static NvU32 find_gpu_numa_nodes_in_srat(nv_linux_state_t *nvl)
struct acpi_srat_generic_affinity *gi;
NvU32 numa_node = NUMA_NO_NODE;

if (NV_PCI_DEVFN(nvl->pci_dev) != 0)
{
nv_printf(NV_DBG_ERRORS, "NVRM: Failing to parse SRAT GI for %04x:%02x:%02x.%x "
"since non-zero device function is not supported.\n",
NV_PCI_DOMAIN_NUMBER(nvl->pci_dev), NV_PCI_BUS_NUMBER(nvl->pci_dev),
NV_PCI_SLOT_NUMBER(nvl->pci_dev), PCI_FUNC(nvl->pci_dev->devfn));
return 0;
}

if (acpi_get_table(ACPI_SIG_SRAT, 0, &table_header)) {
nv_printf(NV_DBG_INFO, "NVRM: Failed to parse the SRAT table.\n");
return 0;
@@ -331,9 +340,14 @@ static NvU32 find_gpu_numa_nodes_in_srat(nv_linux_state_t *nvl)
(((unsigned long)subtable_header) + subtable_header_length < table_end)) {

if (subtable_header->type == ACPI_SRAT_TYPE_GENERIC_AFFINITY) {
NvU8 busAtByte2, busAtByte3;
gi = (struct acpi_srat_generic_affinity *) subtable_header;
busAtByte2 = gi->device_handle[2];
busAtByte3 = gi->device_handle[3];

// Device and function should be zero enforced by above check
gi_dbdf = *((NvU16 *)(&gi->device_handle[0])) << 16 |
*((NvU16 *)(&gi->device_handle[2]));
(busAtByte2 != 0 ? busAtByte2 : busAtByte3) << 8;

if (gi_dbdf == dev_dbdf) {
numa_node = pxm_to_node(gi->proximity_domain);
@@ -347,6 +361,31 @@ static NvU32 find_gpu_numa_nodes_in_srat(nv_linux_state_t *nvl)
pxm_count = 0;
goto exit;
}
nv_printf(NV_DBG_INFO,
"NVRM: matching SRAT GI entry: 0x%x 0x%x 0x%x 0x%x PXM: %d\n",
gi->device_handle[3],
gi->device_handle[2],
gi->device_handle[1],
gi->device_handle[0],
gi->proximity_domain);
if ((busAtByte2) == 0 &&
(busAtByte3) != 0)
{
/*
* TODO: Remove this WAR once Hypervisor stack is updated
* to fix this bug and after all CSPs have moved to using
* the updated Hypervisor stack with fix.
*/
nv_printf(NV_DBG_WARNINGS,
"NVRM: PCIe bus value picked from byte 3 offset in SRAT GI entry: 0x%x 0x%x 0x%x 0x%x PXM: %d\n"
"NVRM: Hypervisor stack is old and not following ACPI spec defined offset.\n"
"NVRM: Please consider upgrading the Hypervisor stack as this workaround will be removed in future release.\n",
gi->device_handle[3],
gi->device_handle[2],
gi->device_handle[1],
gi->device_handle[0],
gi->proximity_domain);
}
}
}
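A worked sketch of the DBDF comparison above, under the assumption (matching the code) that the Generic Initiator device handle stores the PCI segment in bytes 0-1 and the bus in byte 2, with byte 3 tolerated as a workaround for older hypervisor stacks; the values are made up.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* segment 0x0004, bus 0xe1, device/function zero (enforced earlier) */
    uint8_t device_handle[16] = { 0x04, 0x00, 0xe1, 0x00 };

    uint16_t segment = (uint16_t)(device_handle[0] | (device_handle[1] << 8));
    uint8_t  bus     = device_handle[2] ? device_handle[2] : device_handle[3];
    uint32_t gi_dbdf = ((uint32_t)segment << 16) | ((uint32_t)bus << 8);

    printf("gi_dbdf = 0x%08x\n", gi_dbdf);  /* 0x0004e100, i.e. 0004:e1:00.0 */
    return 0;
}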
@@ -792,7 +831,10 @@ next_bar:
|
||||
NV_ATOMIC_SET(nvl->numa_info.status, NV_IOCTL_NUMA_STATUS_DISABLED);
|
||||
nvl->numa_info.node_id = NUMA_NO_NODE;
|
||||
|
||||
nv_init_coherent_link_info(nv);
|
||||
if (pci_devid_is_self_hosted(pci_dev->device))
|
||||
{
|
||||
nv_init_coherent_link_info(nv);
|
||||
}
|
||||
|
||||
#if defined(NVCPU_PPC64LE)
|
||||
// Use HW NUMA support as a proxy for ATS support. This is true in the only
|
||||
|
||||
@@ -1630,17 +1630,25 @@ static void nv_init_mapping_revocation(nv_linux_state_t *nvl,
|
||||
nv_linux_file_private_t *nvlfp,
|
||||
struct inode *inode)
|
||||
{
|
||||
down(&nvl->mmap_lock);
|
||||
|
||||
/* Set up struct address_space for use with unmap_mapping_range() */
|
||||
address_space_init_once(&nvlfp->mapping);
|
||||
nvlfp->mapping.host = inode;
|
||||
nvlfp->mapping.a_ops = inode->i_mapping->a_ops;
|
||||
file->f_mapping = &nvlfp->mapping;
|
||||
}
|
||||
|
||||
/* Add nvlfp to list of open files in nvl for mapping revocation */
|
||||
/* Adds nvlfp to list of open files for mapping revocation */
|
||||
static void nv_add_open_file(nv_linux_state_t *nvl,
|
||||
nv_linux_file_private_t *nvlfp)
|
||||
{
|
||||
nvlfp->nvptr = nvl;
|
||||
|
||||
/*
|
||||
* nvl->open_files and other mapping revocation members in nv_linux_state_t
|
||||
* are protected by nvl->mmap_lock instead of nvl->ldata_lock.
|
||||
*/
|
||||
down(&nvl->mmap_lock);
|
||||
list_add(&nvlfp->entry, &nvl->open_files);
|
||||
|
||||
up(&nvl->mmap_lock);
|
||||
}
|
||||
|
||||
@@ -1690,11 +1698,12 @@ static void nvidia_open_deferred(void *nvlfp_raw)
|
||||
*/
|
||||
down(&nvl->ldata_lock);
|
||||
rc = nv_open_device_for_nvlfp(NV_STATE_PTR(nvl), nvlfp->sp, nvlfp);
|
||||
up(&nvl->ldata_lock);
|
||||
|
||||
/* Set nvptr only upon success (where nvl->usage_count is incremented) */
|
||||
/* Only add open file tracking where nvl->usage_count is incremented */
|
||||
if (rc == 0)
|
||||
nvlfp->nvptr = nvl;
|
||||
nv_add_open_file(nvl, nvlfp);
|
||||
|
||||
up(&nvl->ldata_lock);
|
||||
|
||||
complete_all(&nvlfp->open_complete);
|
||||
}
|
||||
@@ -1813,6 +1822,7 @@ nvidia_open(
|
||||
}
|
||||
|
||||
nv = NV_STATE_PTR(nvl);
|
||||
nv_init_mapping_revocation(nvl, file, nvlfp, inode);
|
||||
|
||||
if (nv_try_lock_foreground_open(file, nvl) == 0)
|
||||
{
|
||||
@@ -1823,11 +1833,11 @@ nvidia_open(
|
||||
|
||||
rc = nv_open_device_for_nvlfp(nv, nvlfp->sp, nvlfp);
|
||||
|
||||
up(&nvl->ldata_lock);
|
||||
|
||||
/* Set nvptr only upon success (where nvl->usage_count is incremented) */
|
||||
/* Only add open file tracking where nvl->usage_count is incremented */
|
||||
if (rc == 0)
|
||||
nvlfp->nvptr = nvl;
|
||||
nv_add_open_file(nvl, nvlfp);
|
||||
|
||||
up(&nvl->ldata_lock);
|
||||
|
||||
complete_all(&nvlfp->open_complete);
|
||||
}
|
||||
@@ -1882,10 +1892,6 @@ failed:
|
||||
NV_SET_FILE_PRIVATE(file, NULL);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
nv_init_mapping_revocation(nvl, file, nvlfp, inode);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -1672,7 +1672,7 @@ NV_STATUS NV_API_CALL os_alloc_pages_node
|
||||
NV_STATUS status = NV_ERR_NOT_SUPPORTED;
|
||||
|
||||
#if defined(__GFP_THISNODE) && defined(GFP_HIGHUSER_MOVABLE) && \
|
||||
defined(__GFP_COMP) && defined(__GFP_NORETRY) && defined(__GFP_NOWARN)
|
||||
defined(__GFP_COMP) && defined(__GFP_NOWARN)
|
||||
gfp_t gfp_mask;
|
||||
struct page *alloc_addr;
|
||||
unsigned int order = get_order(size);
|
||||
@@ -1689,13 +1689,11 @@ NV_STATUS NV_API_CALL os_alloc_pages_node
|
||||
* pages, which is needed in order to use
|
||||
* vm_insert_page API.
|
||||
*
|
||||
* 4. __GFP_NORETRY: Used to avoid the Linux kernel OOM killer.
|
||||
*
|
||||
* 5. __GFP_NOWARN: Used to avoid a WARN_ON in the slowpath if
|
||||
* 4. __GFP_NOWARN: Used to avoid a WARN_ON in the slowpath if
|
||||
* the requested order is too large (just fail
|
||||
* instead).
|
||||
*
|
||||
* 6. (Optional) __GFP_RECLAIM: Used to allow/forbid reclaim.
|
||||
* 5. (Optional) __GFP_RECLAIM: Used to allow/forbid reclaim.
|
||||
* This is part of GFP_USER and consequently
|
||||
* GFP_HIGHUSER_MOVABLE.
|
||||
*
|
||||
@@ -1709,7 +1707,30 @@ NV_STATUS NV_API_CALL os_alloc_pages_node
*/

gfp_mask = __GFP_THISNODE | GFP_HIGHUSER_MOVABLE | __GFP_COMP |
__GFP_NORETRY | __GFP_NOWARN;
__GFP_NOWARN;

#if defined(__GFP_RETRY_MAYFAIL)

/*
* __GFP_RETRY_MAYFAIL : Used to avoid the Linux kernel OOM killer.
* To help PMA on paths where UVM might be
* in memory over subscription. This gives UVM
* a chance to free memory before invoking any
* action from the OOM killer.
* Freeing non-essential memory will also benefit
* the system as a whole.
*/

gfp_mask |= __GFP_RETRY_MAYFAIL;
#elif defined(__GFP_NORETRY)

/*
* __GFP_NORETRY : Use __GFP_NORETRY on older kernels where
* __GFP_RETRY_MAYFAIL is not present.
*/

gfp_mask |= __GFP_NORETRY;
#endif
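A sketch of the allocation call that results from the mask built above, assuming a kernel that provides __GFP_RETRY_MAYFAIL; the skip-reclaim handling is an assumption based on the truncated __GFP_RECLAIM block that follows, not verified patch code.

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/types.h>

static struct page *example_alloc_on_node(int nid, unsigned int size, bool skip_reclaim)
{
    unsigned int order = get_order(size);
    gfp_t gfp_mask = __GFP_THISNODE | GFP_HIGHUSER_MOVABLE | __GFP_COMP |
                     __GFP_NOWARN | __GFP_RETRY_MAYFAIL;

    /* Assumption: NV_ALLOC_PAGES_NODE_SKIP_RECLAIM clears __GFP_RECLAIM. */
    if (skip_reclaim)
        gfp_mask &= ~__GFP_RECLAIM;

    return alloc_pages_node(nid, gfp_mask, order);
}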

#if defined(__GFP_RECLAIM)
if (flag & NV_ALLOC_PAGES_NODE_SKIP_RECLAIM)