This commit is contained in:
Andy Ritger
2024-11-24 15:32:26 -08:00
parent 7d3cbfe254
commit 7a6a5a1f9a
42 changed files with 331 additions and 215 deletions

View File

@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.127.08\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.135\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

View File

@@ -52,6 +52,22 @@ else
endif
endif
# If CC hasn't been set explicitly, check the value of CONFIG_CC_VERSION_TEXT.
# Look for the compiler specified there, and use it by default, if found.
#
# $(origin CC) evaluates to "default" only while CC still carries make's
# built-in value, i.e. it was not supplied by the environment, the command
# line, or an earlier assignment in a makefile.
ifeq ($(origin CC),default)
# Source the kernel's .config as a shell fragment and keep only the first
# word of CONFIG_CC_VERSION_TEXT; that word is treated as the compiler name.
cc_version_text=$(firstword $(shell . $(KERNEL_OUTPUT)/.config; \
echo "$$CONFIG_CC_VERSION_TEXT"))
# An empty cc_version_text means nothing was read from .config; in that case
# CC is left for the "CC ?= cc" fallback that follows this block.
ifneq ($(cc_version_text),)
# "command -v" prints nothing when the shell cannot find the named compiler.
# In that case only warn and leave CC untouched.
ifeq ($(shell command -v $(cc_version_text)),)
$(warning WARNING: Unable to locate the compiler $(cc_version_text) \
from CONFIG_CC_VERSION_TEXT in the kernel configuration.)
else
CC=$(cc_version_text)
endif
endif
endif
CC ?= cc
LD ?= ld
OBJDUMP ?= objdump
@@ -65,6 +81,16 @@ else
)
endif
# KERNEL_ARCH is the name substituted into the arch/$(KERNEL_ARCH)/... paths
# used for the module linker scripts (see LD_SCRIPT). It defaults to $(ARCH).
KERNEL_ARCH = $(ARCH)
# Both i386 and x86_64 are mapped to the single name "x86".
ifneq ($(filter $(ARCH),i386 x86_64),)
KERNEL_ARCH = x86
else
# Any other value must be arm64 or powerpc, which are used unchanged;
# everything else aborts the build here.
ifeq ($(filter $(ARCH),arm64 powerpc),)
$(error Unsupported architecture $(ARCH))
endif
endif
NV_KERNEL_MODULES ?= $(wildcard nvidia nvidia-uvm nvidia-vgpu-vfio nvidia-modeset nvidia-drm nvidia-peermem)
NV_KERNEL_MODULES := $(filter-out $(NV_EXCLUDE_KERNEL_MODULES), \
$(NV_KERNEL_MODULES))
@@ -106,8 +132,9 @@ else
# module symbols on which the Linux kernel's module resolution is dependent
# and hence must be used whenever present.
LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds \
$(KERNEL_SOURCES)/arch/$(ARCH)/kernel/module.lds \
LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds \
$(KERNEL_SOURCES)/arch/$(KERNEL_ARCH)/kernel/module.lds \
$(KERNEL_OUTPUT)/arch/$(KERNEL_ARCH)/module.lds \
$(KERNEL_OUTPUT)/scripts/module.lds
NV_MODULE_COMMON_SCRIPTS := $(foreach s, $(wildcard $(LD_SCRIPT)), -T $(s))

View File

@@ -2523,6 +2523,22 @@ compile_test() {
fi
;;
file_operations_fop_unsigned_offset_present)
#
# Determine if the FOP_UNSIGNED_OFFSET define is present.
#
# Added by commit 641bb4394f40 ("fs: move FMODE_UNSIGNED_OFFSET to
# fop_flags") in v6.12.
#
# When NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT is defined,
# nv_drm_fops sets .fop_flags = FOP_UNSIGNED_OFFSET.
#
# The test snippet only returns the FOP_UNSIGNED_OFFSET identifier after
# including <linux/fs.h>, so it builds only where that header provides it.
#
CODE="
#include <linux/fs.h>
int conftest_file_operations_fop_unsigned_offset_present(void) {
return FOP_UNSIGNED_OFFSET;
}"
compile_check_conftest "$CODE" "NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT" "" "types"
;;
mm_context_t)
#
# Determine if the 'mm_context_t' data type is present
@@ -6896,6 +6912,22 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_UNLOCKED_IOCTL_FLAG_PRESENT" "" "types"
;;
folio_test_swapcache)
#
# Determine if the folio_test_swapcache() function is present.
#
# folio_test_swapcache() was added by commit d389a4a811551 ("mm:
# Add folio flag manipulation functions") in v5.16.
#
# The result is published as NV_FOLIO_TEST_SWAPCACHE_PRESENT.
#
# NOTE(review): the snippet calls folio_test_swapcache() with no
# arguments. This looks deliberate -- presumably the "functions" check
# keys off whether the identifier is declared rather than off a clean
# compile -- confirm against compile_check_conftest before changing it.
#
CODE="
#include <linux/page-flags.h>
void conftest_folio_test_swapcache(void) {
folio_test_swapcache();
}"
compile_check_conftest "$CODE" "NV_FOLIO_TEST_SWAPCACHE_PRESENT" "" "functions"
;;
# When adding a new conftest entry, please use the correct format for
# specifying the relevant upstream Linux kernel commit. Please
# avoid specifying -rc kernels, and only use SHAs that actually exist

View File

@@ -1553,6 +1553,10 @@ static const struct file_operations nv_drm_fops = {
.read = drm_read,
.llseek = noop_llseek,
#if defined(NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT)
.fop_flags = FOP_UNSIGNED_OFFSET,
#endif
};
static const struct drm_ioctl_desc nv_drm_ioctls[] = {

View File

@@ -131,3 +131,4 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffe
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_create_dp_colorspace_property_has_supported_colorspaces_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations_fop_unsigned_offset_present

View File

@@ -8,7 +8,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_sec2.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_debug_optimized.c
NVIDIA_UVM_SOURCES += nvidia-uvm/nvstatus.c
NVIDIA_UVM_SOURCES += nvidia-uvm/nvCpuUuid.c
NVIDIA_UVM_SOURCES += nvidia-uvm/nv-kthread-q.c

View File

@@ -13,19 +13,6 @@ NVIDIA_UVM_OBJECTS =
include $(src)/nvidia-uvm/nvidia-uvm-sources.Kbuild
NVIDIA_UVM_OBJECTS += $(patsubst %.c,%.o,$(NVIDIA_UVM_SOURCES))
# Some linux kernel functions rely on being built with optimizations on and
# to work around this we put wrappers for them in a separate file that's built
# with optimizations on in debug builds and skipped in other builds.
# Notably gcc 4.4 supports per function optimization attributes that would be
# easier to use, but is too recent to rely on for now.
NVIDIA_UVM_DEBUG_OPTIMIZED_SOURCE := nvidia-uvm/uvm_debug_optimized.c
NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT := $(patsubst %.c,%.o,$(NVIDIA_UVM_DEBUG_OPTIMIZED_SOURCE))
ifneq ($(UVM_BUILD_TYPE),debug)
# Only build the wrappers on debug builds
NVIDIA_UVM_OBJECTS := $(filter-out $(NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT), $(NVIDIA_UVM_OBJECTS))
endif
obj-m += nvidia-uvm.o
nvidia-uvm-y := $(NVIDIA_UVM_OBJECTS)
@@ -36,15 +23,14 @@ NVIDIA_UVM_KO = nvidia-uvm/nvidia-uvm.ko
#
ifeq ($(UVM_BUILD_TYPE),debug)
NVIDIA_UVM_CFLAGS += -DDEBUG -O1 -g
else
ifeq ($(UVM_BUILD_TYPE),develop)
# -DDEBUG is required, in order to allow pr_devel() print statements to
# work:
NVIDIA_UVM_CFLAGS += -DDEBUG
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_DEVELOP
endif
NVIDIA_UVM_CFLAGS += -O2
NVIDIA_UVM_CFLAGS += -DDEBUG -g
endif
ifeq ($(UVM_BUILD_TYPE),develop)
# -DDEBUG is required, in order to allow pr_devel() print statements to
# work:
NVIDIA_UVM_CFLAGS += -DDEBUG
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_DEVELOP
endif
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_ENABLED
@@ -56,11 +42,6 @@ NVIDIA_UVM_CFLAGS += -I$(src)/nvidia-uvm
$(call ASSIGN_PER_OBJ_CFLAGS, $(NVIDIA_UVM_OBJECTS), $(NVIDIA_UVM_CFLAGS))
ifeq ($(UVM_BUILD_TYPE),debug)
# Force optimizations on for the wrappers
$(call ASSIGN_PER_OBJ_CFLAGS, $(NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT), $(NVIDIA_UVM_CFLAGS) -O2)
endif
#
# Register the conftests needed by nvidia-uvm.ko
#
@@ -88,6 +69,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_fault_to_errno
NV_CONFTEST_FUNCTION_COMPILE_TESTS += find_next_bit_wrap
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_is_dma_domain
NV_CONFTEST_FUNCTION_COMPILE_TESTS += folio_test_swapcache
NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t

View File

@@ -127,9 +127,9 @@ static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct
goto err;
}
old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
UVM_FD_UNINITIALIZED,
UVM_FD_INITIALIZING);
old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
UVM_FD_UNINITIALIZED,
UVM_FD_INITIALIZING);
old_fd_type &= UVM_FD_TYPE_MASK;
if (old_fd_type != UVM_FD_UNINITIALIZED) {
status = NV_ERR_IN_USE;
@@ -914,8 +914,9 @@ static NV_STATUS uvm_api_initialize(UVM_INITIALIZE_PARAMS *params, struct file *
// attempt to be made. This is safe because other threads will have only had
// a chance to observe UVM_FD_INITIALIZING and not UVM_FD_VA_SPACE in this
// case.
old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
UVM_FD_UNINITIALIZED, UVM_FD_INITIALIZING);
old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
UVM_FD_UNINITIALIZED,
UVM_FD_INITIALIZING);
old_fd_type &= UVM_FD_TYPE_MASK;
if (old_fd_type == UVM_FD_UNINITIALIZED) {
status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);

View File

@@ -47,7 +47,7 @@
{ \
params_type params; \
BUILD_BUG_ON(sizeof(params) > UVM_MAX_IOCTL_PARAM_STACK_SIZE); \
if (nv_copy_from_user(&params, (void __user*)arg, sizeof(params))) \
if (copy_from_user(&params, (void __user*)arg, sizeof(params))) \
return -EFAULT; \
\
params.rmStatus = uvm_global_get_status(); \
@@ -60,7 +60,7 @@
params.rmStatus = function_name(&params, filp); \
} \
\
if (nv_copy_to_user((void __user*)arg, &params, sizeof(params))) \
if (copy_to_user((void __user*)arg, &params, sizeof(params))) \
return -EFAULT; \
\
return 0; \
@@ -84,7 +84,7 @@
if (!params) \
return -ENOMEM; \
BUILD_BUG_ON(sizeof(*params) <= UVM_MAX_IOCTL_PARAM_STACK_SIZE); \
if (nv_copy_from_user(params, (void __user*)arg, sizeof(*params))) { \
if (copy_from_user(params, (void __user*)arg, sizeof(*params))) { \
uvm_kvfree(params); \
return -EFAULT; \
} \
@@ -99,7 +99,7 @@
params->rmStatus = function_name(params, filp); \
} \
\
if (nv_copy_to_user((void __user*)arg, params, sizeof(*params))) \
if (copy_to_user((void __user*)arg, params, sizeof(*params))) \
ret = -EFAULT; \
\
uvm_kvfree(params); \

View File

@@ -1,53 +0,0 @@
/*******************************************************************************
Copyright (c) 2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
// This file provides simple wrappers that are always built with optimizations
// turned on to WAR issues with functions that don't build correctly otherwise.
#include "uvm_linux.h"
// Out-of-line forwarder for atomic_xchg(): passes 'val' and 'new' through
// unchanged and returns whatever atomic_xchg() returns. It exists so that the
// call is compiled in this file, which is always built with optimizations on.
int nv_atomic_xchg(atomic_t *val, int new)
{
    const int result = atomic_xchg(val, new);

    return result;
}
// Out-of-line forwarder for atomic_cmpxchg(): passes 'val', 'old' and 'new'
// through unchanged and returns whatever atomic_cmpxchg() returns. It exists so
// that the call is compiled in this file, which is always built with
// optimizations on.
int nv_atomic_cmpxchg(atomic_t *val, int old, int new)
{
    const int result = atomic_cmpxchg(val, old, new);

    return result;
}
// Out-of-line forwarder for atomic_long_cmpxchg(): passes 'val', 'old' and
// 'new' through unchanged and returns whatever atomic_long_cmpxchg() returns.
// It exists so that the call is compiled in this file, which is always built
// with optimizations on.
long nv_atomic_long_cmpxchg(atomic_long_t *val, long old, long new)
{
    const long result = atomic_long_cmpxchg(val, old, new);

    return result;
}
// Out-of-line forwarder for copy_from_user(): passes 'to', 'from' and 'n'
// through unchanged and returns copy_from_user()'s return value as-is. It
// exists so that the call is compiled in this file, which is always built with
// optimizations on.
unsigned long nv_copy_from_user(void *to, const void __user *from, unsigned long n)
{
    const unsigned long result = copy_from_user(to, from, n);

    return result;
}
// Out-of-line forwarder for copy_to_user(): passes 'to', 'from' and 'n'
// through unchanged and returns copy_to_user()'s return value as-is. It exists
// so that the call is compiled in this file, which is always built with
// optimizations on.
unsigned long nv_copy_to_user(void __user *to, const void *from, unsigned long n)
{
    const unsigned long result = copy_to_user(to, from, n);

    return result;
}

View File

@@ -412,7 +412,7 @@ void uvm_global_set_fatal_error_impl(NV_STATUS error)
UVM_ASSERT(error != NV_OK);
previous_error = nv_atomic_cmpxchg(&g_uvm_global.fatal_error, NV_OK, error);
previous_error = atomic_cmpxchg(&g_uvm_global.fatal_error, NV_OK, error);
if (previous_error == NV_OK) {
UVM_ERR_PRINT("Encountered a global fatal error: %s\n", nvstatusToString(error));
@@ -430,7 +430,7 @@ NV_STATUS uvm_global_reset_fatal_error(void)
return NV_ERR_INVALID_STATE;
}
return nv_atomic_xchg(&g_uvm_global.fatal_error, NV_OK);
return atomic_xchg(&g_uvm_global.fatal_error, NV_OK);
}
void uvm_global_gpu_retain(const uvm_processor_mask_t *mask)

View File

@@ -73,6 +73,24 @@ module_param(uvm_disable_hmm, bool, 0444);
#include "uvm_va_policy.h"
#include "uvm_tools.h"
// nv_PageSwapCache() wraps the check of a page's swap cache flag so that the
// same call site builds across a wide range of kernel versions:
//
//   - PageSwapCache() is removed after 32f51ead3d77 ("mm: remove
//     PageSwapCache") in v6.12-rc1.
//   - folio_test_swapcache() was added in Linux 5.16 (d389a4a811551 "mm: Add
//     folio flag manipulation functions").
//   - Systems with HMM patches backported to 5.14 are possible, but those
//     systems do not include folio_test_swapcache(), so PageSwapCache() is
//     kept as the fallback.
//
// The folio variant is selected whenever conftest defined
// NV_FOLIO_TEST_SWAPCACHE_PRESENT.
//
// TODO: Bug 4050579: Remove this when migration of swap cached pages is updated
static __always_inline bool nv_PageSwapCache(struct page *page)
{
#if !defined(NV_FOLIO_TEST_SWAPCACHE_PRESENT)
    // No folio helper available: use the legacy per-page test.
    return PageSwapCache(page);
#else
    // Query the flag through the folio that contains this page.
    return folio_test_swapcache(page_folio(page));
#endif
}
static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
struct page *page);
@@ -166,7 +184,7 @@ static void hmm_copy_devmem_page(struct page *dst_page, struct page *src_page)
gpu = uvm_gpu_chunk_get_gpu(gpu_chunk);
status = uvm_mmu_chunk_map(gpu_chunk);
if (status != NV_OK)
goto out_zero;
goto out;
status = uvm_parent_gpu_map_cpu_pages(gpu->parent, dst_page, PAGE_SIZE, &dma_addr);
if (status != NV_OK)
@@ -197,7 +215,7 @@ out_unmap_cpu:
out_unmap_gpu:
uvm_mmu_chunk_unmap(gpu_chunk, NULL);
out_zero:
out:
// We can't fail eviction because we need to free the device-private pages
// so the GPU can be unregistered. So the best we can do is warn on any
// failures and zero the uninitialised page. This could result in data loss
@@ -227,6 +245,7 @@ static NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
}
lock_page(dst_page);
hmm_copy_devmem_page(dst_page, migrate_pfn_to_page(src_pfn));
dst_pfn = migrate_pfn(page_to_pfn(dst_page));
migrate_device_pages(&src_pfn, &dst_pfn, 1);
@@ -2698,7 +2717,7 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
continue;
}
if (PageSwapCache(src_page)) {
if (nv_PageSwapCache(src_page)) {
// TODO: Bug 4050579: Remove this when swap cached pages can be
// migrated.
status = NV_WARN_MISMATCHED_TARGET;

View File

@@ -228,47 +228,6 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
#define NV_UVM_GFP_FLAGS (GFP_KERNEL)
// Develop builds define DEBUG but enable optimization
#if defined(DEBUG) && !defined(NVIDIA_UVM_DEVELOP)
// Wrappers for functions not building correctly without optimizations on,
// implemented in uvm_debug_optimized.c. Notably the file is only built for
// debug builds, not develop or release builds.
// Unoptimized builds of atomic_xchg() hit a BUILD_BUG() on arm64 as it relies
// on __xchg being completely inlined:
// /usr/src/linux-3.12.19/arch/arm64/include/asm/cmpxchg.h:67:3: note: in expansion of macro 'BUILD_BUG'
//
// PowerPC hits a similar issue, but ends up with an undefined symbol:
// WARNING: "__xchg_called_with_bad_pointer" [...] undefined!
int nv_atomic_xchg(atomic_t *val, int new);
// Same problem as atomic_xchg() on PowerPC:
// WARNING: "__cmpxchg_called_with_bad_pointer" [...] undefined!
int nv_atomic_cmpxchg(atomic_t *val, int old, int new);
// Same problem as atomic_xchg() on PowerPC:
// WARNING: "__cmpxchg_called_with_bad_pointer" [...] undefined!
long nv_atomic_long_cmpxchg(atomic_long_t *val, long old, long new);
// This Linux kernel commit:
// 2016-08-30 0d025d271e55f3de21f0aaaf54b42d20404d2b23
// leads to build failures on x86_64, when compiling without optimization. Avoid
// that problem, by providing our own builds of copy_from_user / copy_to_user,
// for debug (non-optimized) UVM builds. Those are accessed via these
// nv_copy_to/from_user wrapper functions.
//
// Bug 1849583 has further details.
unsigned long nv_copy_from_user(void *to, const void __user *from, unsigned long n);
unsigned long nv_copy_to_user(void __user *to, const void *from, unsigned long n);
#else
#define nv_atomic_xchg atomic_xchg
#define nv_atomic_cmpxchg atomic_cmpxchg
#define nv_atomic_long_cmpxchg atomic_long_cmpxchg
#define nv_copy_to_user copy_to_user
#define nv_copy_from_user copy_from_user
#endif
#ifndef NV_ALIGN_DOWN
#define NV_ALIGN_DOWN(v,g) ((v) & ~((g) - 1))
#endif

View File

@@ -2206,7 +2206,7 @@ NV_STATUS uvm_api_tools_init_event_tracker(UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *
goto fail;
}
if (nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data, 0, (long)event_tracker) != 0) {
if (atomic_long_cmpxchg((atomic_long_t *)&filp->private_data, 0, (long)event_tracker) != 0) {
status = NV_ERR_INVALID_ARGUMENT;
goto fail;
}
@@ -2577,7 +2577,7 @@ static NV_STATUS tools_access_process_memory(uvm_va_space_t *va_space,
bool map_stage_mem_on_gpus = true;
if (is_write) {
NvU64 remaining = nv_copy_from_user(stage_addr, user_va_start, bytes_now);
NvU64 remaining = copy_from_user(stage_addr, user_va_start, bytes_now);
if (remaining != 0) {
status = NV_ERR_INVALID_ARGUMENT;
goto exit;
@@ -2660,7 +2660,7 @@ static NV_STATUS tools_access_process_memory(uvm_va_space_t *va_space,
// point where the data is copied out.
nv_speculation_barrier();
remaining = nv_copy_to_user(user_va_start, stage_addr, bytes_now);
remaining = copy_to_user(user_va_start, stage_addr, bytes_now);
if (remaining > 0) {
status = NV_ERR_INVALID_ARGUMENT;
goto exit;
@@ -2808,7 +2808,7 @@ NV_STATUS uvm_api_tools_get_processor_uuid_table(UVM_TOOLS_GET_PROCESSOR_UUID_TA
uvm_va_space_up_read(va_space);
if (params->tablePtr)
remaining = nv_copy_to_user((void *)params->tablePtr, uuids, sizeof(NvProcessorUuid) * count);
remaining = copy_to_user((void *)params->tablePtr, uuids, sizeof(NvProcessorUuid) * count);
else
remaining = 0;
uvm_kvfree(uuids);

View File

@@ -1281,9 +1281,9 @@ NV_STATUS uvm_va_range_block_create(uvm_va_range_t *va_range, size_t index, uvm_
return status;
// Try to insert it
old = (uvm_va_block_t *)nv_atomic_long_cmpxchg(&va_range->blocks[index],
(long)NULL,
(long)block);
old = (uvm_va_block_t *)atomic_long_cmpxchg(&va_range->blocks[index],
(long)NULL,
(long)block);
if (old) {
// Someone else beat us on the insert
uvm_va_block_release(block);

View File

@@ -588,7 +588,7 @@ NV_STATUS uvm_test_va_space_mm_or_current_retain(UVM_TEST_VA_SPACE_MM_OR_CURRENT
if (params->retain_done_ptr) {
NvU64 flag = true;
if (nv_copy_to_user((void __user *)params->retain_done_ptr, &flag, sizeof(flag)))
if (copy_to_user((void __user *)params->retain_done_ptr, &flag, sizeof(flag)))
status = NV_ERR_INVALID_ARGUMENT;
}