595.71.05

This commit is contained in:
Andy Ritger
2026-04-28 08:55:30 -07:00
parent db0c4e65c8
commit 51edebee79
47 changed files with 48117 additions and 47816 deletions

View File

@@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc
ccflags-y += -I$(src)
ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
ccflags-y += -DNV_VERSION_STRING=\"595.58.03\"
ccflags-y += -DNV_VERSION_STRING=\"595.71.05\"
# Include and link Tegra out-of-tree modules.
ifneq ($(wildcard /usr/src/nvidia/nvidia-public),)

View File

@@ -71,31 +71,6 @@ else
CC ?= cc
LD ?= ld
OBJDUMP ?= objdump
AWK ?= awk
# Bake the following awk program into a string. The program is needed to add C++
# to the languages excluded from BTF generation.
#
# Also, unconditionally return success (0) from the awk program, rather than
# propagating pahole's return status (with 'exit system(pahole_cmd)'), to
# work around a DW_TAG_rvalue_reference_type error in
# kernel/nvidia-modeset.ko.
#
# BEGIN {
# pahole_cmd = "pahole"
# for (i = 1; i < ARGC; i++) {
# if (ARGV[i] ~ /--lang_exclude=/) {
# pahole_cmd = pahole_cmd sprintf(" %s,c++", ARGV[i])
# } else {
# pahole_cmd = pahole_cmd sprintf(" %s", ARGV[i])
# }
# }
# system(pahole_cmd)
# }
PAHOLE_AWK_PROGRAM = BEGIN { pahole_cmd = \"pahole\"; for (i = 1; i < ARGC; i++) { if (ARGV[i] ~ /--lang_exclude=/) { pahole_cmd = pahole_cmd sprintf(\" %s,c++\", ARGV[i]); } else { pahole_cmd = pahole_cmd sprintf(\" %s\", ARGV[i]); } } system(pahole_cmd); }
# If scripts/pahole-flags.sh is not present in the kernel tree, add PAHOLE and
# PAHOLE_AWK_PROGRAM assignments to PAHOLE_VARIABLES; otherwise assign the
# empty string to PAHOLE_VARIABLES.
PAHOLE_VARIABLES=$(if $(wildcard $(KERNEL_SOURCES)/scripts/pahole-flags.sh),,"PAHOLE=$(AWK) '$(PAHOLE_AWK_PROGRAM)'")
ifndef ARCH
ARCH := $(shell uname -m | sed -e 's/i.86/i386/' \
@@ -133,10 +108,12 @@ else
KBUILD_PARAMS += INSTALL_MOD_DIR="$(INSTALL_MOD_DIR)"
KBUILD_PARAMS += NV_SPECTRE_V2=$(SPECTRE_V2_RETPOLINE)
export ORIG_PAHOLE := $(PAHOLE)
.PHONY: modules module clean clean_conftest modules_install
modules clean modules_install:
@$(MAKE) "LD=$(LD)" "CC=$(CC)" "OBJDUMP=$(OBJDUMP)" \
$(PAHOLE_VARIABLES) $(KBUILD_PARAMS) $@
PAHOLE=$(CURDIR)/pahole.sh $(KBUILD_PARAMS) $@
@if [ "$@" = "modules" ]; then \
for module in $(NV_KERNEL_MODULES); do \
if [ -x split-object-file.sh ]; then \

View File

@@ -1726,7 +1726,69 @@ typedef enum
#include <linux/reset.h>
#include <linux/dma-buf.h>
#include <linux/gpio.h>
#if defined(NV_LINUX_OF_GPIO_H_PRESENT)
#include <linux/of_gpio.h>
#else
#include <linux/gpio/driver.h>
/*
* of_get_named_gpio() was removed along with linux/of_gpio.h by commit
* 51aaf65bbd21 ("gpio: of: Remove <linux/of_gpio.h>"). Provide a compat
* implementation using the remaining public GPIO APIs.
*/
/*
 * Compat implementation of of_get_named_gpio() for kernels where
 * linux/of_gpio.h was removed (see commit 51aaf65bbd21 referenced above).
 * Resolves entry 'index' of DT property 'propname' on node 'np' to a
 * global GPIO number using only the remaining public gpio_device_* APIs.
 *
 * Returns the global GPIO number on success, or a negative errno:
 *   -ENOENT        if np is NULL,
 *   -EPROBE_DEFER  if the referenced GPIO controller is not registered yet,
 *   or the error from phandle parsing / descriptor lookup.
 */
static inline int of_get_named_gpio(const struct device_node *np,
const char *propname, int index)
{
struct of_phandle_args gpiospec;
struct gpio_device *gdev;
struct gpio_desc *desc;
int ret;
if (!np)
return -ENOENT;
/* "gpio" is the cells-name stem, i.e. this honors #gpio-cells/gpio-map. */
ret =
of_parse_phandle_with_args_map(np, propname, "gpio", index, &gpiospec);
if (ret)
return ret;
gdev = gpio_device_find_by_fwnode(of_fwnode_handle(gpiospec.np));
/* Drop the node ref from phandle parsing; gdev holds its own reference. */
of_node_put(gpiospec.np);
if (!gdev)
return -EPROBE_DEFER;
/*
 * Use the chip's of_xlate callback to translate the DT GPIO
 * specifier into a linear offset. Tegra GPIO controllers encode
 * port and pin in args[0] and of_xlate sums per-port pin counts
 * to produce the real offset.
 */
{
struct gpio_chip *chip = gpio_device_get_chip(gdev);
int hwgpio;
#if defined(CONFIG_OF_GPIO)
if (chip->of_xlate)
hwgpio = chip->of_xlate(chip, &gpiospec, NULL);
else
#endif
/* Without of_xlate, fall back to a 1:1 specifier-to-offset mapping. */
hwgpio = gpiospec.args[0];
if (hwgpio < 0) {
gpio_device_put(gdev);
return hwgpio;
}
desc = gpio_device_get_desc(gdev, hwgpio);
}
/*
 * NOTE(review): desc is dereferenced (desc_to_gpio) after the gdev
 * reference is dropped; this assumes the descriptor storage outlives
 * the reference — confirm against the kernel's gpio_device lifetime
 * rules.
 */
gpio_device_put(gdev);
if (IS_ERR(desc))
return PTR_ERR(desc);
return desc_to_gpio(desc);
}
#endif
#include <linux/of_device.h>
#include <linux/of_platform.h>

View File

@@ -363,7 +363,8 @@ check_symbol_exists() {
for KMOD in linuxkpi.ko linuxkpi_gplv2.ko drm.ko dmabuf.ko ; do
for KMODPATH in $KMODPATHS; do
if [ -e "$KMODPATH/$KMOD" ] ; then
if nm "$KMODPATH/$KMOD" | grep "$SYMBOL" >/dev/null 2>&1 ; then
# Check whether this symbol appears as a global text ("T") symbol in the nm output
if nm "$KMODPATH/$KMOD" | grep "T.*$SYMBOL" >/dev/null 2>&1 ; then
return 0
fi
fi

View File

@@ -41,6 +41,7 @@ NV_HEADER_PRESENCE_TESTS = \
linux/vfio_pci_core.h \
linux/cc_platform.h \
linux/slub_def.h \
linux/of_gpio.h \
asm/cpufeature.h \
asm/mshyperv.h \
crypto/sig.h

View File

@@ -64,3 +64,4 @@ module_exit(nv_linux_drm_exit);
MODULE_INFO(supported, "external");
MODULE_VERSION(NV_VERSION_STRING);
MODULE_DESCRIPTION("NVIDIA DRM kernel module");

View File

@@ -2229,3 +2229,4 @@ module_exit(nvkms_exit);
MODULE_INFO(supported, "external");
MODULE_VERSION(NV_VERSION_STRING);
MODULE_DESCRIPTION("NVIDIA modeset kernel module");

View File

@@ -1230,3 +1230,4 @@ module_exit(uvm_exit_entry);
MODULE_LICENSE("Dual MIT/GPL");
MODULE_INFO(supported, "external");
MODULE_VERSION(NV_VERSION_STRING);
MODULE_DESCRIPTION("NVIDIA Unified Virtual Memory kernel module");

View File

@@ -1800,6 +1800,7 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
// ATS faults can't be unserviceable, since unserviceable faults require
// GMMU PTEs.
UVM_ASSERT(!current_entry->is_fatal);
UVM_ASSERT(current_entry->gpu == gpu);
i++;
@@ -1849,6 +1850,7 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
} while (current_entry &&
(current_entry->fault_address < outer) &&
(previous_entry->gpu == current_entry->gpu) &&
(previous_entry->va_space == current_entry->va_space));
// Service the last sub-batch.

View File

@@ -1582,6 +1582,31 @@ uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
return uvm_va_block_region_from_start_end(va_block, start, end);
}
// Compute the permission with which processor_id may map the page at
// page_index of this HMM va_block, derived from the CPU PTE bits tracked
// in the block:
//   - CPU not mapped at all      -> UVM_PROT_NONE
//   - CPU has write access       -> READ_WRITE_ATOMIC if processor_id has
//                                   native atomics to the CPU, else READ_WRITE
//   - CPU has read-only access   -> UVM_PROT_READ_ONLY
//   - otherwise                  -> UVM_PROT_NONE
uvm_prot_t uvm_hmm_compute_mapping_prot(uvm_va_block_t *va_block,
uvm_processor_id_t processor_id,
uvm_page_index_t page_index)
{
// If the CPU has no mapping recorded for this block there is nothing to
// mirror; grant no access.
if (!uvm_processor_mask_test(&va_block->mapped, UVM_ID_CPU))
return UVM_PROT_NONE;
if (uvm_page_mask_test(&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_WRITE], page_index)) {
if (uvm_processor_mask_test(&va_block->hmm.va_space->has_native_atomics[uvm_id_value(UVM_ID_CPU)],
processor_id))
// If the CPU has write access it also has atomic access, so it's
// fine for any GPU with HW support to do atomic accesses.
return UVM_PROT_READ_WRITE_ATOMIC;
else
// Otherwise the GPU needs to fault on atomic access to ensure the
// CPU is unmapped.
return UVM_PROT_READ_WRITE;
}
if (uvm_page_mask_test(&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_READ], page_index))
return UVM_PROT_READ_ONLY;
return UVM_PROT_NONE;
}
uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
struct vm_area_struct *vma,
NvU64 addr)

View File

@@ -258,6 +258,14 @@ typedef struct
const uvm_va_policy_t *policy,
NvU64 address);
// Return the actual permissions allowed when mapping a page within a
// va_block on the given processor_id. This may differ from the logical
// permission if for example the kernel has the CPU pages mapped read-only
// to do copy-on-write.
uvm_prot_t uvm_hmm_compute_mapping_prot(uvm_va_block_t *va_block,
uvm_processor_id_t processor_id,
uvm_page_index_t page_index);
// Return the logical protection allowed of a HMM va_block for the page at
// the given address within the vma which must be valid. This is usually
// obtained from uvm_hmm_va_block_find_create()).
@@ -561,6 +569,13 @@ typedef struct
return (uvm_va_block_region_t){};
}
// Stub variant (presumably for builds without HMM support, matching the
// neighboring stubs in this section — TODO confirm the guarding #if):
// always reports that no mapping permission applies.
static uvm_prot_t uvm_hmm_compute_mapping_prot(uvm_va_block_t *va_block,
uvm_processor_id_t processor_id,
uvm_page_index_t page_index)
{
return UVM_PROT_NONE;
}
static uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
struct vm_area_struct *vma,
NvU64 addr)

View File

@@ -10991,6 +10991,13 @@ uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block
uvm_processor_mask_t *resident_processors = &va_block_context->scratch_processor_mask;
NvU32 resident_processors_count;
// TODO: Bug 5841902
// There are several calls to uvm_va_block_is_hmm() which need to be removed
if (uvm_va_block_is_hmm(va_block))
return uvm_hmm_compute_mapping_prot(va_block,
processor_id,
page_index);
uvm_va_block_page_resident_processors(va_block, page_index, resident_processors);
resident_processors_count = uvm_processor_mask_get_count(resident_processors);

View File

@@ -929,8 +929,12 @@ void uvm_va_range_remove_gpu_va_space(uvm_va_range_t *va_range,
gpu_va_space->gpu);
break;
case UVM_VA_RANGE_TYPE_DEVICE_P2P:
unmap_mapping_range(va_range->va_space->mapping, va_range->node.start, uvm_va_range_size(va_range), 1);
uvm_va_range_deinit_device_p2p(uvm_va_range_to_device_p2p(va_range), deferred_free_list);
// Device P2P ranges are associated with a specific GPU so destroy
// the range entirely if unregistering the associated GPU.
if (uvm_va_range_to_device_p2p(va_range)->gpu == gpu_va_space->gpu) {
unmap_mapping_range(va_range->va_space->mapping, va_range->node.start, uvm_va_range_size(va_range), 1);
uvm_va_range_deinit_device_p2p(uvm_va_range_to_device_p2p(va_range), deferred_free_list);
}
break;
default:
UVM_ASSERT_MSG(0, "[0x%llx, 0x%llx] has type %d\n",
@@ -1172,9 +1176,10 @@ void uvm_va_range_unregister_gpu(uvm_va_range_t *va_range,
va_range_unregister_gpu_semaphore_pool(uvm_va_range_to_semaphore_pool(va_range), gpu);
break;
case UVM_VA_RANGE_TYPE_DEVICE_P2P:
// All ranges should have been deinited by GPU VA space unregister,
// which should have already happened.
UVM_ASSERT(!uvm_va_range_to_device_p2p(va_range)->p2p_mem);
// All ranges for this GPU should have been deinited by GPU VA space
// unregister, which should have already happened.
if (uvm_va_range_to_device_p2p(va_range)->p2p_mem != NULL)
UVM_ASSERT(uvm_va_range_to_device_p2p(va_range)->gpu != gpu);
break;
default:
UVM_ASSERT_MSG(0, "[0x%llx, 0x%llx] has type %d\n",

View File

@@ -129,10 +129,8 @@ nvidia_vma_access(
nv_alloc_mapping_list_node_t **pfile_mapping_list = NULL;
nv_alloc_mapping_context_t *mmap_context = NULL;
NvU64 offsInVma = addr - vma->vm_start;
NvBool bIsNuma = NV_FALSE;
int ret = -EINVAL;
bIsNuma = pfn_valid(mmap_context->access_start >> PAGE_SHIFT);
NvBool has_pages;
pageIndex = (offsInVma >> PAGE_SHIFT);
pageOffset = (offsInVma & ~PAGE_MASK);
@@ -152,6 +150,8 @@ nvidia_vma_access(
mmap_context = &(*pfile_mapping_list)->context;
has_pages = (mmap_context->num_pages != 0);
if (write && !(mmap_context->prot & NV_PROTECT_WRITEABLE))
{
ret = -EACCES;
@@ -180,7 +180,7 @@ nvidia_vma_access(
pageIndex = nv_array_index_no_speculate(pageIndex, at->num_pages);
kernel_mapping = (void *)(at->page_table[pageIndex].virt_addr + pageOffset);
}
else if (bIsNuma)
else if (has_pages)
{
struct page *pPage = NV_GET_PAGE_STRUCT(mmap_context->page_array[pageIndex]);
NvU8 *pPagePtr = (NvU8 *) page_address(pPage);
@@ -230,7 +230,7 @@ found:
memcpy(buffer, kernel_mapping, length);
#endif // defined(NVCPU_AARCH64)
if (at == NULL && !bIsNuma)
if (at == NULL && !has_pages)
{
kernel_mapping = ((char *)kernel_mapping - pageOffset);
os_unmap_kernel_space(kernel_mapping, PAGE_SIZE);

View File

@@ -104,6 +104,11 @@ NV_STATUS NV_API_CALL nv_add_mapping_context_to_file(
if (*pfile_mapping_list == NULL)
{
*pfile_mapping_list = pNewNode;
if (NV_IS_CTL_DEVICE(nv))
{
nv_alloc_t *at = (nv_alloc_t *) nvamc->alloc;
atomic64_inc(&at->usage_count);
}
}
else
{

View File

@@ -121,6 +121,7 @@ MODULE_LICENSE("Dual MIT/GPL");
MODULE_INFO(supported, "external");
MODULE_VERSION(NV_VERSION_STRING);
MODULE_DESCRIPTION("NVIDIA core GPU kernel module");
MODULE_ALIAS_CHARDEV_MAJOR(NV_MAJOR_DEVICE_NUMBER);
/*
@@ -3213,6 +3214,20 @@ nvidia_ctl_close(
rm_cleanup_file_private(sp, nv, &nvlfp->nvfp);
// Populate free list from file nodes
{
nv_alloc_mapping_list_node_t *pNode = nvlfp->file_mapping_list;
while (pNode != NULL)
{
if (pNode->context.alloc != NULL)
{
nv_alloc_t *at = pNode->context.alloc;
nv_alloc_release(nvlfp, at);
}
pNode = pNode->pNext;
}
}
if (nvlfp->free_list != NULL)
{
at = nvlfp->free_list;

66
kernel-open/pahole.sh Executable file
View File

@@ -0,0 +1,66 @@
#!/bin/sh
# Wrapper around pahole, invoked by the NVIDIA kernel-module build via
# PAHOLE=.../pahole.sh. When BTF encoding is requested it forces "c++" into
# pahole's --lang_exclude list and ignores pahole's exit status; otherwise
# it execs the real pahole untouched.
if [ "$KBUILD_VERBOSE" = "1" ]; then
set -x
fi
# Fall back to a plain "pahole" lookup in PATH if the build did not export
# ORIG_PAHOLE.
PAHOLE=${ORIG_PAHOLE:-pahole}
# Use DRY_RUN=echo to print the command instead of executing it, for debugging.
DRY_RUN=${DRY_RUN:-}
btf_encode=0
lang_exclude=
# Parse args: detect BTF encoding and existing --lang_exclude, while
# rebuilding the argument list without --lang_exclude.
# (POSIX sh expands "$@" once before the loop starts, so rewriting the
# positional parameters with "set --" inside the loop is safe.)
args_start=0
for arg in "$@"; do
case "$arg" in
--btf_features=*encode_force* | -J | --btf_encode | --btf_encode_force)
btf_encode=1
;;
--lang_exclude=*)
lang_exclude="${arg#*=}"
# Drop this arg from the rebuilt list.
if [ $args_start -eq 0 ]; then
set --
args_start=1
fi
continue
;;
esac
# Re-append every other argument unchanged.
if [ $args_start -eq 0 ]; then
set -- "$arg"
args_start=1
else
set -- "$@" "$arg"
fi
done
# Capture anything the kernel is already excluding and extend it with c++.
# Even if in future the kernel doesn't have to exclude other languages,
# make sure to exclude c++ when generating encoding BTF.
if [ $btf_encode -eq 1 ] || [ -n "$lang_exclude" ]; then
if [ -n "$lang_exclude" ]; then
lang_exclude="${lang_exclude},c++"
else
lang_exclude="c++"
fi
set -- --lang_exclude="$lang_exclude" "$@"
$DRY_RUN "$PAHOLE" "$@"
ret=$?
# Until pahole 1.31, the exit code still reported an error regardless of
# --btf_encode_force / --btf_features=encode_force.
# A pure version check is not possible as it's likely that pahole
# from distros will have fixes backported. Just ignore the error for now.
if [ $ret -ne 0 ]; then
echo "pahole.sh: warning: pahole exited with status $ret (ignored)" >&2
fi
exit 0
fi
# Do not override options or exit status, as this may interfere with the kernel
# build system when determining pahole version and options.
# (With DRY_RUN=echo this prints a literal "exec ..." line, which is fine for
# debugging.)
$DRY_RUN exec "$PAHOLE" "$@"