595.44.06

2026-05-13 17:26:11 +00:00 · 2026-05-01 12:47:07 -07:00
parent b04fce9aeb
commit 07dedd6f18
49 changed files with 48117 additions and 47832 deletions
--- a/kernel-open/Kbuild
+++ b/kernel-open/Kbuild
@@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc
 ccflags-y += -I$(src)
 ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
 ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
-ccflags-y += -DNV_VERSION_STRING=\"595.44.05\"
+ccflags-y += -DNV_VERSION_STRING=\"595.44.06\"

 # Include and link Tegra out-of-tree modules.
 ifneq ($(wildcard /usr/src/nvidia/nvidia-public),)
--- a/kernel-open/Makefile
+++ b/kernel-open/Makefile
@@ -71,31 +71,6 @@ else
  CC ?= cc
  LD ?= ld
  OBJDUMP ?= objdump
-  AWK ?= awk
-  # Bake the following awk program in a string. The program is needed to add C++
-  # to the languages excluded from BTF generation.
-  #
-  # Also, unconditionally return success (0) from the awk program, rather than
-  # propagating pahole's return status (with 'exit system(pahole_cmd)'), to
-  # workaround an DW_TAG_rvalue_reference_type error in
-  # kernel/nvidia-modeset.ko.
-  #
-  # BEGIN {
-  #     pahole_cmd = "pahole"
-  #     for (i = 1; i < ARGC; i++) {
-  #         if (ARGV[i] ~ /--lang_exclude=/) {
-  #             pahole_cmd = pahole_cmd sprintf(" %s,c++", ARGV[i])
-  #         } else {
-  #             pahole_cmd = pahole_cmd sprintf(" %s", ARGV[i])
-  #         }
-  #     }
-  #     system(pahole_cmd)
-  # }
-  PAHOLE_AWK_PROGRAM = BEGIN { pahole_cmd = \"pahole\"; for (i = 1; i < ARGC; i++) { if (ARGV[i] ~ /--lang_exclude=/) { pahole_cmd = pahole_cmd sprintf(\" %s,c++\", ARGV[i]); } else { pahole_cmd = pahole_cmd sprintf(\" %s\", ARGV[i]); } } system(pahole_cmd); }
-  # If scripts/pahole-flags.sh is not present in the kernel tree, add PAHOLE and
-  # PAHOLE_AWK_PROGRAM assignments to PAHOLE_VARIABLES; otherwise assign the
-  # empty string to PAHOLE_VARIABLES.
-  PAHOLE_VARIABLES=$(if $(wildcard $(KERNEL_SOURCES)/scripts/pahole-flags.sh),,"PAHOLE=$(AWK) '$(PAHOLE_AWK_PROGRAM)'")

  ifndef ARCH
    ARCH := $(shell uname -m | sed -e 's/i.86/i386/' \
@@ -133,10 +108,12 @@ else
  KBUILD_PARAMS += INSTALL_MOD_DIR="$(INSTALL_MOD_DIR)"
  KBUILD_PARAMS += NV_SPECTRE_V2=$(SPECTRE_V2_RETPOLINE)

+  export ORIG_PAHOLE := $(PAHOLE)
+
  .PHONY: modules module clean clean_conftest modules_install
  modules clean modules_install:
 	@$(MAKE) "LD=$(LD)" "CC=$(CC)" "OBJDUMP=$(OBJDUMP)" \
-	  $(PAHOLE_VARIABLES) $(KBUILD_PARAMS) $@
+	  PAHOLE=$(CURDIR)/pahole.sh $(KBUILD_PARAMS) $@
 	@if [ "$@" = "modules" ]; then \
 	  for module in $(NV_KERNEL_MODULES); do \
 	    if [ -x split-object-file.sh ]; then \
--- a/kernel-open/common/inc/nv-linux.h
+++ b/kernel-open/common/inc/nv-linux.h
@@ -1726,7 +1726,85 @@ typedef enum
 #include <linux/reset.h>
 #include <linux/dma-buf.h>
 #include <linux/gpio.h>
+#if defined(NV_LINUX_OF_GPIO_H_PRESENT)
 #include <linux/of_gpio.h>
+#else
+#include <linux/gpio/driver.h>
+
+/*
+ * of_get_named_gpio() was removed along with linux/of_gpio.h by commit
+ * 51aaf65bbd21 ("gpio: of: Remove <linux/of_gpio.h>"). Provide a compat
+ * implementation using the remaining public GPIO APIs.
+ *
+ * Resolves entry 'index' of the GPIO specifier property 'propname' on 'np'
+ * and returns the GPIO number that desc_to_gpio() reports for it.
+ *
+ * On failure a negative errno is returned: -ENOENT if 'np' is NULL, the
+ * of_parse_phandle_with_args_map() error if the property cannot be parsed,
+ * -EPROBE_DEFER if no GPIO device is found for the controller node, the
+ * negative value produced by the offset translation, or the PTR_ERR() of
+ * the descriptor lookup.
+ */
+static inline int of_get_named_gpio(const struct device_node *np,
+                                    const char *propname, int index)
+{
+    struct of_phandle_args gpiospec;
+    struct gpio_device *gdev;
+    struct gpio_chip *chip;
+    struct gpio_desc *desc;
+    int hwgpio;
+    int ret;
+
+    if (!np)
+        return -ENOENT;
+
+    ret =
+        of_parse_phandle_with_args_map(np, propname, "gpio", index, &gpiospec);
+    if (ret)
+        return ret;
+
+    gdev = gpio_device_find_by_fwnode(of_fwnode_handle(gpiospec.np));
+    if (!gdev) {
+        ret = -EPROBE_DEFER;
+        goto put_node;
+    }
+
+    /*
+     * Use the chip's of_xlate callback to translate the DT GPIO
+     * specifier into a linear offset.  Tegra GPIO controllers encode
+     * port and pin in args[0] and of_xlate sums per-port pin counts
+     * to produce the real offset.
+     */
+    chip = gpio_device_get_chip(gdev);
+#if defined(CONFIG_OF_GPIO)
+    if (chip->of_xlate)
+        hwgpio = chip->of_xlate(chip, &gpiospec, NULL);
+    else
+#endif
+        hwgpio = gpiospec.args[0];
+
+    if (hwgpio < 0) {
+        ret = hwgpio;
+        goto put_gdev;
+    }
+
+    desc = gpio_device_get_desc(gdev, hwgpio);
+    if (IS_ERR(desc))
+        ret = PTR_ERR(desc);
+    else
+        ret = desc_to_gpio(desc);
+
+put_gdev:
+    gpio_device_put(gdev);
+put_node:
+    /*
+     * gpiospec, including gpiospec.np, is passed to of_xlate() above, so the
+     * node reference is dropped only once the specifier is no longer used.
+     */
+    of_node_put(gpiospec.np);
+    return ret;
+}
+#endif
 #include <linux/of_device.h>
 #include <linux/of_platform.h>

--- a/kernel-open/conftest.sh
+++ b/kernel-open/conftest.sh
@@ -363,7 +363,8 @@ check_symbol_exists() {
        for KMOD in linuxkpi.ko linuxkpi_gplv2.ko drm.ko dmabuf.ko ; do
            for KMODPATH in $KMODPATHS; do
                if [ -e "$KMODPATH/$KMOD" ] ; then
-                    if nm "$KMODPATH/$KMOD" | grep "$SYMBOL" >/dev/null 2>&1 ; then
+                    # Check whether nm lists this symbol as a global text (T) symbol
+                    if nm "$KMODPATH/$KMOD" | grep "T.*$SYMBOL" >/dev/null 2>&1 ; then
                        return 0
                    fi
                fi
--- a/kernel-open/header-presence-tests.mk
+++ b/kernel-open/header-presence-tests.mk
@@ -41,6 +41,7 @@ NV_HEADER_PRESENCE_TESTS = \
  linux/vfio_pci_core.h \
  linux/cc_platform.h \
  linux/slub_def.h \
+  linux/of_gpio.h \
  asm/cpufeature.h \
  asm/mshyperv.h \
  crypto/sig.h
--- a/kernel-open/nvidia-drm/nvidia-drm-linux.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-linux.c
@@ -64,3 +64,4 @@ module_exit(nv_linux_drm_exit);

 MODULE_INFO(supported, "external");
 MODULE_VERSION(NV_VERSION_STRING);
+MODULE_DESCRIPTION("NVIDIA DRM kernel module");
--- a/kernel-open/nvidia-modeset/nvidia-modeset-linux.c
+++ b/kernel-open/nvidia-modeset/nvidia-modeset-linux.c
@@ -2229,3 +2229,4 @@ module_exit(nvkms_exit);

 MODULE_INFO(supported, "external");
 MODULE_VERSION(NV_VERSION_STRING);
+MODULE_DESCRIPTION("NVIDIA modeset kernel module");
--- a/kernel-open/nvidia-uvm/uvm.c
+++ b/kernel-open/nvidia-uvm/uvm.c
@@ -1230,3 +1230,4 @@ module_exit(uvm_exit_entry);
 MODULE_LICENSE("Dual MIT/GPL");
 MODULE_INFO(supported, "external");
 MODULE_VERSION(NV_VERSION_STRING);
+MODULE_DESCRIPTION("NVIDIA Unified Virtual Memory kernel module");
--- a/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c
@@ -1800,6 +1800,7 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
        // ATS faults can't be unserviceable, since unserviceable faults require
        // GMMU PTEs.
        UVM_ASSERT(!current_entry->is_fatal);
+        UVM_ASSERT(current_entry->gpu == gpu);

        i++;

@@ -1849,6 +1850,7 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,

    } while (current_entry &&
             (current_entry->fault_address < outer) &&
+             (previous_entry->gpu == current_entry->gpu) &&
             (previous_entry->va_space == current_entry->va_space));

    // Service the last sub-batch.
--- a/kernel-open/nvidia-uvm/uvm_hmm.c
+++ b/kernel-open/nvidia-uvm/uvm_hmm.c
@@ -1582,6 +1582,36 @@ uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
    return uvm_va_block_region_from_start_end(va_block, start, end);
 }

+// Return the permissions allowed when mapping the page at page_index of
+// va_block on processor_id, based on the CPU PTE bits tracked in the block.
+// UVM_PROT_NONE is returned when the CPU is not set in the block's mapped mask.
+uvm_prot_t uvm_hmm_compute_mapping_prot(uvm_va_block_t *va_block,
+                                        uvm_processor_id_t processor_id,
+                                        uvm_page_index_t page_index)
+{
+    // No access is granted unless the CPU is in the block's mapped mask.
+    if (!uvm_processor_mask_test(&va_block->mapped, UVM_ID_CPU))
+        return UVM_PROT_NONE;
+
+    // Without the CPU write bit the page is read-only at best.
+    if (!uvm_page_mask_test(&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_WRITE], page_index)) {
+        if (uvm_page_mask_test(&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_READ], page_index))
+            return UVM_PROT_READ_ONLY;
+
+        return UVM_PROT_NONE;
+    }
+
+    // If the CPU has write access it also has atomic access, so it's fine for
+    // any GPU with HW support to do atomic accesses.
+    if (uvm_processor_mask_test(&va_block->hmm.va_space->has_native_atomics[uvm_id_value(UVM_ID_CPU)],
+                                processor_id))
+        return UVM_PROT_READ_WRITE_ATOMIC;
+
+    // Otherwise the GPU needs to fault on atomic access to ensure the CPU is
+    // unmapped.
+    return UVM_PROT_READ_WRITE;
+}
+
 uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
                                        struct vm_area_struct *vma,
                                        NvU64 addr)
--- a/kernel-open/nvidia-uvm/uvm_hmm.h
+++ b/kernel-open/nvidia-uvm/uvm_hmm.h
@@ -258,6 +258,14 @@ typedef struct
                                                      const uvm_va_policy_t *policy,
                                                      NvU64 address);

+    // Return the actual permissions allowed when mapping a page within a
+    // va_block on the given processor_id. This may differ from the logical
+    // permission if for example the kernel has the CPU pages mapped read-only
+    // to do copy-on-write.
+    uvm_prot_t uvm_hmm_compute_mapping_prot(uvm_va_block_t *va_block,
+                                            uvm_processor_id_t processor_id,
+                                            uvm_page_index_t page_index);
+
    // Return the logical protection allowed of a HMM va_block for the page at
    // the given address within the vma which must be valid. This is usually
    // obtained from uvm_hmm_va_block_find_create()).
@@ -561,6 +569,13 @@ typedef struct
        return (uvm_va_block_region_t){};
    }

+    static uvm_prot_t uvm_hmm_compute_mapping_prot(uvm_va_block_t *va_block,
+                                                   uvm_processor_id_t processor_id,
+                                                   uvm_page_index_t page_index)
+    {
+        return UVM_PROT_NONE;
+    }
+
    static uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
                                                   struct vm_area_struct *vma,
                                                   NvU64 addr)
--- a/kernel-open/nvidia-uvm/uvm_va_block.c
+++ b/kernel-open/nvidia-uvm/uvm_va_block.c
@@ -10991,6 +10991,13 @@ uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block
    uvm_processor_mask_t *resident_processors = &va_block_context->scratch_processor_mask;
    NvU32 resident_processors_count;

+    // TODO: Bug 5841902
+    // There are several calls to uvm_va_block_is_hmm() which need to be removed
+    if (uvm_va_block_is_hmm(va_block))
+        return uvm_hmm_compute_mapping_prot(va_block,
+                                            processor_id,
+                                            page_index);
+
    uvm_va_block_page_resident_processors(va_block, page_index, resident_processors);
    resident_processors_count = uvm_processor_mask_get_count(resident_processors);

--- a/kernel-open/nvidia-uvm/uvm_va_range.c
+++ b/kernel-open/nvidia-uvm/uvm_va_range.c
@@ -929,8 +929,12 @@ void uvm_va_range_remove_gpu_va_space(uvm_va_range_t *va_range,
                                                        gpu_va_space->gpu);
            break;
        case UVM_VA_RANGE_TYPE_DEVICE_P2P:
-            unmap_mapping_range(va_range->va_space->mapping, va_range->node.start, uvm_va_range_size(va_range), 1);
-            uvm_va_range_deinit_device_p2p(uvm_va_range_to_device_p2p(va_range), deferred_free_list);
+            // Device P2P ranges are associated with a specific GPU so destroy
+            // the range entirely if unregistering the associated GPU.
+            if (uvm_va_range_to_device_p2p(va_range)->gpu == gpu_va_space->gpu) {
+                unmap_mapping_range(va_range->va_space->mapping, va_range->node.start, uvm_va_range_size(va_range), 1);
+                uvm_va_range_deinit_device_p2p(uvm_va_range_to_device_p2p(va_range), deferred_free_list);
+            }
            break;
        default:
            UVM_ASSERT_MSG(0, "[0x%llx, 0x%llx] has type %d\n",
@@ -1172,9 +1176,10 @@ void uvm_va_range_unregister_gpu(uvm_va_range_t *va_range,
            va_range_unregister_gpu_semaphore_pool(uvm_va_range_to_semaphore_pool(va_range), gpu);
            break;
        case UVM_VA_RANGE_TYPE_DEVICE_P2P:
-            // All ranges should have been deinited by GPU VA space unregister,
-            // which should have already happened.
-            UVM_ASSERT(!uvm_va_range_to_device_p2p(va_range)->p2p_mem);
+            // All ranges for this GPU should have been deinited by GPU VA space
+            // unregister, which should have already happened.
+            if (uvm_va_range_to_device_p2p(va_range)->p2p_mem != NULL)
+                UVM_ASSERT(uvm_va_range_to_device_p2p(va_range)->gpu != gpu);
            break;
        default:
            UVM_ASSERT_MSG(0, "[0x%llx, 0x%llx] has type %d\n",
--- a/kernel-open/nvidia/nv-mmap.c
+++ b/kernel-open/nvidia/nv-mmap.c
@@ -129,10 +129,8 @@ nvidia_vma_access(
    nv_alloc_mapping_list_node_t **pfile_mapping_list = NULL;
    nv_alloc_mapping_context_t *mmap_context = NULL;
    NvU64 offsInVma = addr - vma->vm_start;
-    NvBool bIsNuma = NV_FALSE;
    int ret = -EINVAL;
-
-    bIsNuma = pfn_valid(mmap_context->access_start >> PAGE_SHIFT);
+    NvBool has_pages;

    pageIndex = (offsInVma >> PAGE_SHIFT);
    pageOffset = (offsInVma & ~PAGE_MASK);
@@ -152,6 +150,8 @@ nvidia_vma_access(

    mmap_context = &(*pfile_mapping_list)->context;

+    has_pages = (mmap_context->num_pages != 0);
+
    if (write && !(mmap_context->prot & NV_PROTECT_WRITEABLE))
    {
        ret = -EACCES;
@@ -180,7 +180,7 @@ nvidia_vma_access(
        pageIndex = nv_array_index_no_speculate(pageIndex, at->num_pages);
        kernel_mapping = (void *)(at->page_table[pageIndex].virt_addr + pageOffset);
    }
-    else if (bIsNuma)
+    else if (has_pages)
    {
        struct page *pPage = NV_GET_PAGE_STRUCT(mmap_context->page_array[pageIndex]);
        NvU8 *pPagePtr = (NvU8 *) page_address(pPage);
@@ -230,7 +230,7 @@ found:
        memcpy(buffer, kernel_mapping, length);
 #endif // defined(NVCPU_AARCH64)

-    if (at == NULL && !bIsNuma)
+    if (at == NULL && !has_pages)
    {
        kernel_mapping = ((char *)kernel_mapping - pageOffset);
        os_unmap_kernel_space(kernel_mapping, PAGE_SIZE);
--- a/kernel-open/nvidia/nv-usermap.c
+++ b/kernel-open/nvidia/nv-usermap.c
@@ -104,6 +104,11 @@ NV_STATUS NV_API_CALL nv_add_mapping_context_to_file(
    if (*pfile_mapping_list == NULL)
    {
        *pfile_mapping_list = pNewNode;
+        if (NV_IS_CTL_DEVICE(nv))
+        {
+            nv_alloc_t *at = (nv_alloc_t *) nvamc->alloc;
+            atomic64_inc(&at->usage_count);
+        }
    }
    else
    {
--- a/kernel-open/nvidia/nv.c
+++ b/kernel-open/nvidia/nv.c
@@ -121,6 +121,7 @@ MODULE_LICENSE("Dual MIT/GPL");

 MODULE_INFO(supported, "external");
 MODULE_VERSION(NV_VERSION_STRING);
+MODULE_DESCRIPTION("NVIDIA core GPU kernel module");
 MODULE_ALIAS_CHARDEV_MAJOR(NV_MAJOR_DEVICE_NUMBER);

 /*
@@ -3213,6 +3214,20 @@ nvidia_ctl_close(

    rm_cleanup_file_private(sp, nv, &nvlfp->nvfp);

+    // Populate free list from file nodes
+    {
+        nv_alloc_mapping_list_node_t *pNode = nvlfp->file_mapping_list;
+        while (pNode != NULL)
+        {
+            if (pNode->context.alloc != NULL)
+            {
+                nv_alloc_t *at = pNode->context.alloc;
+                nv_alloc_release(nvlfp, at);
+            }
+            pNode = pNode->pNext;
+        }
+    }
+
    if (nvlfp->free_list != NULL)
    {
        at = nvlfp->free_list;
--- a/kernel-open/pahole.sh
+++ b/kernel-open/pahole.sh
@@ -0,0 +1,83 @@
+#!/bin/sh
+#
+# Wrapper around pahole, passed to the kernel build as PAHOLE=<this script>.
+#
+# When called to encode BTF, or with an explicit --lang_exclude= option, it
+# adds "c++" to the excluded languages, runs the real pahole and exits 0
+# whatever pahole returned. Any other invocation is passed through untouched.
+#
+# Environment:
+#   ORIG_PAHOLE     pahole command to run (default: pahole)
+#   DRY_RUN         set to "echo" to print the command instead of running it
+#   KBUILD_VERBOSE  trace this script with 'set -x' when it contains "1"
+
+# Kbuild verbosity levels can be combined (e.g. V=12), so test for the "1"
+# flag instead of requiring the value to be exactly "1".
+case "$KBUILD_VERBOSE" in
+*1*)
+    set -x
+    ;;
+esac
+
+PAHOLE=${ORIG_PAHOLE:-pahole}
+# Use DRY_RUN=echo to print the command instead of executing it, for debugging.
+DRY_RUN=${DRY_RUN:-}
+
+btf_encode=0
+lang_exclude=
+
+# Parse args: detect BTF encoding and existing --lang_exclude, while
+# rebuilding the argument list without --lang_exclude.
+# "$@" is expanded once when the loop starts, so the positional parameters can
+# be rebuilt inside the loop; args_start records whether they were reset yet.
+args_start=0
+for arg in "$@"; do
+    case "$arg" in
+    --btf_features=*encode_force* | -J | --btf_encode | --btf_encode_force)
+        btf_encode=1
+        ;;
+    --lang_exclude=*)
+        lang_exclude="${arg#*=}"
+        # Drop this arg from the rebuilt list.
+        if [ $args_start -eq 0 ]; then
+            set --
+            args_start=1
+        fi
+        continue
+        ;;
+    esac
+    if [ $args_start -eq 0 ]; then
+        set -- "$arg"
+        args_start=1
+    else
+        set -- "$@" "$arg"
+    fi
+done
+
+# Capture anything the kernel is already excluding and extend it with c++.
+# Even if in future the kernel doesn't have to exclude other languages,
+# make sure to exclude c++ when encoding BTF.
+if [ $btf_encode -eq 1 ] || [ -n "$lang_exclude" ]; then
+    if [ -n "$lang_exclude" ]; then
+        lang_exclude="${lang_exclude},c++"
+    else
+        lang_exclude="c++"
+    fi
+    set -- --lang_exclude="$lang_exclude" "$@"
+
+    $DRY_RUN "$PAHOLE" "$@"
+    ret=$?
+
+    # Until pahole 1.31, return code was still an error regardless of
+    # --btf_encode_force / --btf_features=encode_force.
+    # A pure version check is not possible as it's likely that pahole
+    # from distros will have fixes backported. Just ignore the error for now.
+    if [ $ret -ne 0 ]; then
+        echo "pahole.sh: warning: pahole exited with status $ret (ignored)" >&2
+    fi
+    exit 0
+fi
+
+# Do not override options or exit status, as this may interfere with the kernel
+# build system when determining pahole version and options.
+$DRY_RUN exec "$PAHOLE" "$@"