Compare commits


2 Commits

Author        SHA1        Message    Date
Andy Ritger   c700e8f91c  515.86.01  2022-11-22 10:04:21 -08:00
Andy Ritger   fe0728787f  515.76     2022-09-20 13:54:59 -07:00
64 changed files with 1073 additions and 368 deletions

View File

@@ -2,6 +2,15 @@
 ## Release 515 Entries

+### [515.86.01] 2022-11-22
+
 ### [515.76] 2022-09-20

+#### Fixed
+
+- Improved compatibility with new Linux kernel releases
+- Fixed possible excessive GPU power draw on an idle X11 or Wayland desktop when driving high resolutions or refresh rates
+
+### [515.65.07] 2022-10-19
+
 ### [515.65.01] 2022-08-02

View File

@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
-version 515.65.07.
+version 515.86.01.
## How to Build
@@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with gsp.bin
firmware and user-space NVIDIA GPU driver components from a corresponding
-515.65.07 driver release. This can be achieved by installing
+515.86.01 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
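The README follows with an invocation along these lines (installer file name illustrative; the release's actual .run file name varies):

    sh ./NVIDIA-Linux-x86_64-515.86.01.run --no-kernel-modules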
@@ -167,7 +167,7 @@ for the target kernel.
## Compatible GPUs
The open-gpu-kernel-modules can be used on any Turing or later GPU
-(see the table below). However, in the 515.65.07 release,
+(see the table below). However, in the 515.86.01 release,
GeForce and Workstation support is still considered alpha-quality.
To enable use of the open kernel modules on GeForce and Workstation GPUs,
@@ -175,7 +175,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:
-https://us.download.nvidia.com/XFree86/Linux-x86_64/515.65.07/README/kernel_open.html
+https://us.download.nvidia.com/XFree86/Linux-x86_64/515.86.01/README/kernel_open.html
In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@@ -652,6 +652,17 @@ Subsystem Device ID.
| NVIDIA PG506-232 | 20B6 10DE 1492 |
| NVIDIA A30 | 20B7 10DE 1532 |
| NVIDIA A100-PCIE-40GB | 20F1 10DE 145F |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179B |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179C |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179D |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179E |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179F |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A0 |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A1 |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A2 |
| NVIDIA A800 80GB PCIe | 20F5 10DE 1799 |
| NVIDIA A800 80GB PCIe LC | 20F5 10DE 179A |
| NVIDIA A800 40GB PCIe | 20F6 10DE 17A3 |
| NVIDIA GeForce GTX 1660 Ti | 2182 |
| NVIDIA GeForce GTX 1660 | 2184 |
| NVIDIA GeForce GTX 1650 SUPER | 2187 |
@@ -685,6 +696,7 @@ Subsystem Device ID.
| NVIDIA GeForce RTX 3090 Ti | 2203 |
| NVIDIA GeForce RTX 3090 | 2204 |
| NVIDIA GeForce RTX 3080 | 2206 |
| NVIDIA GeForce RTX 3070 Ti | 2207 |
| NVIDIA GeForce RTX 3080 Ti | 2208 |
| NVIDIA GeForce RTX 3080 | 220A |
| NVIDIA CMP 90HX | 220D |
@@ -736,6 +748,7 @@ Subsystem Device ID.
| NVIDIA RTX A3000 12GB Laptop GPU | 24B9 |
| NVIDIA RTX A4500 Laptop GPU | 24BA |
| NVIDIA RTX A3000 12GB Laptop GPU | 24BB |
| NVIDIA GeForce RTX 3060 Ti | 24C9 |
| NVIDIA GeForce RTX 3080 Laptop GPU | 24DC |
| NVIDIA GeForce RTX 3070 Laptop GPU | 24DD |
| NVIDIA GeForce RTX 3070 Ti Laptop GPU | 24E0 |
@@ -751,6 +764,7 @@ Subsystem Device ID.
| NVIDIA RTX A2000 | 2531 103C 151D |
| NVIDIA RTX A2000 | 2531 10DE 151D |
| NVIDIA RTX A2000 | 2531 17AA 151D |
| NVIDIA GeForce RTX 3060 | 2544 |
| NVIDIA GeForce RTX 3060 Laptop GPU | 2560 |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 2563 |
| NVIDIA RTX A2000 12GB | 2571 1028 1611 |

View File

@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall -MD $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
-EXTRA_CFLAGS += -DNV_VERSION_STRING=\"515.65.07\"
+EXTRA_CFLAGS += -DNV_VERSION_STRING=\"515.86.01\"
EXTRA_CFLAGS += -Wno-unused-function
@@ -203,9 +203,108 @@ $(obj)/conftest/patches.h: $(NV_CONFTEST_SCRIPT)
@mkdir -p $(obj)/conftest
@$(NV_CONFTEST_CMD) patch_check > $@
-$(obj)/conftest/headers.h: $(NV_CONFTEST_SCRIPT)
-	@mkdir -p $(obj)/conftest
-	@$(NV_CONFTEST_CMD) test_kernel_headers '$(NV_CONFTEST_CFLAGS)' > $@
# Each of these headers is checked for presence with a test #include; a
# corresponding #define will be generated in conftest/headers.h.
NV_HEADER_PRESENCE_TESTS = \
asm/system.h \
drm/drmP.h \
drm/drm_auth.h \
drm/drm_gem.h \
drm/drm_crtc.h \
drm/drm_atomic.h \
drm/drm_atomic_helper.h \
drm/drm_encoder.h \
drm/drm_atomic_uapi.h \
drm/drm_drv.h \
drm/drm_framebuffer.h \
drm/drm_connector.h \
drm/drm_probe_helper.h \
drm/drm_blend.h \
drm/drm_fourcc.h \
drm/drm_prime.h \
drm/drm_plane.h \
drm/drm_vblank.h \
drm/drm_file.h \
drm/drm_ioctl.h \
drm/drm_device.h \
drm/drm_mode_config.h \
dt-bindings/interconnect/tegra_icc_id.h \
generated/autoconf.h \
generated/compile.h \
generated/utsrelease.h \
linux/efi.h \
linux/kconfig.h \
linux/platform/tegra/mc_utils.h \
linux/semaphore.h \
linux/printk.h \
linux/ratelimit.h \
linux/prio_tree.h \
linux/log2.h \
linux/of.h \
linux/bug.h \
linux/sched/signal.h \
linux/sched/task.h \
linux/sched/task_stack.h \
xen/ioemu.h \
linux/fence.h \
linux/dma-resv.h \
soc/tegra/chip-id.h \
soc/tegra/fuse.h \
soc/tegra/tegra_bpmp.h \
video/nv_internal.h \
linux/platform/tegra/dce/dce-client-ipc.h \
linux/nvhost.h \
linux/nvhost_t194.h \
asm/book3s/64/hash-64k.h \
asm/set_memory.h \
asm/prom.h \
asm/powernv.h \
linux/atomic.h \
asm/barrier.h \
asm/opal-api.h \
sound/hdaudio.h \
asm/pgtable_types.h \
linux/stringhash.h \
linux/dma-map-ops.h \
rdma/peer_mem.h \
sound/hda_codec.h \
linux/dma-buf.h \
linux/time.h \
linux/platform_device.h \
linux/mutex.h \
linux/reset.h \
linux/of_platform.h \
linux/of_device.h \
linux/of_gpio.h \
linux/gpio.h \
linux/gpio/consumer.h \
linux/interconnect.h \
linux/pm_runtime.h \
linux/clk.h \
linux/clk-provider.h \
linux/ioasid.h \
linux/stdarg.h \
linux/iosys-map.h \
asm/coco.h
# Filename to store the define for the header in $(1); this is only consumed by
# the rule below that concatenates all of these together.
NV_HEADER_PRESENCE_PART = $(addprefix $(obj)/conftest/header_presence/,$(addsuffix .part,$(1)))
# Define a rule to check the header $(1).
define NV_HEADER_PRESENCE_CHECK
$$(call NV_HEADER_PRESENCE_PART,$(1)): $$(NV_CONFTEST_SCRIPT) $(obj)/conftest/uts_release
@mkdir -p $$(dir $$@)
@$$(NV_CONFTEST_CMD) test_kernel_header '$$(NV_CONFTEST_CFLAGS)' '$(1)' > $$@
endef
# Evaluate the rule above for each header in the list.
$(foreach header,$(NV_HEADER_PRESENCE_TESTS),$(eval $(call NV_HEADER_PRESENCE_CHECK,$(header))))
# Concatenate all of the parts into headers.h.
$(obj)/conftest/headers.h: $(call NV_HEADER_PRESENCE_PART,$(NV_HEADER_PRESENCE_TESTS))
@cat $^ > $@
clean-dirs := $(obj)/conftest
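A minimal consumer sketch, assuming the NV_<PATH>_PRESENT naming produced by conftest.sh (linux/dma-buf.h becomes NV_LINUX_DMA_BUF_H_PRESENT); the guarded include shown here is illustrative:

/* Guard an include on the presence define generated into headers.h. */
#include "conftest/headers.h"

#if defined(NV_LINUX_DMA_BUF_H_PRESENT)
#include <linux/dma-buf.h>
#endif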

View File

@@ -227,6 +227,7 @@ static inline uid_t __kuid_val(uid_t uid)
#endif
#include <linux/fb.h> /* fb_info struct */
#include <linux/screen_info.h> /* screen_info */
#if !defined(CONFIG_PCI)
#warning "Attempting to build driver for a platform with no PCI support!"

View File

@@ -78,13 +78,8 @@ static inline pgprot_t pgprot_modify_writecombine(pgprot_t old_prot)
#define NV_PGPROT_UNCACHED_DEVICE(old_prot) pgprot_noncached(old_prot)
#if defined(NVCPU_AARCH64)
-#if defined(NV_MT_DEVICE_GRE_PRESENT)
-#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
-                                       PTE_ATTRINDX(MT_DEVICE_GRE))
-#else
 #define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
                                        PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#endif
#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
__pgprot_modify(old_prot, PTE_ATTRINDX_MASK, NV_PROT_WRITE_COMBINED_DEVICE)
#define NV_PGPROT_WRITE_COMBINED(old_prot) NV_PGPROT_UNCACHED(old_prot)

View File

@@ -624,27 +624,45 @@ typedef enum
 #define NV_GET_NV_STATE(pGpu) \
     (nv_state_t *)((pGpu) ? (pGpu)->pOsGpuInfo : NULL)

-#define IS_REG_OFFSET(nv, offset, length) \
-    (((offset) >= (nv)->regs->cpu_address) && \
-     (((offset) + ((length)-1)) <= \
-        (nv)->regs->cpu_address + ((nv)->regs->size-1)))
+static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
+{
+    return ((offset >= nv->regs->cpu_address) &&
+            ((offset + (length - 1)) >= offset) &&
+            ((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1))));
+}

-#define IS_FB_OFFSET(nv, offset, length) \
-    (((nv)->fb) && ((offset) >= (nv)->fb->cpu_address) && \
-     (((offset) + ((length)-1)) <= (nv)->fb->cpu_address + ((nv)->fb->size-1)))
+static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
+{
+    return ((nv->fb) && (offset >= nv->fb->cpu_address) &&
+            ((offset + (length - 1)) >= offset) &&
+            ((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1))));
+}

-#define IS_UD_OFFSET(nv, offset, length) \
-    (((nv)->ud.cpu_address != 0) && ((nv)->ud.size != 0) && \
-     ((offset) >= (nv)->ud.cpu_address) && \
-     (((offset) + ((length)-1)) <= (nv)->ud.cpu_address + ((nv)->ud.size-1)))
+static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
+{
+    return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) &&
+            (offset >= nv->ud.cpu_address) &&
+            ((offset + (length - 1)) >= offset) &&
+            ((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1))));
+}

-#define IS_IMEM_OFFSET(nv, offset, length) \
-    (((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) && \
-     ((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && \
-     ((offset) >= (nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && \
-     (((offset) + ((length) - 1)) <= \
-        (nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + \
-            ((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size - 1)))
+static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
+{
+    return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) &&
+            (nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) &&
+            (offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) &&
+            ((offset + (length - 1)) >= offset) &&
+            ((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address +
+                                         (nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1))));
+}

 #define NV_RM_MAX_MSIX_LINES 8
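A sketch of the wraparound case the added `(offset + (length - 1)) >= offset` term rejects; the values are illustrative:

NvU64 offset = 0xFFFFFFFFFFFFF000ULL;  /* near the top of the 64-bit space */
NvU64 length = 0x2000ULL;
/* offset + (length - 1) wraps to 0xFFF, which is below offset, so the old
 * upper-bound-only comparison could accept an out-of-range request; the
 * new overflow term fails it. */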

View File

@@ -55,9 +55,13 @@ append_conftest() {
     done
 }

-translate_and_preprocess_header_files() {
-    # Inputs:
-    #   $1: list of relative file paths
+test_header_presence() {
     #
+    # Determine if the given header file (which may or may not be
+    # present) is provided by the target kernel.
+    #
+    # Input:
+    #   $1: relative file path
+    #
     # This routine creates an upper case, underscore version of each of the
     # relative file paths, and uses that as the token to either define or
@@ -73,115 +77,25 @@ translate_and_preprocess_header_files() {
# strings, without special handling of the beginning or the end of the line.
TEST_CFLAGS=`echo "-E -M $CFLAGS " | sed -e 's/\( -M[DG]\)* / /g'`
-    for file in "$@"; do
-        file_define=NV_`echo $file | tr '/.' '_' | tr '-' '_' | tr 'a-z' 'A-Z'`_PRESENT
-
-        CODE="#include <$file>"
-
-        if echo "$CODE" | $CC $TEST_CFLAGS - > /dev/null 2>&1; then
-            echo "#define $file_define"
-        else
-            # If preprocessing failed, it could have been because the header
-            # file under test is not present, or because it is present but
-            # depends upon the inclusion of other header files. Attempting
-            # preprocessing again with -MG will ignore a missing header file
-            # but will still fail if the header file is present.
-            if echo "$CODE" | $CC $TEST_CFLAGS -MG - > /dev/null 2>&1; then
-                echo "#undef $file_define"
-            else
-                echo "#define $file_define"
-            fi
-        fi
-    done
+    file="$1"
+    file_define=NV_`echo $file | tr '/.' '_' | tr '-' '_' | tr 'a-z' 'A-Z'`_PRESENT
+
+    CODE="#include <$file>"
+
+    if echo "$CODE" | $CC $TEST_CFLAGS - > /dev/null 2>&1; then
+        echo "#define $file_define"
+    else
+        # If preprocessing failed, it could have been because the header
+        # file under test is not present, or because it is present but
+        # depends upon the inclusion of other header files. Attempting
+        # preprocessing again with -MG will ignore a missing header file
+        # but will still fail if the header file is present.
+        if echo "$CODE" | $CC $TEST_CFLAGS -MG - > /dev/null 2>&1; then
+            echo "#undef $file_define"
+        else
+            echo "#define $file_define"
+        fi
+    fi
 }
test_headers() {
#
# Determine which header files (of a set that may or may not be
# present) are provided by the target kernel.
#
FILES="asm/system.h"
FILES="$FILES drm/drmP.h"
FILES="$FILES drm/drm_auth.h"
FILES="$FILES drm/drm_gem.h"
FILES="$FILES drm/drm_crtc.h"
FILES="$FILES drm/drm_atomic.h"
FILES="$FILES drm/drm_atomic_helper.h"
FILES="$FILES drm/drm_encoder.h"
FILES="$FILES drm/drm_atomic_uapi.h"
FILES="$FILES drm/drm_drv.h"
FILES="$FILES drm/drm_framebuffer.h"
FILES="$FILES drm/drm_connector.h"
FILES="$FILES drm/drm_probe_helper.h"
FILES="$FILES drm/drm_blend.h"
FILES="$FILES drm/drm_fourcc.h"
FILES="$FILES drm/drm_prime.h"
FILES="$FILES drm/drm_plane.h"
FILES="$FILES drm/drm_vblank.h"
FILES="$FILES drm/drm_file.h"
FILES="$FILES drm/drm_ioctl.h"
FILES="$FILES drm/drm_device.h"
FILES="$FILES drm/drm_mode_config.h"
FILES="$FILES dt-bindings/interconnect/tegra_icc_id.h"
FILES="$FILES generated/autoconf.h"
FILES="$FILES generated/compile.h"
FILES="$FILES generated/utsrelease.h"
FILES="$FILES linux/efi.h"
FILES="$FILES linux/kconfig.h"
FILES="$FILES linux/platform/tegra/mc_utils.h"
FILES="$FILES linux/semaphore.h"
FILES="$FILES linux/printk.h"
FILES="$FILES linux/ratelimit.h"
FILES="$FILES linux/prio_tree.h"
FILES="$FILES linux/log2.h"
FILES="$FILES linux/of.h"
FILES="$FILES linux/bug.h"
FILES="$FILES linux/sched/signal.h"
FILES="$FILES linux/sched/task.h"
FILES="$FILES linux/sched/task_stack.h"
FILES="$FILES xen/ioemu.h"
FILES="$FILES linux/fence.h"
FILES="$FILES linux/dma-resv.h"
FILES="$FILES soc/tegra/chip-id.h"
FILES="$FILES soc/tegra/fuse.h"
FILES="$FILES soc/tegra/tegra_bpmp.h"
FILES="$FILES video/nv_internal.h"
FILES="$FILES linux/platform/tegra/dce/dce-client-ipc.h"
FILES="$FILES linux/nvhost.h"
FILES="$FILES linux/nvhost_t194.h"
FILES="$FILES asm/book3s/64/hash-64k.h"
FILES="$FILES asm/set_memory.h"
FILES="$FILES asm/prom.h"
FILES="$FILES asm/powernv.h"
FILES="$FILES linux/atomic.h"
FILES="$FILES asm/barrier.h"
FILES="$FILES asm/opal-api.h"
FILES="$FILES sound/hdaudio.h"
FILES="$FILES asm/pgtable_types.h"
FILES="$FILES linux/stringhash.h"
FILES="$FILES linux/dma-map-ops.h"
FILES="$FILES rdma/peer_mem.h"
FILES="$FILES sound/hda_codec.h"
FILES="$FILES linux/dma-buf.h"
FILES="$FILES linux/time.h"
FILES="$FILES linux/platform_device.h"
FILES="$FILES linux/mutex.h"
FILES="$FILES linux/reset.h"
FILES="$FILES linux/of_platform.h"
FILES="$FILES linux/of_device.h"
FILES="$FILES linux/of_gpio.h"
FILES="$FILES linux/gpio.h"
FILES="$FILES linux/gpio/consumer.h"
FILES="$FILES linux/interconnect.h"
FILES="$FILES linux/pm_runtime.h"
FILES="$FILES linux/clk.h"
FILES="$FILES linux/clk-provider.h"
FILES="$FILES linux/ioasid.h"
FILES="$FILES linux/stdarg.h"
FILES="$FILES linux/iosys-map.h"
FILES="$FILES asm/coco.h"
    translate_and_preprocess_header_files $FILES
}
build_cflags() {
@@ -2420,23 +2334,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PCI_DEV_HAS_ATS_ENABLED" "" "types"
;;
mt_device_gre)
#
# Determine if MT_DEVICE_GRE flag is present.
#
# MT_DEVICE_GRE flag is removed by commit 58cc6b72a21274
# ("arm64: mm: Remove unused support for Device-GRE memory type") in v5.14-rc1
# (2021-06-01).
#
CODE="
#include <asm/memory.h>
unsigned int conftest_mt_device_gre(void) {
return MT_DEVICE_GRE;
}"
compile_check_conftest "$CODE" "NV_MT_DEVICE_GRE_PRESENT" "" "types"
;;
get_user_pages)
#
# Conftest for get_user_pages()
@@ -5366,6 +5263,48 @@ compile_test() {
compile_check_conftest "$CODE" "NV_GET_TASK_IOPRIO_PRESENT" "" "functions"
;;
num_registered_fb)
#
# Determine if 'num_registered_fb' variable is present.
#
# 'num_registered_fb' was removed by commit 5727dcfd8486
# ("fbdev: Make registered_fb[] private to fbmem.c") for
# v5.20 linux-next (2022-07-27).
#
CODE="
#include <linux/fb.h>
int conftest_num_registered_fb(void) {
return num_registered_fb;
}"
compile_check_conftest "$CODE" "NV_NUM_REGISTERED_FB_PRESENT" "" "types"
;;
acpi_video_backlight_use_native)
#
# Determine if acpi_video_backlight_use_native() function is present
#
# acpi_video_backlight_use_native was added by commit 2600bfa3df99
# (ACPI: video: Add acpi_video_backlight_use_native() helper) for
# v6.0 (2022-08-17). Note: the include directive for <linux/types.h>
# in this conftest is necessary in order to support kernels between
# commit 0b9f7d93ca61 ("ACPI / i915: ignore firmware requests for
# backlight change") for v3.16 (2014-07-07) and commit 3bd6bce369f5
# ("ACPI / video: Port to new backlight interface selection API")
# for v4.2 (2015-07-16). Kernels within this range use the 'bool'
# type and the related 'false' value in <acpi/video.h> without first
# including the definitions of that type and value.
#
CODE="
#include <linux/types.h>
#include <acpi/video.h>
void conftest_acpi_video_backglight_use_native(void) {
acpi_video_backlight_use_native(0);
}"
compile_check_conftest "$CODE" "NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE" "" "functions"
;;
# When adding a new conftest entry, please use the correct format for
# specifying the relevant upstream Linux kernel commit.
#
@@ -5764,14 +5703,14 @@ case "$5" in
         ;;

-    test_kernel_headers)
+    test_kernel_header)
         #
-        # Check for the availability of certain kernel headers
+        # Check for the availability of the given kernel header
         #
         CFLAGS=$6
-        test_headers
+        test_header_presence "${7}"

         for file in conftest*.d; do
             rm -f $file > /dev/null 2>&1

View File

@@ -95,7 +95,11 @@ static vm_fault_t __nv_drm_gem_nvkms_handle_vma_fault(
pfn >>= PAGE_SHIFT;
pfn += page_offset;
} else {
-        BUG_ON(page_offset > nv_nvkms_memory->pages_count);
+        BUG_ON(page_offset >= nv_nvkms_memory->pages_count);
pfn = page_to_pfn(nv_nvkms_memory->pages[page_offset]);
}

View File

@@ -112,7 +112,11 @@ static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
page_offset = vmf->pgoff - drm_vma_node_start(&gem->vma_node);
-    BUG_ON(page_offset > nv_user_memory->pages_count);
+    BUG_ON(page_offset >= nv_user_memory->pages_count);
ret = vm_insert_page(vma, address, nv_user_memory->pages[page_offset]);
switch (ret) {
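Both `BUG_ON` changes in these two files fix the same off-by-one; a note, under the natural reading that `pages[]` holds `pages_count` entries:

/* Valid indices are 0 .. pages_count - 1. The old check only rejected
 * page_offset > pages_count, so page_offset == pages_count slipped through
 * to pages[page_offset], one element past the end; >= closes that hole. */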

View File

@@ -41,6 +41,19 @@
#include <drm/drm_atomic_uapi.h>
#endif
/*
* The inclusion of drm_framebuffer.h was removed from drm_crtc.h by commit
* 720cf96d8fecde29b72e1101f8a567a0ce99594f ("drm: Drop drm_framebuffer.h from
* drm_crtc.h") in linux-next, expected in v5.19-rc7.
*
* We only need drm_framebuffer.h for drm_framebuffer_put(), and it is always
* present (v4.9+) when drm_framebuffer_{put,get}() is present (v4.12+), so it
* is safe to unconditionally include it when drm_framebuffer_get() is present.
*/
#if defined(NV_DRM_FRAMEBUFFER_GET_PRESENT)
#include <drm/drm_framebuffer.h>
#endif
static void __nv_drm_framebuffer_put(struct drm_framebuffer *fb)
{
#if defined(NV_DRM_FRAMEBUFFER_GET_PRESENT)

View File

@@ -47,6 +47,16 @@ module_param_named(modeset, nv_drm_modeset_module_param, bool, 0400);
void *nv_drm_calloc(size_t nmemb, size_t size)
{
size_t total_size = nmemb * size;
//
// Check for overflow.
//
if ((nmemb != 0) && ((total_size / nmemb) != size))
{
return NULL;
}
return kzalloc(nmemb * size, GFP_KERNEL);
}
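A worked case for the division check, with illustrative values on a 64-bit size_t:

/* nmemb = 1ULL << 32, size = (1ULL << 32) + 1:
 * nmemb * size = 2^64 + 2^32, which wraps to 2^32.
 * total_size / nmemb = 1 != size, so the overflow is caught and the
 * function returns NULL instead of allocating a too-small buffer. */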

View File

@@ -35,6 +35,8 @@
#include <linux/list.h>
#include <linux/rwsem.h>
#include <acpi/video.h>
#include "nvstatus.h"
#include "nv-register-module.h"
@@ -59,6 +61,9 @@
#define NVKMS_LOG_PREFIX "nvidia-modeset: "
static bool output_rounding_fix = false;
module_param_named(output_rounding_fix, output_rounding_fix, bool, 0400);
/* These parameters are used for fault injection tests. Normally the defaults
* should be used. */
MODULE_PARM_DESC(fail_malloc, "Fail the Nth call to nvkms_alloc");
@@ -71,6 +76,10 @@ module_param_named(malloc_verbose, malloc_verbose, bool, 0400);
static atomic_t nvkms_alloc_called_count;
NvBool nvkms_output_rounding_fix(void)
{
return output_rounding_fix;
}
#define NVKMS_SYNCPT_STUBS_NEEDED
@@ -1053,6 +1062,12 @@ nvkms_register_backlight(NvU32 gpu_id, NvU32 display_id, void *drv_priv,
struct nvkms_backlight_device *nvkms_bd = NULL;
int i;
#if defined(NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE)
if (!acpi_video_backlight_use_native()) {
return NULL;
}
#endif
gpu_info = nvkms_alloc(NV_MAX_GPUS * sizeof(*gpu_info), NV_TRUE);
if (gpu_info == NULL) {
return NULL;

View File

@@ -110,6 +110,7 @@ typedef struct {
} set_maxval;
} NvKmsSyncPtOpParams;
NvBool nvkms_output_rounding_fix(void);
void nvkms_call_rm (void *ops);
void* nvkms_alloc (size_t size,

View File

@@ -96,4 +96,5 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_create_on_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_kthread_create_on_node

View File

@@ -35,10 +35,6 @@
#include "nv_uvm_interface.h"
#include "clb06f.h"
-#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT 1024
-#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MIN 32
-#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX (1024 * 1024)
 static unsigned uvm_channel_num_gpfifo_entries = UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT;
#define UVM_CHANNEL_GPFIFO_LOC_DEFAULT "auto"
@@ -86,6 +82,12 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
uvm_spin_lock(&channel->pool->lock);
// Completed value should never exceed the queued value
UVM_ASSERT_MSG_RELEASE(completed_value <= channel->tracking_sem.queued_value,
"GPU %s channel %s unexpected completed_value 0x%llx > queued_value 0x%llx\n",
channel->pool->manager->gpu->parent->name, channel->name, completed_value,
channel->tracking_sem.queued_value);
cpu_put = channel->cpu_put;
gpu_get = channel->gpu_get;
@@ -395,6 +397,14 @@ static void uvm_channel_semaphore_release(uvm_push_t *push, NvU64 semaphore_va,
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
// We used to skip the membar or use membar GPU for the semaphore release
// for a few pushes, but that doesn't provide sufficient ordering guarantees
// in some cases (e.g. ga100 with an LCE with PCEs from both HSHUBs) for the
// semaphore writes. To be safe, just always use a membar sys for now.
// TODO bug 3770539: Optimize membars used by end of push semaphore releases
(void)uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
(void)uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
if (uvm_channel_is_ce(push->channel))
gpu->parent->ce_hal->semaphore_release(push, semaphore_va, new_payload);
@@ -1562,6 +1572,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
UVM_SEQ_OR_DBG_PRINT(s, "get %u\n", channel->gpu_get);
UVM_SEQ_OR_DBG_PRINT(s, "put %u\n", channel->cpu_put);
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore GPU VA 0x%llx\n", uvm_channel_tracking_semaphore_get_gpu_va(channel));
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore CPU VA 0x%llx\n", (NvU64)(uintptr_t)channel->tracking_sem.semaphore.payload);
uvm_spin_unlock(&channel->pool->lock);
}

View File

@@ -46,6 +46,21 @@
// wait for a GPFIFO entry to free up.
//
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT 1024
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MIN 32
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX (1024 * 1024)
// Semaphore payloads cannot advance too much between calls to
// uvm_gpu_tracking_semaphore_update_completed_value(). In practice the jumps
// are bound by gpfifo sizing as we have to update the completed value to
// reclaim gpfifo entries. Set a limit based on the max gpfifo entries we could
// ever see.
//
// Logically this define belongs to uvm_gpu_semaphore.h but it depends on the
// channel GPFIFO sizing defined here so it's easiest to just have it here as
// uvm_channel.h includes uvm_gpu_semaphore.h.
#define UVM_GPU_SEMAPHORE_MAX_JUMP (2 * UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX)
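For scale: with UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX = 1024 * 1024, this works out to UVM_GPU_SEMAPHORE_MAX_JUMP = 2 * 1024 * 1024 = 2,097,152 — small compared to the 2^32 payload space, so a legitimate jump can never be mistaken for a 32-bit wrap.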
// Channel types
typedef enum
{

View File

@@ -151,6 +151,37 @@ done:
return status;
}
static NV_STATUS test_unexpected_completed_values(uvm_va_space_t *va_space)
{
NV_STATUS status;
uvm_gpu_t *gpu;
for_each_va_space_gpu(gpu, va_space) {
uvm_channel_t *channel;
NvU64 completed_value;
// The GPU channel manager is destroyed and then re-created after
// the test, so this test requires exclusive access to the GPU.
TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);
channel = &gpu->channel_manager->channel_pools[0].channels[0];
completed_value = uvm_channel_update_completed_value(channel);
uvm_gpu_semaphore_set_payload(&channel->tracking_sem.semaphore, (NvU32)completed_value + 1);
TEST_CHECK_RET(uvm_global_get_status() == NV_OK);
uvm_channel_update_progress_all(channel);
TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_INVALID_STATE);
uvm_channel_manager_destroy(gpu->channel_manager);
// Destruction will hit the error again, so clear one more time.
uvm_global_reset_fatal_error();
TEST_NV_CHECK_RET(uvm_channel_manager_create(gpu, &gpu->channel_manager));
}
return NV_OK;
}
static NV_STATUS uvm_test_rc_for_gpu(uvm_gpu_t *gpu)
{
uvm_push_t push;
@@ -712,6 +743,14 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
g_uvm_global.disable_fatal_error_assert = true;
uvm_release_asserts_set_global_error_for_tests = true;
status = test_unexpected_completed_values(va_space);
uvm_release_asserts_set_global_error_for_tests = false;
g_uvm_global.disable_fatal_error_assert = false;
if (status != NV_OK)
goto done;
if (g_uvm_global.num_simulated_devices == 0) {
status = test_rc(va_space);
if (status != NV_OK)

View File

@@ -48,6 +48,33 @@ module_param(uvm_enable_builtin_tests, int, S_IRUGO);
MODULE_PARM_DESC(uvm_enable_builtin_tests,
"Enable the UVM built-in tests. (This is a security risk)");
// Default to release asserts being enabled.
int uvm_release_asserts __read_mostly = 1;
// Make the module param writable so that release asserts can be enabled or
// disabled at any time by modifying the module parameter.
module_param(uvm_release_asserts, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(uvm_release_asserts, "Enable uvm asserts included in release builds.");
// Default to failed release asserts not dumping stack.
int uvm_release_asserts_dump_stack __read_mostly = 0;
// Make the module param writable so that dumping the stack can be enabled and
// disabled at any time by modifying the module parameter.
module_param(uvm_release_asserts_dump_stack, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(uvm_release_asserts_dump_stack, "dump_stack() on failed UVM release asserts.");
// Default to failed release asserts not setting the global UVM error.
int uvm_release_asserts_set_global_error __read_mostly = 0;
// Make the module param writable so that setting the global fatal error can be
// enabled and disabled at any time by modifying the module parameter.
module_param(uvm_release_asserts_set_global_error, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(uvm_release_asserts_set_global_error, "Set UVM global fatal error on failed release asserts.");
// A separate flag to enable setting global error, to be used by tests only.
bool uvm_release_asserts_set_global_error_for_tests __read_mostly = false;
//
// Convert kernel errno codes to corresponding NV_STATUS
//

View File

@@ -80,6 +80,9 @@ bool uvm_debug_prints_enabled(void);
#define UVM_ASSERT_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ASSERT_PRINT_RL(fmt, ...) \
UVM_PRINT_FUNC_PREFIX(printk_ratelimited, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ERR_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
@@ -146,9 +149,7 @@ void on_uvm_test_fail(void);
// Unlike on_uvm_test_fail it provides 'panic' coverity semantics
void on_uvm_assert(void);
-// UVM_ASSERT_RELEASE and UVM_ASSERT_MSG_RELEASE are always enabled, even on
-// release builds.
-#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
+#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \
 do { \
     if (unlikely(!(expr))) { \
         UVM_ASSERT_PRINT("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
@@ -157,9 +158,6 @@ void on_uvm_assert(void);
} \
} while (0)
-#define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...) _UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__)
-#define UVM_ASSERT_RELEASE(expr) _UVM_ASSERT_MSG_RELEASE(expr, #expr, "\n")
// Prevent function calls in expr and the print argument list from being
// evaluated.
#define UVM_ASSERT_MSG_IGNORE(expr, fmt, ...) \
@@ -170,13 +168,42 @@ void on_uvm_assert(void);
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity builds
#if UVM_IS_DEBUG() || defined __COVERITY__
-#define UVM_ASSERT_MSG UVM_ASSERT_MSG_RELEASE
-#define UVM_ASSERT UVM_ASSERT_RELEASE
+#define UVM_ASSERT_MSG(expr, fmt, ...) _UVM_ASSERT_MSG(expr, #expr, ": " fmt, ##__VA_ARGS__)
+#define UVM_ASSERT(expr) _UVM_ASSERT_MSG(expr, #expr, "\n")
#else
#define UVM_ASSERT_MSG(expr, fmt, ...) UVM_ASSERT_MSG_IGNORE(expr, fmt, ##__VA_ARGS__)
#define UVM_ASSERT(expr) UVM_ASSERT_MSG_IGNORE(expr, "\n")
#endif
// UVM_ASSERT_RELEASE and UVM_ASSERT_MSG_RELEASE are always included in the
// build, even on release builds. They are skipped at runtime if
// uvm_release_asserts is 0.
// Whether release asserts are enabled and whether they should dump the stack
// and set the global error.
extern int uvm_release_asserts;
extern int uvm_release_asserts_dump_stack;
extern int uvm_release_asserts_set_global_error;
extern bool uvm_release_asserts_set_global_error_for_tests;
// Given these are enabled for release builds, we need to be more cautious than
// in UVM_ASSERT(). Use a ratelimited print and only dump the stack if a module
// param is enabled.
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
do { \
if (uvm_release_asserts && unlikely(!(expr))) { \
UVM_ASSERT_PRINT_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \
if (uvm_release_asserts_dump_stack) \
dump_stack(); \
on_uvm_assert(); \
} \
} while (0)
#define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...) _UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__)
#define UVM_ASSERT_RELEASE(expr) _UVM_ASSERT_MSG_RELEASE(expr, #expr, "\n")
// Provide a short form of UUID's, typically for use in debug printing:
#define ABBREV_UUID(uuid) (unsigned)(uuid)

View File

@@ -25,6 +25,7 @@
#include "uvm_lock.h"
#include "uvm_global.h"
#include "uvm_kvmalloc.h"
#include "uvm_channel.h" // For UVM_GPU_SEMAPHORE_MAX_JUMP
#define UVM_SEMAPHORE_SIZE 4
#define UVM_SEMAPHORE_PAGE_SIZE PAGE_SIZE
@@ -467,9 +468,16 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
// push, it's easily guaranteed because of the small number of GPFIFO
// entries available per channel (there could be at most as many pending
// pushes as GPFIFO entries).
-    if (new_sem_value < old_sem_value)
+    if (unlikely(new_sem_value < old_sem_value))
         new_value += 1ULL << 32;
// Check for unexpected large jumps of the semaphore value
UVM_ASSERT_MSG_RELEASE(new_value - old_value <= UVM_GPU_SEMAPHORE_MAX_JUMP,
"GPU %s unexpected semaphore (CPU VA 0x%llx) jump from 0x%llx to 0x%llx\n",
tracking_semaphore->semaphore.page->pool->gpu->parent->name,
(NvU64)(uintptr_t)tracking_semaphore->semaphore.payload,
old_value, new_value);
    // Use an atomic write even though the spinlock is held so that the value can
    // be (carefully) read atomically outside of the lock.
    //
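A minimal sketch of the 32-to-64-bit extension that update_completed_value_locked performs above; the helper name is illustrative:

/* The GPU writes a 32-bit payload; the CPU tracks a 64-bit completed value.
 * Splice the payload into the low 32 bits and add a wrap increment when the
 * payload moved backwards (i.e. the 32-bit counter wrapped). */
static NvU64 extend_payload(NvU64 old_value, NvU32 new_sem_value)
{
    NvU64 new_value = (old_value & ~0xFFFFFFFFULL) | new_sem_value;

    if (new_sem_value < (NvU32)old_value)
        new_value += 1ULL << 32;

    return new_value;
}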

View File

@@ -27,6 +27,18 @@
#include "uvm_va_space.h"
#include "uvm_kvmalloc.h"
static NV_STATUS set_and_test(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 new_value)
{
uvm_gpu_semaphore_set_payload(&tracking_sem->semaphore, (NvU32)new_value);
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem) == new_value);
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value));
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value - 1));
TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value + 1));
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_completed(tracking_sem));
return NV_OK;
}
static NV_STATUS add_and_test(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU32 increment_by)
{
NvU64 new_value;
@@ -43,13 +55,45 @@ static NV_STATUS add_and_test(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU32
TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value));
TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_completed(tracking_sem));
-    uvm_gpu_semaphore_set_payload(&tracking_sem->semaphore, (NvU32)new_value);
-    TEST_CHECK_RET(uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem) == new_value);
+    TEST_NV_CHECK_RET(set_and_test(tracking_sem, new_value));
+
     TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, completed));
-    TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value));
-    TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value - 1));
-    TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value + 1));
-    TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_completed(tracking_sem));

     return NV_OK;
 }
// Set the current state of the sema, avoiding UVM_GPU_SEMAPHORE_MAX_JUMP
// detection.
static void manual_set(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 value)
{
uvm_gpu_semaphore_set_payload(&tracking_sem->semaphore, (NvU32)value);
atomic64_set(&tracking_sem->completed_value, value);
tracking_sem->queued_value = value;
}
// Set the starting value and payload and expect a global error
static NV_STATUS set_and_expect_error(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 starting_value, NvU32 payload)
{
manual_set(tracking_sem, starting_value);
uvm_gpu_semaphore_set_payload(&tracking_sem->semaphore, payload);
TEST_CHECK_RET(uvm_global_get_status() == NV_OK);
uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem);
TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_INVALID_STATE);
return NV_OK;
}
static NV_STATUS test_invalid_jumps(uvm_gpu_tracking_semaphore_t *tracking_sem)
{
int i;
for (i = 0; i < 10; ++i) {
NvU64 base = (1ULL<<32) * i;
TEST_NV_CHECK_RET(set_and_expect_error(tracking_sem, base, UVM_GPU_SEMAPHORE_MAX_JUMP + 1));
TEST_NV_CHECK_RET(set_and_expect_error(tracking_sem, base, UINT_MAX));
TEST_NV_CHECK_RET(set_and_expect_error(tracking_sem, base + i + 1, i));
TEST_NV_CHECK_RET(set_and_expect_error(tracking_sem, base + UINT_MAX / 2, UINT_MAX / 2 + UVM_GPU_SEMAPHORE_MAX_JUMP + 1));
TEST_NV_CHECK_RET(set_and_expect_error(tracking_sem, base + UINT_MAX / 2, UINT_MAX / 2 - i - 1));
}
return NV_OK;
}
@@ -73,11 +117,31 @@ static NV_STATUS test_tracking(uvm_va_space_t *va_space)
goto done;
for (i = 0; i < 100; ++i) {
-        status = add_and_test(&tracking_sem, UINT_MAX - 1);
+        status = add_and_test(&tracking_sem, UVM_GPU_SEMAPHORE_MAX_JUMP - i);
if (status != NV_OK)
goto done;
}
// Test wrap-around cases
for (i = 0; i < 100; ++i) {
// Start with a value right before wrap-around
NvU64 starting_value = (1ULL<<32) * (i + 1) - i - 1;
manual_set(&tracking_sem, starting_value);
// And set payload to after wrap-around
status = set_and_test(&tracking_sem, (1ULL<<32) * (i + 1) + i);
if (status != NV_OK)
goto done;
}
g_uvm_global.disable_fatal_error_assert = true;
uvm_release_asserts_set_global_error_for_tests = true;
status = test_invalid_jumps(&tracking_sem);
uvm_release_asserts_set_global_error_for_tests = false;
g_uvm_global.disable_fatal_error_assert = false;
if (status != NV_OK)
goto done;
done:
uvm_gpu_tracking_semaphore_free(&tracking_sem);
return status;

View File

@@ -52,11 +52,21 @@ typedef enum
// By default all operations include a membar sys after any transfer and
// before a semaphore operation.
// This flag indicates that next operation should use no membar at all.
//
// For end of push semaphore release, this flag indicates that the push
// itself does not need a membar to be used (membar sys is the default). A
// membar may still be used, if needed to order the semaphore release
// write. See comments in uvm_channel_end_push().
UVM_PUSH_FLAG_NEXT_MEMBAR_NONE,
// By default all operations include a membar sys after any transfer and
// before a semaphore operation.
// This flag indicates that next operation should use a membar gpu instead.
//
// For end of push semaphore release, this flag indicates that the push
// itself only needs a membar gpu (the default is membar sys). A membar sys
// may still be used, if needed to order the semaphore release write. See
// comments in uvm_channel_end_push().
UVM_PUSH_FLAG_NEXT_MEMBAR_GPU,
UVM_PUSH_FLAG_COUNT,

View File

@@ -26,6 +26,15 @@
#if defined(CONFIG_DMA_SHARED_BUFFER)
//
// The Linux kernel's dma_length in struct scatterlist is unsigned int
// which limits the maximum sg length to 4GB - 1.
// To get around this limitation, the BAR1 scatterlist returned by RM
// is split into (4GB - PAGE_SIZE) sized chunks to build the sg_table.
//
#define NV_DMA_BUF_SG_MAX_LEN ((NvU32)(NVBIT64(32) - PAGE_SIZE))
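A worked example of the chunking this define implies, assuming 4 KiB pages:

/* NV_DMA_BUF_SG_MAX_LEN = 4 GiB - 4 KiB = 0xFFFFF000.
 * An 8 GiB handle needs ceil(8 GiB / (4 GiB - 4 KiB)) = 3 sg entries:
 * two full chunks plus an 8 KiB remainder. */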
typedef struct nv_dma_buf_mem_handle
{
NvHandle h_memory;
@@ -259,26 +268,36 @@ nv_dma_buf_unmap_unlocked(
nv_dma_device_t *peer_dma_dev,
nv_dma_buf_file_private_t *priv,
struct sg_table *sgt,
-    NvU32 count
+    NvU32 mapped_handle_count
)
{
NV_STATUS status;
NvU32 i;
NvU64 dma_len;
NvU64 dma_addr;
NvU64 bar1_va;
NvBool bar1_unmap_needed;
struct scatterlist *sg = NULL;
bar1_unmap_needed = (priv->bar1_va_ref_count == 0);
-    for_each_sg(sgt->sgl, sg, count, i)
+    sg = sgt->sgl;
+    for (i = 0; i < mapped_handle_count; i++)
     {
-        dma_addr = sg_dma_address(sg);
-        dma_len  = priv->handles[i].size;
-        bar1_va  = priv->handles[i].bar1_va;
+        NvU64 handle_size = priv->handles[i].size;

-        WARN_ON(sg_dma_len(sg) != priv->handles[i].size);
+        dma_addr = sg_dma_address(sg);
+        dma_len = 0;
+
+        //
+        // Seek ahead in the scatterlist until the handle size is covered.
+        // IOVA unmap can then be done all at once instead of doing it
+        // one sg at a time.
+        //
+        while(handle_size != dma_len)
+        {
+            dma_len += sg_dma_len(sg);
+            sg = sg_next(sg);
+        }

         nv_dma_unmap_peer(peer_dma_dev, (dma_len / os_page_size), dma_addr);
@@ -309,7 +328,8 @@ nv_dma_buf_map(
nv_dma_device_t peer_dma_dev = {{ 0 }};
NvBool bar1_map_needed;
NvBool bar1_unmap_needed;
-    NvU32 count = 0;
+    NvU32 mapped_handle_count = 0;
+    NvU32 num_sg_entries = 0;
NvU32 i = 0;
int rc = 0;
@@ -361,13 +381,23 @@ nv_dma_buf_map(
}
memset(sgt, 0, sizeof(struct sg_table));
//
// Pre-calculate number of sg entries we need based on handle size.
// This is needed to allocate sg_table.
//
for (i = 0; i < priv->num_objects; i++)
{
NvU64 count = priv->handles[i].size + NV_DMA_BUF_SG_MAX_LEN - 1;
do_div(count, NV_DMA_BUF_SG_MAX_LEN);
num_sg_entries += count;
}
//
// RM currently returns contiguous BAR1, so we create as many
-// sg entries as the number of handles being mapped.
+// sg entries as num_sg_entries calculated above.
// When RM can alloc discontiguous BAR1, this code will need to be revisited.
//
-    rc = sg_alloc_table(sgt, priv->num_objects, GFP_KERNEL);
+    rc = sg_alloc_table(sgt, num_sg_entries, GFP_KERNEL);
if (rc != 0)
{
goto free_sgt;
@@ -377,7 +407,8 @@ nv_dma_buf_map(
peer_dma_dev.addressable_range.limit = (NvU64)dev->dma_mask;
bar1_map_needed = bar1_unmap_needed = (priv->bar1_va_ref_count == 0);
-    for_each_sg(sgt->sgl, sg, priv->num_objects, i)
+    sg = sgt->sgl;
+    for (i = 0; i < priv->num_objects; i++)
{
NvU64 dma_addr;
NvU64 dma_len;
@@ -395,9 +426,15 @@ nv_dma_buf_map(
}
}
mapped_handle_count++;
dma_addr = priv->handles[i].bar1_va;
dma_len = priv->handles[i].size;
//
// IOVA map the full handle at once and then breakdown the range
// (dma_addr, dma_addr + dma_len) into smaller sg entries.
//
status = nv_dma_map_peer(&peer_dma_dev, priv->nv->dma_dev,
0x1, (dma_len / os_page_size), &dma_addr);
if (status != NV_OK)
@@ -411,14 +448,23 @@ nv_dma_buf_map(
priv->handles[i].bar1_va);
}
mapped_handle_count--;
// Unmap remaining memory handles
goto unmap_handles;
}
-        sg_set_page(sg, NULL, dma_len, 0);
-        sg_dma_address(sg) = (dma_addr_t)dma_addr;
-        sg_dma_len(sg) = dma_len;
-        count++;
+        while(dma_len != 0)
+        {
+            NvU32 sg_len = NV_MIN(dma_len, NV_DMA_BUF_SG_MAX_LEN);
+            sg_set_page(sg, NULL, sg_len, 0);
+            sg_dma_address(sg) = (dma_addr_t)dma_addr;
+            sg_dma_len(sg) = sg_len;
+            dma_addr += sg_len;
+            dma_len -= sg_len;
+            sg = sg_next(sg);
+        }
     }
priv->bar1_va_ref_count++;
@@ -434,7 +480,7 @@ nv_dma_buf_map(
return sgt;
unmap_handles:
-    nv_dma_buf_unmap_unlocked(sp, &peer_dma_dev, priv, sgt, count);
+    nv_dma_buf_unmap_unlocked(sp, &peer_dma_dev, priv, sgt, mapped_handle_count);
sg_free_table(sgt);
@@ -820,7 +866,12 @@ nv_dma_buf_reuse(
goto cleanup_dmabuf;
}
-    if (params->index > (priv->total_objects - params->numObjects))
+    if ((priv->total_objects < params->numObjects) ||
+        (params->index > (priv->total_objects - params->numObjects)))
{
status = NV_ERR_INVALID_ARGUMENT;
goto unlock_priv;
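Why the added first clause matters, with illustrative numbers:

/* Both fields are unsigned: with total_objects == 2 and numObjects == 5,
 * (total_objects - numObjects) underflows to a huge value and the old
 * index-only check passes for any index. Testing
 * total_objects < numObjects first rejects the request outright. */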

View File

@@ -132,6 +132,13 @@ nvidia_vma_access(
pageIndex = ((addr - vma->vm_start) >> PAGE_SHIFT);
pageOffset = (addr & ~PAGE_MASK);
if (length < 0)
{
return -EINVAL;
}
if (!mmap_context->valid)
{
nv_printf(NV_DBG_ERRORS, "NVRM: VM: invalid mmap context\n");
@@ -210,8 +217,19 @@ static vm_fault_t nvidia_fault(
NvU64 page;
NvU64 num_pages = NV_VMA_SIZE(vma) >> PAGE_SHIFT;
-    NvU64 pfn_start =
-        (nvlfp->mmap_context.mmap_start >> PAGE_SHIFT) + vma->vm_pgoff;
+    NvU64 pfn_start = (nvlfp->mmap_context.mmap_start >> PAGE_SHIFT);
+
+    if (vma->vm_pgoff != 0)
+    {
+        return VM_FAULT_SIGBUS;
+    }
// Mapping revocation is only supported for GPU mappings.
if (NV_IS_CTL_DEVICE(nv))
@@ -430,7 +448,7 @@ static int nvidia_mmap_numa(
const nv_alloc_mapping_context_t *mmap_context)
{
NvU64 start, addr;
-    unsigned int pages;
+    NvU64 pages;
NvU64 i;
pages = NV_VMA_SIZE(vma) >> PAGE_SHIFT;
@@ -483,6 +501,13 @@ int nvidia_mmap_helper(
return -EINVAL;
}
if (vma->vm_pgoff != 0)
{
return -EINVAL;
}
NV_PRINT_VMA(NV_DBG_MEMINFO, vma);
status = nv_check_gpu_state(nv);
@@ -509,6 +534,13 @@ int nvidia_mmap_helper(
NvU64 access_start = mmap_context->access_start;
NvU64 access_len = mmap_context->access_size;
// validate the size
if (NV_VMA_SIZE(vma) != mmap_length)
{
return -ENXIO;
}
if (IS_REG_OFFSET(nv, access_start, access_len))
{
if (nv_encode_caching(&vma->vm_page_prot, NV_MEMORY_UNCACHED,

View File

@@ -1467,6 +1467,11 @@ static int nv_open_device(nv_state_t *nv, nvidia_stack_t *sp)
return -ENODEV;
}
if (unlikely(NV_ATOMIC_READ(nvl->usage_count) >= NV_S32_MAX))
return -EMFILE;
if ( ! (nv->flags & NV_FLAG_OPEN))
{
/* Sanity check: !NV_FLAG_OPEN requires usage_count == 0 */

View File

@@ -219,6 +219,7 @@ NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_dram_clk_to_mc_clk
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_get_dram_num_channels
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tegra_dram_types
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_pxm_to_node
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_screen_info
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations
NV_CONFTEST_TYPE_COMPILE_TESTS += kuid_t
@@ -242,9 +243,9 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += vmalloc_has_pgprot_t_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
NV_CONFTEST_TYPE_COMPILE_TESTS += pci_channel_state
NV_CONFTEST_TYPE_COMPILE_TESTS += pci_dev_has_ats_enabled
-NV_CONFTEST_TYPE_COMPILE_TESTS += mt_device_gre
NV_CONFTEST_TYPE_COMPILE_TESTS += remove_memory_has_nid_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += add_memory_driver_managed_has_mhp_flags_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += num_registered_fb
NV_CONFTEST_GENERIC_COMPILE_TESTS += dom0_kernel_present
NV_CONFTEST_GENERIC_COMPILE_TESTS += nvidia_vgpu_kvm_build

View File

@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright (c) 2015-2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2015-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -207,7 +207,10 @@ static int nvlink_fops_release(struct inode *inode, struct file *filp)
nvlink_print(NVLINK_DBG_INFO, "nvlink driver close\n");
WARN_ON(private == NULL);
if (private == NULL)
return -ENOMEM;
mutex_lock(&nvlink_drvctx.lock);

View File

@@ -1120,31 +1120,58 @@ void NV_API_CALL os_get_screen_info(
NvU64 consoleBar2Address
)
{
-#if defined(CONFIG_FB)
-    int i;
-
     *pPhysicalAddress = 0;
     *pFbWidth = *pFbHeight = *pFbDepth = *pFbPitch = 0;

-    for (i = 0; i < num_registered_fb; i++)
+#if defined(CONFIG_FB) && defined(NV_NUM_REGISTERED_FB_PRESENT)
+    if (num_registered_fb > 0)
     {
-        if (!registered_fb[i])
-            continue;
+        int i;

-        /* Make sure base address is mapped to GPU BAR */
-        if ((registered_fb[i]->fix.smem_start == consoleBar1Address) ||
-            (registered_fb[i]->fix.smem_start == consoleBar2Address))
+        for (i = 0; i < num_registered_fb; i++)
         {
-            *pPhysicalAddress = registered_fb[i]->fix.smem_start;
-            *pFbWidth = registered_fb[i]->var.xres;
-            *pFbHeight = registered_fb[i]->var.yres;
-            *pFbDepth = registered_fb[i]->var.bits_per_pixel;
-            *pFbPitch = registered_fb[i]->fix.line_length;
-            break;
+            if (!registered_fb[i])
+                continue;
+
+            /* Make sure base address is mapped to GPU BAR */
+            if ((registered_fb[i]->fix.smem_start == consoleBar1Address) ||
+                (registered_fb[i]->fix.smem_start == consoleBar2Address))
+            {
+                *pPhysicalAddress = registered_fb[i]->fix.smem_start;
+                *pFbWidth = registered_fb[i]->var.xres;
+                *pFbHeight = registered_fb[i]->var.yres;
+                *pFbDepth = registered_fb[i]->var.bits_per_pixel;
+                *pFbPitch = registered_fb[i]->fix.line_length;
+                break;
+            }
         }
     }
+#elif NV_IS_EXPORT_SYMBOL_PRESENT_screen_info
+    /*
+     * If there is not a framebuffer console, return 0 size.
+     *
+     * orig_video_isVGA is set to 1 during early Linux kernel
+     * initialization, and then will be set to a value, such as
+     * VIDEO_TYPE_VLFB or VIDEO_TYPE_EFI if an fbdev console is used.
+     */
+    if (screen_info.orig_video_isVGA > 1)
+    {
+        NvU64 physAddr = screen_info.lfb_base;
+#if defined(VIDEO_CAPABILITY_64BIT_BASE)
+        physAddr |= (NvU64)screen_info.ext_lfb_base << 32;
+#endif
+
+        /* Make sure base address is mapped to GPU BAR */
+        if ((physAddr == consoleBar1Address) ||
+            (physAddr == consoleBar2Address))
+        {
+            *pPhysicalAddress = physAddr;
+            *pFbWidth = screen_info.lfb_width;
+            *pFbHeight = screen_info.lfb_height;
+            *pFbDepth = screen_info.lfb_depth;
+            *pFbPitch = screen_info.lfb_linelength;
+        }
+    }
 #else
     *pPhysicalAddress = 0;
     *pFbWidth = *pFbHeight = *pFbDepth = *pFbPitch = 0;
 #endif
 }

View File

@@ -36,25 +36,25 @@
// and then checked back in. You cannot make changes to these sections without
// corresponding changes to the buildmeister script
#ifndef NV_BUILD_BRANCH
-#define NV_BUILD_BRANCH r516_87
+#define NV_BUILD_BRANCH r517_71
 #endif
 #ifndef NV_PUBLIC_BRANCH
-#define NV_PUBLIC_BRANCH r516_87
+#define NV_PUBLIC_BRANCH r517_71
 #endif

 #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
-#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r515/r516_87-323"
-#define NV_BUILD_CHANGELIST_NUM (31835630)
+#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r515/r517_71-480"
+#define NV_BUILD_CHANGELIST_NUM (31976733)
 #define NV_BUILD_TYPE "Official"
-#define NV_BUILD_NAME "rel/gpu_drv/r515/r516_87-323"
-#define NV_LAST_OFFICIAL_CHANGELIST_NUM (31835630)
+#define NV_BUILD_NAME "rel/gpu_drv/r515/r517_71-480"
+#define NV_LAST_OFFICIAL_CHANGELIST_NUM (31976733)
 #else /* Windows builds */
-#define NV_BUILD_BRANCH_VERSION "r516_87-7"
-#define NV_BUILD_CHANGELIST_NUM (31779607)
+#define NV_BUILD_BRANCH_VERSION "r517_71-1"
+#define NV_BUILD_CHANGELIST_NUM (31976733)
 #define NV_BUILD_TYPE "Official"
-#define NV_BUILD_NAME "517.33"
-#define NV_LAST_OFFICIAL_CHANGELIST_NUM (31779607)
+#define NV_BUILD_NAME "517.72"
+#define NV_LAST_OFFICIAL_CHANGELIST_NUM (31976733)
 #define NV_BUILD_BRANCH_BASE_VERSION R515
 #endif
#define NV_BUILD_BRANCH_BASE_VERSION R515
#endif
// End buildmeister python edited section

View File

@@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
-#define NV_VERSION_STRING "515.65.07"
+#define NV_VERSION_STRING "515.86.01"
#else

View File

@@ -195,6 +195,11 @@ extern NVLOG_LOGGER NvLogLogger;
#define NVLOG_BUFFER_FLAGS_FORMAT_LIBOS_LOG 1
#define NVLOG_BUFFER_FLAGS_FORMAT_MEMTRACK 2
// Never deallocate this buffer until RM is unloaded
#define NVLOG_BUFFER_FLAGS_PRESERVE 11:11
#define NVLOG_BUFFER_FLAGS_PRESERVE_NO 0
#define NVLOG_BUFFER_FLAGS_PRESERVE_YES 1
// Buffer GPU index
#define NVLOG_BUFFER_FLAGS_GPU_INSTANCE 31:24
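`11:11` is NVIDIA's DRF bit-range notation (high bit : low bit, here the single bit 11); later hunks in this change set and test the new field via the DRF helpers:

pNvLogBuffer->flags |= DRF_DEF(LOG, _BUFFER_FLAGS, _PRESERVE, _YES);

if (FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags))
{
    /* buffer is kept across RM unload and found again by
     * findPreservedNvlogBuffer() */
}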

View File

@@ -4091,6 +4091,8 @@ typedef struct tagNVT_GAMUT_METADATA
#define NVT_DPCD_ADDRESS_DOWN_REP_BUFFER_FIELD 0x01400
#define NVT_DPCD_ADDRESS_UP_REQ_BUFFER_FIELD 0x01600
#define NVT_DPCD_ADDRESS_DEVICE_SERVICE_IRQ_VECTOR_ESI0 0x02003
#define NVT_DPCD_ADDRESS_DP_TUNNELING_DEVICE_IEEE_OUI 0xE0000
#define NVT_DPCD_ADDRESS_DP_TUNNELING_DEVICE_ID_STRING 0xE0003
#define NVT_DPCD_ADDRESS_DP_TUNNELING_CAPS_SUPPORT_FIELD 0xE000D
#define NVT_DPCD_ADDRESS_DP_IN_ADAPTER_INFO_FIELD 0xE000E
#define NVT_DPCD_ADDRESS_USB4_DRIVER_ID_FIELD 0xE000F
@@ -5079,7 +5081,7 @@ typedef struct tagNVT_DPCD_CONFIG
typedef struct tagNVT_DPCD_DP_TUNNELING_CAPS
{
-    NvU8 dpTunnelingSupport : 1; // DP Tunneling through USB4 Support
+    NvU8 dpTunneling : 1; // DP Tunneling through USB4 Support
NvU8 reserved : 5; // Reserved.
NvU8 dpPanelReplayTunnelingOptSupport : 1; // Panel Replay Tunneling Optimization Support
NvU8 dpInBwAllocationModeSupport : 1; // DP IN Bandwidth Allocation Mode Support

View File

@@ -64,7 +64,7 @@
* Total number of nvlink endpoints core library can have
* This is mapped to NVLINK_MAX_SYSTEM_LINK_NUM in drivers/nvlink/interface/nvlink.h
*/
-#define NVLINK_MAX_NVLINK_ENDPOINTS 312
+#define NVLINK_MAX_NVLINK_ENDPOINTS 624
#define NVLINK_VERSION_STRING_LENGTH 64

View File

@@ -28,6 +28,7 @@
#include "../nvlink_ctx.h"
#include "../nvlink_helper.h"
#include "nvlink_lock.h"
#include "nvctassert.h"
#define NVLINK_IOC_GET_BUF(ctrlParams, type) (ctrlParams)->size >= sizeof(type) ? (type *) (ctrlParams)->buf : NULL
@@ -3423,6 +3424,8 @@ nvlink_lib_ctrl_get_device_link_states
NvU32 numLinks = 0;
NvU32 i = 0;
ct_assert(NVLINK_MAX_SYSTEM_LINK_NUM == NVLINK_MAX_NVLINK_ENDPOINTS);
nvlink_link **links = (nvlink_link **)nvlink_malloc(
sizeof(nvlink_link *) * NVLINK_MAX_SYSTEM_LINK_NUM);
if (links == NULL)

View File

@@ -379,7 +379,7 @@ typedef NV2080_CTRL_GPUMON_SAMPLES NV2080_CTRL_PERF_GET_GPUMON_PERFMON_UTIL_SAMP
/*!
* Number of GPU monitoring sample in their respective buffers.
*/
-#define NV2080_CTRL_PERF_GPUMON_SAMPLE_COUNT_PERFMON_UTIL 100
+#define NV2080_CTRL_PERF_GPUMON_SAMPLE_COUNT_PERFMON_UTIL 72
#define NV2080_CTRL_PERF_GPUMON_PERFMON_UTIL_BUFFER_SIZE \
NV_SIZEOF32(NV2080_CTRL_PERF_GPUMON_PERFMON_UTIL_SAMPLE) * \

View File

@@ -1041,24 +1041,41 @@ static NvBool libosCopyLogToNvlog_nowrap(LIBOS_LOG_DECODE_LOG *pLog)
     NvU64 putCopy = pLog->physicLogBuffer[0];
     NvU64 putOffset = putCopy * sizeof(NvU64) + sizeof(NvU64);

-    if (putOffset == pNvLogBuffer->pos)
+    //
+    // If RM was not unloaded, we will reuse a preserved nowrap nvlog buffer with the fresh
+    // physical log buffer. In this case, we fix up all the offsets into the nvlog buffer to be
+    // relative to its preserved position rather than the start.
+    //
+    NvU64 nvlogPos = pNvLogBuffer->pos - pLog->preservedNoWrapPos;
+
+    if (putOffset < nvlogPos)
+    {
+        // Buffer put counter unexpectedly reset. Terminate nowrap log collection.
+        return NV_FALSE;
+    }
+
+    if (putOffset == nvlogPos)
     {
         // No new data
         return NV_TRUE;
     }

-    if (putOffset > pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64))
+    if (putOffset + pLog->preservedNoWrapPos >
+        pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64))
     {
         // Are we done filling nowrap?
         return NV_FALSE;
     }

-    NvU64 len = putOffset - pNvLogBuffer->pos;
-    NvU8 *pSrc = ((NvU8 *)pLog->physicLogBuffer) + pNvLogBuffer->pos;
+    NvU64 len = putOffset - nvlogPos;
+    NvU8 *pSrc = ((NvU8 *)pLog->physicLogBuffer) + nvlogPos;
     NvU8 *pDst = pNoWrapBuf->data + pNvLogBuffer->pos;

+    pLog->bDidPush = NV_TRUE;
+
     portMemCopy(pDst, len, pSrc, len);
-    pNvLogBuffer->pos = putOffset; // TODO: usage of NVLOG_BUFFER::pos is sus here, reconsider?
-    *(NvU64 *)(pNoWrapBuf->data) = putCopy;
+    pNvLogBuffer->pos = putOffset + pLog->preservedNoWrapPos; // TODO: usage of NVLOG_BUFFER::pos is sus here, reconsider?
+    *(NvU64 *)(pNoWrapBuf->data) = putCopy + pLog->preservedNoWrapPos / sizeof(NvU64);

     return NV_TRUE;
 }
@@ -1095,6 +1112,46 @@ static void libosExtractLogs_nvlog(LIBOS_LOG_DECODE *logDecode, NvBool bSyncNvLo
}
}
void libosPreserveLogs(LIBOS_LOG_DECODE *pLogDecode)
{
NvU64 i;
for (i = 0; i < pLogDecode->numLogBuffers; i++)
{
LIBOS_LOG_DECODE_LOG *pLog = &pLogDecode->log[i];
if (pLog->bDidPush)
{
NvHandle hNvlog = pLog->hNvLogNoWrap;
NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[hNvlog];
if (hNvlog == 0 || pNvLogBuffer == NULL)
continue;
pNvLogBuffer->flags |= DRF_DEF(LOG, _BUFFER_FLAGS, _PRESERVE, _YES);
}
}
}
static NvBool findPreservedNvlogBuffer(NvU32 tag, NvU32 gpuInstance, NVLOG_BUFFER_HANDLE *pHandle)
{
NVLOG_BUFFER_HANDLE handle = 0;
NV_STATUS status = nvlogGetBufferHandleFromTag(tag, &handle);
if (status != NV_OK)
return NV_FALSE;
NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[handle];
if (FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) &&
DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance &&
(pNvLogBuffer->pos < pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64)))
{
*pHandle = handle;
return NV_TRUE;
}
return NV_FALSE;
}
#endif // LIBOS_LOG_TO_NVLOG
/**
@@ -1211,39 +1268,60 @@ void libosLogAddLogEx(LIBOS_LOG_DECODE *logDecode, void *buffer, NvU64 bufferSiz
     pLog->hNvLogWrap = 0;
     pLog->bNvLogNoWrap = NV_FALSE;
+    pLog->bDidPush = NV_FALSE;
+    pLog->preservedNoWrapPos = 0;

     LIBOS_LOG_NVLOG_BUFFER *pNoWrapBuf;
+    NvU32 tag = LIBOS_LOG_NVLOG_BUFFER_TAG(logDecode->sourceName, i * 2);

-    status = nvlogAllocBuffer(
-        bufferSize + NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data), libosNoWrapBufferFlags,
-        LIBOS_LOG_NVLOG_BUFFER_TAG(logDecode->sourceName, i * 2),
-        &pLog->hNvLogNoWrap);
+    NvBool bFoundPreserved = findPreservedNvlogBuffer(tag, gpuInstance, &pLog->hNvLogNoWrap);

-    if (status == NV_OK)
+    if (!bFoundPreserved)
     {
-        pNoWrapBuf = (LIBOS_LOG_NVLOG_BUFFER *)NvLogLogger.pBuffers[pLog->hNvLogNoWrap]->data;
-        if (name)
+        status = nvlogAllocBuffer(
+            bufferSize + NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data), libosNoWrapBufferFlags,
+            tag,
+            &pLog->hNvLogNoWrap);
+
+        if (status == NV_OK)
         {
-            portStringCopy(
-                pNoWrapBuf->taskPrefix, sizeof pNoWrapBuf->taskPrefix, name, sizeof pNoWrapBuf->taskPrefix);
+            pNoWrapBuf = (LIBOS_LOG_NVLOG_BUFFER *)NvLogLogger.pBuffers[pLog->hNvLogNoWrap]->data;
+            if (name)
+            {
+                portStringCopy(
+                    pNoWrapBuf->taskPrefix, sizeof pNoWrapBuf->taskPrefix, name, sizeof pNoWrapBuf->taskPrefix);
+            }
+            pNoWrapBuf->gpuArch = gpuArch;
+            pNoWrapBuf->gpuImpl = gpuImpl;
+            NvLogLogger.pBuffers[pLog->hNvLogNoWrap]->pos = sizeof(NvU64); // offset to account for put pointer
+            pLog->bNvLogNoWrap = NV_TRUE;
+        }
+        else
+        {
+            printf("nvlogAllocBuffer nowrap failed\n");
         }
-        pNoWrapBuf->gpuArch = gpuArch;
-        pNoWrapBuf->gpuImpl = gpuImpl;
-        NvLogLogger.pBuffers[pLog->hNvLogNoWrap]->pos = sizeof(NvU64); // offset to account for put pointer
-        pLog->bNvLogNoWrap = NV_TRUE;
     }
     else
     {
-        printf("nvlogAllocBuffer nowrap failed\n");
+        pLog->bNvLogNoWrap = NV_TRUE;
+        pLog->preservedNoWrapPos = NvLogLogger.pBuffers[pLog->hNvLogNoWrap]->pos;
+
+        //
+        // The 0th NvU64 is the last value of put pointer from the physical log buffer, which is
+        // the number of NvU64 log buffer elements in it plus one.
+        // Subtract one NvU64 from it to avoid off-by-one error.
+        //
+        if (pLog->preservedNoWrapPos >= sizeof(NvU64))
+            pLog->preservedNoWrapPos -= sizeof(NvU64);
     }

     LIBOS_LOG_NVLOG_BUFFER *pWrapBuf;
+    tag = LIBOS_LOG_NVLOG_BUFFER_TAG(logDecode->sourceName, i * 2 + 1);

     status = nvlogAllocBuffer(
         bufferSize + NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data), libosWrapBufferFlags,
-        LIBOS_LOG_NVLOG_BUFFER_TAG(logDecode->sourceName, i * 2 + 1),
-        &pLog->hNvLogWrap);
+        tag, &pLog->hNvLogWrap);
if (status == NV_OK)
{
@@ -1349,13 +1427,13 @@ void libosLogDestroy(LIBOS_LOG_DECODE *logDecode)
if (pLog->hNvLogNoWrap != 0)
{
-        nvlogDeallocBuffer(pLog->hNvLogNoWrap);
+        nvlogDeallocBuffer(pLog->hNvLogNoWrap, NV_FALSE);
pLog->hNvLogNoWrap = 0;
}
if (pLog->hNvLogWrap != 0)
{
-        nvlogDeallocBuffer(pLog->hNvLogWrap);
+        nvlogDeallocBuffer(pLog->hNvLogWrap, NV_FALSE);
pLog->hNvLogWrap = 0;
}
}

View File

@@ -108,6 +108,9 @@ struct LIBOS_LOG_DECODE_LOG
NvU32 hNvLogNoWrap; // No wrap buffer captures first records.
NvU32 hNvLogWrap; // Wrap buffer captures last records.
NvBool bNvLogNoWrap; // NV_TRUE if no wrap buffer not full.
NvBool bDidPush; // NV_TRUE if this buffer was ever pushed to
NvU64 preservedNoWrapPos; // Position in preserved nvlog buffer
#endif
#if LIBOS_LOG_DECODE_ENABLE
@@ -170,6 +173,8 @@ void libosLogDestroy(LIBOS_LOG_DECODE *logDecode);
void libosExtractLogs(LIBOS_LOG_DECODE *logDecode, NvBool bSyncNvLog);
void libosPreserveLogs(LIBOS_LOG_DECODE *pLogDecode);
#ifdef __cplusplus
}
#endif

View File

@@ -110,6 +110,7 @@ typedef struct {
} set_maxval;
} NvKmsSyncPtOpParams;
NvBool nvkms_output_rounding_fix(void);
void nvkms_call_rm (void *ops);
void* nvkms_alloc (size_t size,

View File

@@ -1288,6 +1288,8 @@ static void EvoSetOCsc0C5(NVDispEvoPtr pDispEvo, const NvU32 head)
const float32_t zeroF32 = NvU32viewAsF32(NV_FLOAT_ZERO);
const float32_t oneF32 = NvU32viewAsF32(NV_FLOAT_ONE);
const float32_t inv2048F32 = f32_div(NvU32viewAsF32(NV_FLOAT_HALF),
NvU32viewAsF32(NV_FLOAT_1024));
/* divide satCos by the default setting of 1024 */
const float32_t satCos = f32_div(i32_to_f32(pHeadState->procAmp.satCos),
NvU32viewAsF32(NV_FLOAT_1024));
@@ -1324,6 +1326,12 @@ static void EvoSetOCsc0C5(NVDispEvoPtr pDispEvo, const NvU32 head)
ocsc0Matrix = nvMultiply3x4Matrix(&satHueMatrix, &ocsc0Matrix);
ocsc0Matrix = nvMultiply3x4Matrix(&CrYCbtoRGBMatrix, &ocsc0Matrix);
if (nvkms_output_rounding_fix()) {
ocsc0Matrix.m[0][3] = f32_add(ocsc0Matrix.m[0][3], inv2048F32);
ocsc0Matrix.m[1][3] = f32_add(ocsc0Matrix.m[1][3], inv2048F32);
ocsc0Matrix.m[2][3] = f32_add(ocsc0Matrix.m[2][3], inv2048F32);
}
nvDmaSetStartEvoMethod(pChannel, NVC57D_HEAD_SET_OCSC0COEFFICIENT_C00(head), 12);
nvDmaSetEvoMethodData(pChannel, DRF_NUM(C57D, _HEAD_SET_OCSC0COEFFICIENT_C00, _VALUE, cscCoefConvertS514(ocsc0Matrix.m[0][0])));
nvDmaSetEvoMethodData(pChannel, DRF_NUM(C57D, _HEAD_SET_OCSC0COEFFICIENT_C01, _VALUE, cscCoefConvertS514(ocsc0Matrix.m[0][1])));
@@ -1965,11 +1973,13 @@ static inline NvU32 GetMaxPixelsFetchedPerLine(NvU16 inWidth,
static void SetScalingUsageBoundsOneWindow5(
NVDevEvoPtr pDevEvo, NvU32 window,
const struct NvKmsScalingUsageBounds *pScaling,
NvBool layerUsable,
const NVHwModeViewPortEvo *pViewPort,
NVEvoUpdateState *updateState)
{
NVEvoChannelPtr pChannel = pDevEvo->core;
NvU32 setWindowUsageBounds = NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C5;
NvU32 maxPixelsFetchedPerLine;
nvUpdateUpdateState(pDevEvo, updateState, pChannel);
@@ -1981,10 +1991,15 @@ static void SetScalingUsageBoundsOneWindow5(
DRF_NUM(C57D, _WINDOW_SET_MAX_INPUT_SCALE_FACTOR, _VERTICAL,
pScaling->maxVDownscaleFactor));
if (layerUsable) {
maxPixelsFetchedPerLine = GetMaxPixelsFetchedPerLine(pViewPort->in.width,
pScaling->maxHDownscaleFactor);
} else {
maxPixelsFetchedPerLine = 0;
}
setWindowUsageBounds |=
(DRF_NUM(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _MAX_PIXELS_FETCHED_PER_LINE,
GetMaxPixelsFetchedPerLine(pViewPort->in.width,
pScaling->maxHDownscaleFactor))) |
(DRF_NUM(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _MAX_PIXELS_FETCHED_PER_LINE,maxPixelsFetchedPerLine)) |
(pScaling->vTaps >= NV_EVO_SCALER_5TAPS ?
DRF_DEF(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _INPUT_SCALER_TAPS, _TAPS_5) :
DRF_DEF(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _INPUT_SCALER_TAPS, _TAPS_2)) |
@@ -2056,8 +2071,9 @@ static NvBool EvoSetUsageBoundsC5(NVDevEvoPtr pDevEvo, NvU32 sd, NvU32 head,
needCoreUpdate = EvoSetUsageBounds3(pDevEvo, sd, head, pUsage, updateState);
for (layer = 0; layer < pDevEvo->head[head].numLayers; layer++) {
if (!nvEvoScalingUsageBoundsEqual(&pCurrentUsage->layer[layer].scaling,
&pUsage->layer[layer].scaling)) {
if ((pCurrentUsage->layer[layer].usable != pUsage->layer[layer].usable) ||
(!nvEvoScalingUsageBoundsEqual(&pCurrentUsage->layer[layer].scaling,
&pUsage->layer[layer].scaling))) {
const NVHwModeViewPortEvo *pViewPort =
&pDevEvo->gpus[sd].pDispEvo->headState[head].timings.viewPort;
@@ -2066,6 +2082,7 @@ static NvBool EvoSetUsageBoundsC5(NVDevEvoPtr pDevEvo, NvU32 sd, NvU32 head,
NV_EVO_CHANNEL_MASK_WINDOW_NUMBER(
pDevEvo->head[head].layer[layer]->channelMask),
&pUsage->layer[layer].scaling,
pUsage->layer[layer].usable,
pViewPort,
updateState);
needCoreUpdate = TRUE;
@@ -4383,7 +4400,9 @@ static void EvoSetLUTContextDmaC5(const NVDispEvoRec *pDispEvo,
nvDmaSetStartEvoMethod(pChannel, NVC57D_HEAD_SET_OLUT_CONTROL(head), 1);
nvDmaSetEvoMethodData(pChannel,
DRF_DEF(C57D, _HEAD_SET_OLUT_CONTROL, _INTERPOLATE, _ENABLE) |
(!nvkms_output_rounding_fix() ?
DRF_DEF(C57D, _HEAD_SET_OLUT_CONTROL, _INTERPOLATE, _ENABLE) :
DRF_DEF(C57D, _HEAD_SET_OLUT_CONTROL, _INTERPOLATE, _DISABLE)) |
DRF_DEF(C57D, _HEAD_SET_OLUT_CONTROL, _MIRROR, _DISABLE) |
DRF_DEF(C57D, _HEAD_SET_OLUT_CONTROL, _MODE, _DIRECT10) |
DRF_NUM(C57D, _HEAD_SET_OLUT_CONTROL, _SIZE, NV_LUT_VSS_HEADER_SIZE +
@@ -5234,7 +5253,7 @@ static NvBool EvoSetViewportInOut3(NVDevEvoPtr pDevEvo, const int head,
/*
* Program MAX_PIXELS_FETCHED_PER_LINE window usage bounds
* for each window thats attached to the head.
* for each window that is attached to the head.
*
* Precomp will clip the post-scaled window to the input viewport, reverse-scale
* this cropped size back to the input surface domain, and isohub will fetch
@@ -5242,6 +5261,10 @@ static NvBool EvoSetViewportInOut3(NVDevEvoPtr pDevEvo, const int head,
* so the MAX_PIXELS_FETCHED_PER_LINE will be bounded by the input viewport
* width. SetScalingUsageBoundsOneWindow5() will take care of updating
* MAX_PIXELS_FETCHED_PER_LINE, if window scaling is enabled later.
*
* Program MAX_PIXELS_FETCHED_PER_LINE for each window that is attached to
* the head. For Turing+, SetScalingUsageBoundsOneWindow5() will take care of
* programming window usage bounds only for the layers/windows in use.
*/
setWindowUsageBounds |=
DRF_NUM(C37D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _MAX_PIXELS_FETCHED_PER_LINE,

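The output-rounding fix above adds inv2048F32 (half of a 1/1024 LSB) to the OCSC0 offset column and disables OLUT interpolation. Assuming the hardware truncates the result, adding half an LSB before truncation is equivalent to round-to-nearest; a standalone sketch of that arithmetic:

#include <stdio.h>

int main(void)
{
    const float lsb  = 1.0f / 1024.0f;
    const float bias = 0.5f / 1024.0f;        /* half an LSB, i.e. 1/2048 */
    float value = 0.9f * lsb;                 /* just under one LSB */

    int truncated = (int)(value / lsb);           /* 0: plain truncation drops it */
    int rounded   = (int)((value + bias) / lsb);  /* 1: half-LSB bias rounds it up */

    printf("%d %d\n", truncated, rounded);
    return 0;
}
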
View File

@@ -153,6 +153,7 @@ CSINFO chipsetInfo[] =
{PCI_VENDOR_ID_INTEL, 0xA14A, CS_INTEL_A145, "SkyLake C232", Intel_A145_setupFunc},
{PCI_VENDOR_ID_INTEL, 0xA14D, CS_INTEL_A145, "SkyLake-H", Intel_A145_setupFunc},
{PCI_VENDOR_ID_INTEL, 0xA244, CS_INTEL_A145, "SkyLake C620", Intel_A145_setupFunc},
{PCI_VENDOR_ID_INTEL, 0xA1C8, CS_INTEL_A145, "SkyLake C620", Intel_A145_setupFunc},
{PCI_VENDOR_ID_INTEL, 0x8D47, CS_INTEL_8D47, "IntelX99", Intel_8D47_setupFunc},
{PCI_VENDOR_ID_INTEL, 0x8D44, CS_INTEL_8D47, "IntelC612", Intel_8D44_setupFunc},
{PCI_VENDOR_ID_INTEL, 0xA2C5, CS_INTEL_A2C5, "IntelZ270", Intel_A2C5_setupFunc},

View File

@@ -619,27 +619,37 @@ typedef enum
#define NV_GET_NV_STATE(pGpu) \
(nv_state_t *)((pGpu) ? (pGpu)->pOsGpuInfo : NULL)
#define IS_REG_OFFSET(nv, offset, length) \
(((offset) >= (nv)->regs->cpu_address) && \
(((offset) + ((length)-1)) <= \
(nv)->regs->cpu_address + ((nv)->regs->size-1)))
static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((offset >= nv->regs->cpu_address) &&
((offset + (length - 1)) >= offset) &&
((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1))));
}
#define IS_FB_OFFSET(nv, offset, length) \
(((nv)->fb) && ((offset) >= (nv)->fb->cpu_address) && \
(((offset) + ((length)-1)) <= (nv)->fb->cpu_address + ((nv)->fb->size-1)))
static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->fb) && (offset >= nv->fb->cpu_address) &&
((offset + (length - 1)) >= offset) &&
((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1))));
}
#define IS_UD_OFFSET(nv, offset, length) \
(((nv)->ud.cpu_address != 0) && ((nv)->ud.size != 0) && \
((offset) >= (nv)->ud.cpu_address) && \
(((offset) + ((length)-1)) <= (nv)->ud.cpu_address + ((nv)->ud.size-1)))
static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) &&
(offset >= nv->ud.cpu_address) &&
((offset + (length - 1)) >= offset) &&
((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1))));
}
#define IS_IMEM_OFFSET(nv, offset, length) \
(((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) && \
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && \
((offset) >= (nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && \
(((offset) + ((length) - 1)) <= \
(nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + \
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size - 1)))
static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) &&
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) &&
(offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) &&
((offset + (length - 1)) >= offset) &&
((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address +
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1))));
}
#define NV_RM_MAX_MSIX_LINES 8
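
Each converted check adds an ((offset + (length - 1)) >= offset) clause so a range whose end wraps past 2^64 is rejected instead of slipping under the upper-bound test. A standalone sketch of the pattern, with a hypothetical name and plain C types:

#include <stdbool.h>
#include <stdint.h>

/* Assumes length >= 1 and size >= 1, as the callers above do. */
static bool range_in_window(uint64_t base, uint64_t size,
                            uint64_t offset, uint64_t length)
{
    uint64_t end = offset + (length - 1);
    return (offset >= base) &&
           (end >= offset) &&               /* rejects 64-bit wraparound */
           (end <= base + (size - 1));
}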

View File

@@ -775,21 +775,24 @@ static NV_STATUS RmAccessRegistry(
// the passed-in ParmStrLength does not account for '\0'
ParmStrLength++;
if (ParmStrLength > NVOS38_MAX_REGISTRY_STRING_LENGTH)
if ((ParmStrLength == 0) || (ParmStrLength > NVOS38_MAX_REGISTRY_STRING_LENGTH))
{
RmStatus = NV_ERR_INVALID_STRING_LENGTH;
goto done;
}
// get access to client's parmStr
RMAPI_PARAM_COPY_INIT(parmStrParamCopy, tmpParmStr, clientParmStrAddress, ParmStrLength, 1);
parmStrParamCopy.flags |= RMAPI_PARAM_COPY_FLAGS_ZERO_BUFFER;
RmStatus = rmapiParamsAcquire(&parmStrParamCopy, NV_TRUE);
if (RmStatus != NV_OK)
{
RmStatus = NV_ERR_OPERATING_SYSTEM;
goto done;
}
if (tmpParmStr[ParmStrLength - 1] != '\0')
{
RmStatus = NV_ERR_INVALID_ARGUMENT;
goto done;
}
}
if ((AccessType == NVOS38_ACCESS_TYPE_READ_BINARY) ||
@@ -2026,6 +2029,7 @@ static NV_STATUS RmGetAllocPrivate(
PMEMORY_DESCRIPTOR pMemDesc;
NvU32 pageOffset;
NvU64 pageCount;
NvU64 endingOffset;
RsResourceRef *pResourceRef;
RmResource *pRmResource;
void *pMemData;
@@ -2086,8 +2090,20 @@ static NV_STATUS RmGetAllocPrivate(
if (rmStatus != NV_OK)
goto done;
pageCount = ((pageOffset + length) / os_page_size);
pageCount += (*pPageIndex + (((pageOffset + length) % os_page_size) ? 1 : 0));
if (!portSafeAddU64(pageOffset, length, &endingOffset))
{
rmStatus = NV_ERR_INVALID_ARGUMENT;
goto done;
}
pageCount = (endingOffset / os_page_size);
if (!portSafeAddU64(*pPageIndex + ((endingOffset % os_page_size) ? 1 : 0),
pageCount, &pageCount))
{
rmStatus = NV_ERR_INVALID_ARGUMENT;
goto done;
}
if (pageCount > NV_RM_PAGES_TO_OS_PAGES(pMemDesc->PageCount))
{

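The page-count computation above now routes both additions through portSafeAddU64 so a caller-controlled length cannot wrap the arithmetic. A simplified standalone sketch, with hypothetical helpers mirroring the portSafeAddU64 contract:

#include <stdbool.h>
#include <stdint.h>

static bool safe_add_u64(uint64_t a, uint64_t b, uint64_t *sum)
{
    if (a > UINT64_MAX - b)
        return false;
    *sum = a + b;
    return true;
}

/* Pages covering [page_offset, page_offset + length), tail rounded up. */
static bool page_span(uint64_t page_offset, uint64_t length,
                      uint64_t page_size, uint64_t *pages)
{
    uint64_t end;
    if (!safe_add_u64(page_offset, length, &end))
        return false;
    *pages = end / page_size + ((end % page_size) ? 1 : 0);
    return true;
}
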
View File

@@ -362,10 +362,6 @@ osHandleGpuLost
pmc_boot_0 = NV_PRIV_REG_RD32(nv->regs->map_u, NV_PMC_BOOT_0);
if (pmc_boot_0 != nvp->pmc_boot_0)
{
RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
NV2080_CTRL_GPU_GET_OEM_BOARD_INFO_PARAMS *pBoardInfoParams;
NV_STATUS status;
//
// This doesn't support PEX Reset and Recovery yet.
// This will help to prevent accessing registers of a GPU
@@ -376,24 +372,11 @@ osHandleGpuLost
NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "GPU has fallen off the bus.\n");
pBoardInfoParams = portMemAllocNonPaged(sizeof(*pBoardInfoParams));
if (pBoardInfoParams != NULL)
if (pGpu->boardInfo != NULL && pGpu->boardInfo->serialNumber[0] != '\0')
{
portMemSet(pBoardInfoParams, 0, sizeof(*pBoardInfoParams));
status = pRmApi->Control(pRmApi, nv->rmapi.hClient,
nv->rmapi.hSubDevice,
NV2080_CTRL_CMD_GPU_GET_OEM_BOARD_INFO,
pBoardInfoParams,
sizeof(*pBoardInfoParams));
if (status == NV_OK)
{
NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
"GPU serial number is %s.\n",
pBoardInfoParams->serialNumber);
}
portMemFree(pBoardInfoParams);
NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
"GPU serial number is %s.\n",
pGpu->boardInfo->serialNumber);
}
gpuSetDisconnectedProperties(pGpu);

View File

@@ -60,6 +60,7 @@ typedef struct GPUATTACHARG GPUATTACHARG;
* */
#include "ctrl/ctrl0080/ctrl0080gpu.h" // NV0080_CTRL_GPU_GET_SRIOV_CAPS_PARAMS (form hal)
#include "ctrl/ctrl2080/ctrl2080internal.h" // NV2080_CTRL_CMD_INTERNAL_MAX_BSPS/NVENCS
#include "ctrl/ctrl2080/ctrl2080ecc.h"
#include "ctrl/ctrl2080/ctrl2080nvd.h"
#include "class/cl2080.h"
#include "class/cl90cd.h"

View File

@@ -301,6 +301,7 @@ struct KernelGsp {
LIBOS_LOG_DECODE logDecode;
RM_LIBOS_LOG_MEM rmLibosLogMem[2];
void *pLogElf;
NvBool bInInit;
MEMORY_DESCRIPTOR *pMemDesc_simAccessBuf;
SimAccessBuffer *pSimAccessBuf;
NvP64 pSimAccessBufPriv;

View File

@@ -815,6 +815,17 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x20B6, 0x1492, 0x10de, "NVIDIA PG506-232" },
{ 0x20B7, 0x1532, 0x10de, "NVIDIA A30" },
{ 0x20F1, 0x145f, 0x10de, "NVIDIA A100-PCIE-40GB" },
{ 0x20F3, 0x179b, 0x10de, "NVIDIA A800-SXM4-80GB" },
{ 0x20F3, 0x179c, 0x10de, "NVIDIA A800-SXM4-80GB" },
{ 0x20F3, 0x179d, 0x10de, "NVIDIA A800-SXM4-80GB" },
{ 0x20F3, 0x179e, 0x10de, "NVIDIA A800-SXM4-80GB" },
{ 0x20F3, 0x179f, 0x10de, "NVIDIA A800-SXM4-80GB" },
{ 0x20F3, 0x17a0, 0x10de, "NVIDIA A800-SXM4-80GB" },
{ 0x20F3, 0x17a1, 0x10de, "NVIDIA A800-SXM4-80GB" },
{ 0x20F3, 0x17a2, 0x10de, "NVIDIA A800-SXM4-80GB" },
{ 0x20F5, 0x1799, 0x10de, "NVIDIA A800 80GB PCIe" },
{ 0x20F5, 0x179a, 0x10de, "NVIDIA A800 80GB PCIe LC" },
{ 0x20F6, 0x17a3, 0x10de, "NVIDIA A800 40GB PCIe" },
{ 0x2182, 0x0000, 0x0000, "NVIDIA GeForce GTX 1660 Ti" },
{ 0x2184, 0x0000, 0x0000, "NVIDIA GeForce GTX 1660" },
{ 0x2187, 0x0000, 0x0000, "NVIDIA GeForce GTX 1650 SUPER" },
@@ -849,6 +860,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x2203, 0x0000, 0x0000, "NVIDIA GeForce RTX 3090 Ti" },
{ 0x2204, 0x0000, 0x0000, "NVIDIA GeForce RTX 3090" },
{ 0x2206, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080" },
{ 0x2207, 0x0000, 0x0000, "NVIDIA GeForce RTX 3070 Ti" },
{ 0x2208, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080 Ti" },
{ 0x220A, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080" },
{ 0x220D, 0x0000, 0x0000, "NVIDIA CMP 90HX" },
@@ -900,6 +912,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x24B9, 0x0000, 0x0000, "NVIDIA RTX A3000 12GB Laptop GPU" },
{ 0x24BA, 0x0000, 0x0000, "NVIDIA RTX A4500 Laptop GPU" },
{ 0x24BB, 0x0000, 0x0000, "NVIDIA RTX A3000 12GB Laptop GPU" },
{ 0x24C9, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060 Ti" },
{ 0x24DC, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080 Laptop GPU" },
{ 0x24DD, 0x0000, 0x0000, "NVIDIA GeForce RTX 3070 Laptop GPU" },
{ 0x24E0, 0x0000, 0x0000, "NVIDIA GeForce RTX 3070 Ti Laptop GPU" },
@@ -915,6 +928,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x2531, 0x151d, 0x103c, "NVIDIA RTX A2000" },
{ 0x2531, 0x151d, 0x10de, "NVIDIA RTX A2000" },
{ 0x2531, 0x151d, 0x17aa, "NVIDIA RTX A2000" },
{ 0x2544, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060" },
{ 0x2560, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060 Laptop GPU" },
{ 0x2563, 0x0000, 0x0000, "NVIDIA GeForce RTX 3050 Ti Laptop GPU" },
{ 0x2571, 0x1611, 0x1028, "NVIDIA RTX A2000 12GB" },

View File

@@ -86,8 +86,9 @@ NV_STATUS nvlogAllocBuffer(NvU32 size, NvU32 flags, NvU32 tag, NVLOG_BUFFER_HAND
* @brief Deallocate a buffer with the given handle
*
* @param[in] hBuffer Handle of the buffer to deallocate
* @param[in] bDeallocPreserved NV_TRUE to also deallocate buffers marked as preserved
*/
void nvlogDeallocBuffer(NVLOG_BUFFER_HANDLE hBuffer);
void nvlogDeallocBuffer(NVLOG_BUFFER_HANDLE hBuffer, NvBool bDeallocPreserved);
/**
* @brief Write to a buffer with the given handle

View File

@@ -123,6 +123,7 @@ typedef struct vmiopd_SM_info {
#define NV2080_CTRL_INTERNAL_MAX_TPC_PER_GPC_COUNT_v1C_03 10
#define NV2080_CTRL_INTERNAL_GR_MAX_GPC_v1C_03 12
#define NV2080_CTRL_MC_GET_STATIC_INTR_TABLE_MAX_v1E_09 32
#define NV2080_CTRL_PERF_GPUMON_SAMPLE_COUNT_PERFMON_UTIL_v1F_0E 72
// Defined this intermediate RM-RPC structure for making RPC call from Guest as
// we have the restriction of passing max 4kb of data to plugin and the

View File

@@ -265,8 +265,11 @@ static NV_STATUS _issueRpcLarge
// should not be called in broadcast mode
NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE);
//
// Copy the initial buffer
entryLength = NV_MIN(bufSize, pRpc->maxRpcSize);
// Temporary black magic WAR for bug 3594082: reducing the size by 1
//
entryLength = NV_MIN(bufSize, pRpc->maxRpcSize - 1);
if ((NvU8 *)vgpu_rpc_message_header_v != pBuf8)
portMemCopy(vgpu_rpc_message_header_v, entryLength, pBuf8, entryLength);
@@ -291,8 +294,11 @@ static NV_STATUS _issueRpcLarge
remainingSize -= entryLength;
pBuf8 += entryLength;
//
// Copy the remaining buffers
entryLength = pRpc->maxRpcSize - sizeof(rpc_message_header_v);
// Temporary black magic WAR for bug 3594082: reducing the size by 1
//
entryLength = pRpc->maxRpcSize - sizeof(rpc_message_header_v) - 1;
while (remainingSize != 0)
{
if (entryLength > remainingSize)

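Setting the WAR aside, the surrounding loop is a plain chunked copy: each RPC entry is capped at the maximum RPC size, and the WAR shrinks that cap by one byte. A simplified standalone sketch with hypothetical names (it ignores the per-entry header the real loop reserves for continuation entries):

#include <stddef.h>

static void send_chunks(const unsigned char *buf, size_t total, size_t max_chunk,
                        void (*send)(const unsigned char *, size_t))
{
    size_t cap = max_chunk - 1;          /* WAR: reduce the size by 1 */
    while (total != 0)
    {
        size_t chunk = (cap > total) ? total : cap;
        send(buf, chunk);
        buf += chunk;
        total -= chunk;
    }
}
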
View File

@@ -448,6 +448,7 @@ nvdDumpDebugBuffers_IMPL
NvP64 pUmdBuffer = NvP64_NULL;
NvP64 priv = NvP64_NULL;
NvU32 bufSize = 0;
NvU8 *dataBuffer = NULL;
status = prbEncNestedStart(pPrbEnc, NVDEBUG_NVDUMP_DCL_MSG);
if (status != NV_OK)
@@ -463,7 +464,20 @@ nvdDumpDebugBuffers_IMPL
if (status != NV_OK)
break;
status = prbAppendSubMsg(pPrbEnc, pCurrent->tag, NvP64_VALUE(pUmdBuffer), bufSize);
dataBuffer = (NvU8 *) portMemAllocStackOrHeap(bufSize);
if (dataBuffer == NULL)
{
status = NV_ERR_NO_MEMORY;
break;
}
// Copy UmdBuffer to prevent data races
portMemCopy(dataBuffer, bufSize, pUmdBuffer, bufSize);
portAtomicMemoryFenceFull();
status = prbAppendSubMsg(pPrbEnc, pCurrent->tag, dataBuffer, bufSize);
portMemFreeStackOrHeap(dataBuffer);
// Unmap DebugBuffer address
memdescUnmap(pCurrent->pMemDesc, NV_TRUE, // Kernel mapping?
@@ -522,6 +536,24 @@ prbAppendSubMsg
header = (NVDUMP_SUB_ALLOC_HEADER *)pCurrent;
subAlloc = pCurrent + sizeof(NVDUMP_SUB_ALLOC_HEADER);
// Check for out-of-bounds buffer access
if (pCurrent < buffer || subAlloc > (buffer + size))
{
status = NV_ERR_INVALID_ARGUMENT;
goto done;
}
if (!portSafeSubU16(header->end, header->start, (NvU16 *) &subMsgLen))
{
status = NV_ERR_INVALID_ARGUMENT;
goto done;
}
if ((subAlloc + subMsgLen) >= (buffer + size))
{
status = NV_ERR_INSUFFICIENT_RESOURCES;
goto done;
}
// If valid, copy contents
if (header->flags & NVDUMP_SUB_ALLOC_VALID)
{

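Two hardening steps above: the UMD-visible buffer is snapshotted before encoding so a concurrent writer cannot race the parse, and each sub-allocation header is validated with a checked subtraction and bounds tests before its contents are trusted. A standalone sketch of the validation, with hypothetical names:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Validate one sub-allocation: the length must not underflow, and the
 * payload must lie entirely inside the parent buffer snapshot. */
static bool sub_alloc_ok(const uint8_t *buffer, size_t size,
                         const uint8_t *sub, uint16_t start, uint16_t end)
{
    uint16_t len;
    if (end < start)
        return false;                 /* checked subtraction fails */
    len = (uint16_t)(end - start);
    return (sub >= buffer) && ((sub + len) < (buffer + size));
}
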
View File

@@ -103,7 +103,7 @@ nvlogDestroy()
tlsShutdown();
for (i = 0; i < NVLOG_MAX_BUFFERS; i++)
{
nvlogDeallocBuffer(i);
nvlogDeallocBuffer(i, NV_TRUE);
}
if (NvLogLogger.mainLock != NULL)
{
@@ -261,7 +261,8 @@ nvlogAllocBuffer
void
nvlogDeallocBuffer
(
NVLOG_BUFFER_HANDLE hBuffer
NVLOG_BUFFER_HANDLE hBuffer,
NvBool bDeallocPreserved
)
{
NVLOG_BUFFER *pBuffer;
@@ -271,6 +272,12 @@ nvlogDeallocBuffer
pBuffer = NvLogLogger.pBuffers[hBuffer];
if (FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pBuffer->flags) &&
!bDeallocPreserved)
{
return;
}
pBuffer->flags = FLD_SET_DRF(LOG_BUFFER, _FLAGS, _DISABLED,
_YES, pBuffer->flags);
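
The preserve gate above means a normal deallocation leaves buffers flagged _PRESERVE alone; only nvlogDestroy(), which passes NV_TRUE, tears them down. A standalone sketch of the gate with hypothetical types:

#include <stdbool.h>

typedef struct { bool preserved; bool disabled; } log_buffer;

static void dealloc_buffer(log_buffer *buf, bool dealloc_preserved)
{
    if (buf->preserved && !dealloc_preserved)
        return;              /* keep preserved logs alive */
    buf->disabled = true;    /* then release backing storage */
}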

View File

@@ -2502,15 +2502,19 @@ kbusFlushSingle_GM107
if (IS_GSP_CLIENT(pGpu))
{
//
// on GSP client, we only support PCIE_READ to do flush
// a sysmembar flush should call kbusSendSysmembarSingle_HAL explicitly
// on GSP client, we should use PCIE_READ to do video memory flush.
// A sysmembar flush that touches registers is done through RPC and has
// lower efficiency. For cases that need a sysmembar, the call site
// should use kbusSendSysmembarSingle_HAL explicitly.
//
NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_PATH);
}
else
{
return kbusSendSysmembarSingle_HAL(pGpu, pKernelBus);
NV_ASSERT(0);
// This will dump a stack trace to assist debugging on certain
// platforms.
osAssertFailed();
}
return kbusSendSysmembarSingle_HAL(pGpu, pKernelBus);
}
}

View File

@@ -862,6 +862,11 @@ kbusMapBar2Aperture_SCRATCH
NvU32 flags
)
{
if (pMemDesc->Size >= NV_U32_MAX)
{
return NULL;
}
return portMemAllocNonPaged((NvU32)pMemDesc->Size);
}

View File

@@ -1075,6 +1075,7 @@ kchannelMap_IMPL
RmClient *pRmClient = dynamicCast(pRsClient, RmClient);
GpuResource *pGpuResource;
NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_OBJECT);
NV_ASSERT_OR_RETURN(!pKernelChannel->bClientAllocatedUserD, NV_ERR_INVALID_REQUEST);
rmStatus = gpuresGetByDeviceOrSubdeviceHandle(pRsClient,
@@ -3750,6 +3751,7 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
Memory *pMemory;
ContextDma *pContextDma;
NvU32 addressSpace;
NvU64 notificationBufferSize;
NV_STATUS status;
hNotifier = pKernelChannel->hErrorContext;
@@ -3758,6 +3760,13 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
NV_CHECK_OR_RETURN(LEVEL_INFO, index != NV_CHANNELGPFIFO_NOTIFICATION_TYPE_ERROR,
NV_ERR_INVALID_ARGUMENT);
// Check for integer overflows
if (((index + 1) < index) ||
!portSafeMulU64(index + 1, sizeof(NvNotification), &notificationBufferSize))
{
return NV_ERR_OUT_OF_RANGE;
}
status = deviceGetByInstance(pClient, gpuGetDeviceInstance(pGpu), &pDevice);
if (status != NV_OK)
return NV_ERR_INVALID_DEVICE;
@@ -3766,7 +3775,7 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
{
addressSpace = memdescGetAddressSpace(pMemory->pMemDesc);
NV_CHECK_OR_RETURN(LEVEL_INFO, pMemory->Length >= ((index + 1) * sizeof(NvNotification)),
NV_CHECK_OR_RETURN(LEVEL_INFO, pMemory->Length >= notificationBufferSize,
NV_ERR_OUT_OF_RANGE);
switch (addressSpace)
{
@@ -3784,7 +3793,7 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
&pDmaMappingInfo),
NV_ERR_GENERIC);
NV_CHECK_OR_RETURN(LEVEL_INFO, pDmaMappingInfo->pMemDesc->Size >= ((index + 1) * sizeof(NvNotification)),
NV_CHECK_OR_RETURN(LEVEL_INFO, pDmaMappingInfo->pMemDesc->Size >= notificationBufferSize,
NV_ERR_OUT_OF_RANGE);
break;
}
@@ -3799,7 +3808,7 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
}
else if (NV_OK == ctxdmaGetByHandle(pClient, hNotifier, &pContextDma))
{
NV_CHECK_OR_RETURN(LEVEL_INFO, pContextDma->Limit >= (((index + 1) * sizeof(NvNotification)) - 1),
NV_CHECK_OR_RETURN(LEVEL_INFO, pContextDma->Limit >= (notificationBufferSize - 1),
NV_ERR_OUT_OF_RANGE);
}
else

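The new guard above computes notificationBufferSize = (index + 1) * sizeof(NvNotification) with explicit wrap checks before comparing it against the memory, DMA-mapping, and context-DMA limits. A standalone sketch with a hypothetical safe-multiply mirroring the portSafeMulU64 contract:

#include <stdbool.h>
#include <stdint.h>

static bool safe_mul_u64(uint64_t a, uint64_t b, uint64_t *prod)
{
    if ((b != 0) && (a > UINT64_MAX / b))
        return false;
    *prod = a * b;
    return true;
}

/* Size of a notification array holding entries 0..index, with both the
 * increment and the multiply checked for wraparound. */
static bool notif_buffer_size(uint64_t index, uint64_t elem_size, uint64_t *size)
{
    return ((index + 1) > index) &&
           safe_mul_u64(index + 1, elem_size, size);
}
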
View File

@@ -1923,26 +1923,6 @@ gpuStatePreInit_IMPL
}
}
pGpu->boardInfo = portMemAllocNonPaged(sizeof(*pGpu->boardInfo));
if (pGpu->boardInfo)
{
// To avoid potential race of xid reporting with the control, zero it out
portMemSet(pGpu->boardInfo, '\0', sizeof(*pGpu->boardInfo));
RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
if (pRmApi->Control(pRmApi,
pGpu->hInternalClient,
pGpu->hInternalSubdevice,
NV2080_CTRL_CMD_GPU_GET_OEM_BOARD_INFO,
pGpu->boardInfo,
sizeof(*pGpu->boardInfo)) != NV_OK)
{
portMemFree(pGpu->boardInfo);
pGpu->boardInfo = NULL;
}
}
return rmStatus;
}
@@ -2291,6 +2271,26 @@ gpuStatePostLoad
goto gpuStatePostLoad_exit;
}
pGpu->boardInfo = portMemAllocNonPaged(sizeof(*pGpu->boardInfo));
if (pGpu->boardInfo)
{
// To avoid potential race of xid reporting with the control, zero it out
portMemSet(pGpu->boardInfo, '\0', sizeof(*pGpu->boardInfo));
RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
if (pRmApi->Control(pRmApi,
pGpu->hInternalClient,
pGpu->hInternalSubdevice,
NV2080_CTRL_CMD_GPU_GET_OEM_BOARD_INFO,
pGpu->boardInfo,
sizeof(*pGpu->boardInfo)) != NV_OK)
{
portMemFree(pGpu->boardInfo);
pGpu->boardInfo = NULL;
}
}
gpuStatePostLoad_exit:
return rmStatus;
}
@@ -2326,6 +2326,9 @@ gpuStatePreUnload
NvU32 curEngDescIdx;
NV_STATUS rmStatus = NV_OK;
portMemFree(pGpu->boardInfo);
pGpu->boardInfo = NULL;
engDescriptorList = gpuGetUnloadEngineDescriptors(pGpu);
numEngDescriptors = gpuGetNumEngDescriptors(pGpu);
@@ -2648,9 +2651,6 @@ gpuStateDestroy_IMPL
_gpuFreeInternalObjects(pGpu);
gpuDestroyGenericKernelFalconList(pGpu);
portMemFree(pGpu->boardInfo);
pGpu->boardInfo = NULL;
portMemFree(pGpu->gspSupportedEngines);
pGpu->gspSupportedEngines = NULL;

View File

@@ -229,7 +229,8 @@ _nv8deCtrlCmdReadWriteSurface
}
else if (traceArg.aperture == ADDR_FBMEM)
{
memdescCreate(&pMemDesc, pGpu, curSize, 0, NV_TRUE, traceArg.aperture, NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE);
NV_ASSERT_OK_OR_RETURN(memdescCreate(&pMemDesc, pGpu, curSize, 0, NV_TRUE,
traceArg.aperture, NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE));
memdescDescribe(pMemDesc, traceArg.aperture, traceArg.pa, curSize);
}
@@ -684,6 +685,10 @@ NV_STATUS ksmdbgssnCtrlCmdDebugExecRegOps_IMPL
NV_STATUS status = NV_OK;
NvBool isClientGspPlugin = NV_FALSE;
NV_CHECK_OR_RETURN(LEVEL_ERROR,
pParams->regOpCount <= NV83DE_CTRL_GPU_EXEC_REG_OPS_MAX_OPS,
NV_ERR_INVALID_ARGUMENT);
// Check if User have permission to access register offset
NV_CHECK_OK_OR_RETURN(LEVEL_INFO,
gpuValidateRegOps(pGpu, pParams->regOps, pParams->regOpCount,
@@ -725,9 +730,11 @@ ksmdbgssnCtrlCmdDebugReadBatchMemory_IMPL
{
NV_STATUS localStatus = NV_OK;
NvP64 pData = (NvP64)(((NvU8 *)pParams->pData) + pParams->entries[i].dataOffset);
NvU32 endingOffset;
NV_CHECK_OR_ELSE(LEVEL_ERROR,
pParams->entries[i].dataOffset < pParams->dataLength,
portSafeAddU32(pParams->entries[i].dataOffset, pParams->entries[i].length, &endingOffset) &&
(endingOffset <= pParams->dataLength),
localStatus = NV_ERR_INVALID_OFFSET;
goto updateStatus; );
@@ -762,13 +769,18 @@ ksmdbgssnCtrlCmdDebugWriteBatchMemory_IMPL
NV_STATUS status = NV_OK;
NvU32 i;
NV_CHECK_OR_RETURN(LEVEL_ERROR, pParams->count <= MAX_ACCESS_MEMORY_OPS,
NV_ERR_INVALID_ARGUMENT);
for (i = 0; i < pParams->count; ++i)
{
NV_STATUS localStatus = NV_OK;
NvP64 pData = (NvP64)(((NvU8 *)pParams->pData) + pParams->entries[i].dataOffset);
NvU32 endingOffset;
NV_CHECK_OR_ELSE(LEVEL_ERROR,
(pParams->entries[i].dataOffset + pParams->entries[i].length) <= pParams->dataLength,
portSafeAddU32(pParams->entries[i].dataOffset, pParams->entries[i].length, &endingOffset) &&
(endingOffset <= pParams->dataLength),
localStatus = NV_ERR_INVALID_OFFSET;
goto updateStatus; );

View File

@@ -1047,7 +1047,7 @@ _kgspInitLibosLoggingStructures
//
// Setup logging memory for each task.
// Use MEMDESC_FLAGS_CPU_ONLY -- to early to call memdescMapIommu.
// Use MEMDESC_FLAGS_CPU_ONLY -- too early to call memdescMapIommu.
//
NV_ASSERT_OK_OR_GOTO(nvStatus,
memdescCreate(&pLog->pTaskLogDescriptor,
@@ -1258,6 +1258,8 @@ kgspInitRm_IMPL
return NV_ERR_INVALID_ARGUMENT;
}
pKernelGsp->bInInit = NV_TRUE;
// Need to hold the GPU instance lock in order to write to the RPC queue
NV_ASSERT_OK_OR_GOTO(status,
rmGpuGroupLockAcquire(pGpu->gpuInstance, GPU_LOCK_GRP_SUBDEVICE,
@@ -1278,7 +1280,7 @@ kgspInitRm_IMPL
{
KernelGspVbiosImg *pVbiosImg = NULL;
// Try and extract a VBIOS image.
status = kgspExtractVbiosFromRom_HAL(pGpu, pKernelGsp, &pVbiosImg);
if (status == NV_OK)
@@ -1403,6 +1405,14 @@ kgspInitRm_IMPL
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, kgspStartLogPolling(pGpu, pKernelGsp), done);
done:
pKernelGsp->bInInit = NV_FALSE;
if (status != NV_OK)
{
// Preserve any captured gsp-rm logs
libosPreserveLogs(&pKernelGsp->logDecode);
}
if (gpusLockedMask != 0)
{
rmGpuGroupLockRelease(gpusLockedMask, GPUS_LOCK_FLAGS_NONE);
@@ -1520,7 +1530,7 @@ kgspDumpGspLogs_IMPL
NvBool bSyncNvLog
)
{
if (pKernelGsp->pLogElf || bSyncNvLog)
if (pKernelGsp->bInInit || pKernelGsp->pLogElf || bSyncNvLog)
libosExtractLogs(&pKernelGsp->logDecode, bSyncNvLog);
}

View File

@@ -229,6 +229,10 @@ memdescCreate
if (pMemoryManager && pMemoryManager->sysmemPageSize)
{
allocSize = RM_ALIGN_UP(allocSize, pMemoryManager->sysmemPageSize);
if (allocSize < Size)
{
return NV_ERR_INVALID_ARGUMENT;
}
}
}
@@ -253,7 +257,10 @@ memdescCreate
if ((AddressSpace == ADDR_SYSMEM || AddressSpace == ADDR_UNKNOWN) &&
PhysicallyContiguous && (Alignment > RM_PAGE_SIZE))
{
allocSize += (Alignment - RM_PAGE_SIZE);
if (!portSafeAddU64(allocSize, (Alignment - RM_PAGE_SIZE), &allocSize))
{
return NV_ERR_INVALID_ARGUMENT;
}
}
}
}
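
Both fixes above are overflow checks on size math: rounding the allocation up to the system page size can wrap, as can padding for a large alignment. For a power-of-two alignment the wrap is detectable because the rounded result comes out smaller than the input; a standalone sketch with a hypothetical name:

#include <stdbool.h>
#include <stdint.h>

/* Assumes align is a power of two, as the sysmem page size is. */
static bool align_up_checked(uint64_t size, uint64_t align, uint64_t *out)
{
    uint64_t aligned = (size + (align - 1)) & ~(align - 1);
    if (aligned < size)
        return false;        /* wrapped past 2^64 */
    *out = aligned;
    return true;
}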

View File

@@ -1959,6 +1959,7 @@ memmgrFillComprInfo_IMPL
{
const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
NvU32 size;
portMemSet(pComprInfo, 0, sizeof(*pComprInfo));
@@ -1969,10 +1970,15 @@ memmgrFillComprInfo_IMPL
NV_ASSERT(compTagStartOffset != ~(NvU32)0);
if (!portSafeMulU32(pageSize, pageCount, &size))
{
return NV_ERR_INVALID_ARGUMENT;
}
pComprInfo->compPageShift = pMemorySystemConfig->comprPageShift;
pComprInfo->compTagLineMin = compTagStartOffset;
pComprInfo->compPageIndexLo = (NvU32)(surfOffset >> pComprInfo->compPageShift);
pComprInfo->compPageIndexHi = (NvU32)((surfOffset + pageSize * pageCount - 1) >> pComprInfo->compPageShift);
pComprInfo->compPageIndexHi = (NvU32)((surfOffset + size - 1) >> pComprInfo->compPageShift);
pComprInfo->compTagLineMultiplier = 1;
return NV_OK;

View File

@@ -89,6 +89,7 @@ kgmmuValidateFabricBaseAddress_GA100
OBJGPU *pGpu = ENG_GET_GPU(pKernelGmmu);
MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
NvU64 fbSizeBytes;
NvU64 fbUpperLimit;
fbSizeBytes = pMemoryManager->Ram.fbTotalMemSizeMb << 20;
@@ -106,9 +107,14 @@ kgmmuValidateFabricBaseAddress_GA100
// Align fbSize to mapslot size.
fbSizeBytes = RM_ALIGN_UP(fbSizeBytes, NVBIT64(36));
// Check for integer overflow
if (!portSafeAddU64(fabricBaseAddr, fbSizeBytes, &fbUpperLimit))
{
return NV_ERR_INVALID_ARGUMENT;
}
// Make sure the address range doesn't go beyond the limit, (2K * 64GB).
if ((fabricBaseAddr + fbSizeBytes) > NVBIT64(47))
if (fbUpperLimit > NVBIT64(47))
{
return NV_ERR_INVALID_ARGUMENT;
}
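
The checked add above guards the fabric-range comparison against a base address near the top of the 64-bit space (the identical pattern recurs in the knvlink variant in the next file). A standalone sketch with hypothetical names:

#include <stdbool.h>
#include <stdint.h>

static bool fabric_range_ok(uint64_t base, uint64_t fb_size)
{
    uint64_t upper;
    if (base > UINT64_MAX - fb_size)
        return false;                  /* addition would wrap */
    upper = base + fb_size;
    return upper <= (1ULL << 47);      /* 2K * 64GB fabric ceiling */
}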

View File

@@ -184,6 +184,7 @@ knvlinkValidateFabricBaseAddress_GA100
{
MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
NvU64 fbSizeBytes;
NvU64 fbUpperLimit;
fbSizeBytes = pMemoryManager->Ram.fbTotalMemSizeMb << 20;
@@ -202,8 +203,14 @@ knvlinkValidateFabricBaseAddress_GA100
// Align fbSize to mapslot size.
fbSizeBytes = RM_ALIGN_UP(fbSizeBytes, NVBIT64(36));
// Check for integer overflow
if (!portSafeAddU64(fabricBaseAddr, fbSizeBytes, &fbUpperLimit))
{
return NV_ERR_INVALID_ARGUMENT;
}
// Make sure the address range doesn't go beyond the limit, (2K * 64GB).
if ((fabricBaseAddr + fbSizeBytes) > NVBIT64(47))
if (fbUpperLimit > NVBIT64(47))
{
return NV_ERR_INVALID_ARGUMENT;
}

View File

@@ -560,7 +560,7 @@ NV_STATUS tmrEventScheduleRel_IMPL
}
else
{
AbsTime = currentTime + RelTime;
NV_CHECK_OR_RETURN(LEVEL_ERROR, portSafeAddU64(currentTime, RelTime, &AbsTime), NV_ERR_INVALID_ARGUMENT);
}
return tmrEventScheduleAbs(pTmr, pEvent, AbsTime);
@@ -588,7 +588,7 @@ NV_STATUS tmrScheduleCallbackRel_IMPL
if (rmStatus != NV_OK)
return rmStatus;
AbsTime = currentTime + RelTime;
NV_CHECK_OR_RETURN(LEVEL_ERROR, portSafeAddU64(currentTime, RelTime, &AbsTime), NV_ERR_INVALID_ARGUMENT);
return tmrScheduleCallbackAbs(pTmr, Proc, Object, AbsTime, Flags, ChId);
}

View File

@@ -181,6 +181,8 @@ memMap_IMPL
NvBool bBroadcast;
NvU64 mapLimit;
NvBool bIsSysmem = NV_FALSE;
NvBool bSkipSizeCheck = (DRF_VAL(OS33, _FLAGS, _SKIP_SIZE_CHECK, pMapParams->flags) ==
NVOS33_FLAGS_SKIP_SIZE_CHECK_ENABLE);
NV_ASSERT_OR_RETURN(RMCFG_FEATURE_KERNEL_RM, NV_ERR_NOT_SUPPORTED);
@@ -242,14 +244,18 @@ memMap_IMPL
return NV_ERR_INVALID_LIMIT;
}
if (bSkipSizeCheck && (pCallContext->secInfo.privLevel < RS_PRIV_LEVEL_KERNEL))
{
return NV_ERR_INSUFFICIENT_PERMISSIONS;
}
//
// See bugs #140807 and #150889 - we need to pad memory mappings past their
// actual allocation size (to PAGE_SIZE+1) because of a buggy MS function,
// so we skip the allocation size sanity check and the map still succeeds.
//
if ((DRF_VAL(OS33, _FLAGS, _SKIP_SIZE_CHECK, pMapParams->flags) == NVOS33_FLAGS_SKIP_SIZE_CHECK_DISABLE) &&
(!portSafeAddU64(pMapParams->offset, pMapParams->length, &mapLimit) ||
(mapLimit > pMemoryInfo->Length)))
if (!portSafeAddU64(pMapParams->offset, pMapParams->length, &mapLimit) ||
(!bSkipSizeCheck && (mapLimit > pMemoryInfo->Length)))
{
return NV_ERR_INVALID_LIMIT;
}
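
The rework above makes the overflow check unconditional and turns _SKIP_SIZE_CHECK into a kernel-privileged escape hatch for the length comparison only. A condensed standalone sketch with hypothetical names:

#include <stdbool.h>
#include <stdint.h>

static bool map_range_ok(uint64_t offset, uint64_t length, uint64_t alloc_len,
                         bool skip_size_check, bool is_kernel)
{
    uint64_t limit;
    if (skip_size_check && !is_kernel)
        return false;                   /* insufficient permissions */
    if (offset > UINT64_MAX - length)
        return false;                   /* invalid limit: overflow */
    limit = offset + length;
    return skip_size_check || (limit <= alloc_len);
}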

View File

@@ -1,4 +1,4 @@
NVIDIA_VERSION = 515.65.07
NVIDIA_VERSION = 515.86.01
# This file.
VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))