Compare commits


3 Commits

Author        SHA1        Message     Date
Maneet Singh  2ccbad25e1  590.48.01   2025-12-18 09:16:33 -08:00
Maneet Singh  a5bfb10e75  590.44.01   2025-12-02 15:32:25 -08:00
Maneet Singh  2af9f1f0f7  580.105.08  2025-11-04 12:45:59 -08:00
971 changed files with 423839 additions and 408458 deletions

View File

@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
version 580.94.11.
version 590.48.01.
## How to Build
@@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
580.94.11 driver release. This can be achieved by installing
590.48.01 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
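For illustration only (this line is not part of the diff hunk above), installing the matching user-space driver while skipping its prebuilt kernel modules would look roughly like the following; the exact .run file name is an assumption based on the version string:

    sh ./NVIDIA-Linux-x86_64-590.48.01.run --no-kernel-modules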
@@ -185,7 +185,7 @@ table below).
For details on feature support and limitations, see the NVIDIA GPU driver
end user README here:
https://us.download.nvidia.com/XFree86/Linux-x86_64/580.94.11/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/590.48.01/README/kernel_open.html
For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
Package for more details.
@@ -959,9 +959,13 @@ Subsystem Device ID.
| NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 10DE 204B |
| NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 17AA 204B |
| NVIDIA RTX PRO 5000 Blackwell | 2BB3 1028 204D |
| NVIDIA RTX PRO 5000 72GB Blackwell | 2BB3 1028 227A |
| NVIDIA RTX PRO 5000 Blackwell | 2BB3 103C 204D |
| NVIDIA RTX PRO 5000 72GB Blackwell | 2BB3 103C 227A |
| NVIDIA RTX PRO 5000 Blackwell | 2BB3 10DE 204D |
| NVIDIA RTX PRO 5000 72GB Blackwell | 2BB3 10DE 227A |
| NVIDIA RTX PRO 5000 Blackwell | 2BB3 17AA 204D |
| NVIDIA RTX PRO 5000 72GB Blackwell | 2BB3 17AA 227A |
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 1028 204C |
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 103C 204C |
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 10DE 204C |
@@ -1008,6 +1012,7 @@ Subsystem Device ID.
| NVIDIA RTX PRO 500 Blackwell Generation Laptop GPU | 2DB9 |
| NVIDIA GeForce RTX 5050 Laptop GPU | 2DD8 |
| NVIDIA RTX PRO 500 Blackwell Embedded GPU | 2DF9 |
| NVIDIA GB10 | 2E12 10DE 21EC |
| NVIDIA GeForce RTX 5070 | 2F04 |
| NVIDIA GeForce RTX 5070 Ti Laptop GPU | 2F18 |
| NVIDIA RTX PRO 3000 Blackwell Generation Laptop GPU | 2F38 |

View File

@@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc
ccflags-y += -I$(src)
ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
ccflags-y += -DNV_VERSION_STRING=\"580.94.11\"
ccflags-y += -DNV_VERSION_STRING=\"590.48.01\"
# Include and link Tegra out-of-tree modules.
ifneq ($(wildcard /usr/src/nvidia/nvidia-oot),)
@@ -187,6 +187,7 @@ NV_CONFTEST_CFLAGS += $(filter -std=%,$(KBUILD_CFLAGS))
NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign)
NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,)
NV_CONFTEST_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types,)
NV_CONFTEST_CFLAGS += $(call cc-option,-fms-extensions,)
NV_CONFTEST_CFLAGS += -Wno-error
NV_CONFTEST_COMPILE_TEST_HEADERS := $(obj)/conftest/macros.h

View File

@@ -0,0 +1,53 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef SBIOS_TABLE_VERSION_H
#define SBIOS_TABLE_VERSION_H
#define CONTROLLER_SBIOS_TABLE_VERSION_10 (0x10)
#define CONTROLLER_SBIOS_TABLE_VERSION_20 (0x20)
#define CONTROLLER_SBIOS_TABLE_VERSION_21 (0x21)
#define CONTROLLER_SBIOS_TABLE_VERSION_22 (0x22)
#define CONTROLLER_SBIOS_TABLE_VERSION_23 (0x23)
#define CONTROLLER_SBIOS_TABLE_VERSION_24 (0x24)
#define CONTROLLER_SBIOS_TABLE_VERSION_25 (0x25)
#define CONTROLLER_SBIOS_TABLE_MAX_ENTRIES (8)
// NOTE: When adding a new version, make sure to update MAX_VERSION accordingly.
#define CONTROLLER_SBIOS_TABLE_MAX_VERSION (0x25)
/*!
* Layout of Controller 2x data used for static config
*/
#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_20 (0x20)
#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_21 (0x21)
#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_22 (0x22)
#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_23 (0x23)
#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_24 (0x24)
#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_25 (0x25)
#define NVPCF_CONTROLLER_STATIC_TABLE_MAX_ENTRIES (8)
// NOTE: When adding a new version, make sure to update MAX_VERSION accordingly.
#define NVPCF_CONTROLLER_STATIC_TABLE_MAX_VERSION (0x25)
#endif // SBIOS_TABLE_VERSION_H
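The NOTE comments above ask maintainers to keep the MAX_VERSION defines in sync with the newest version define; a hypothetical compile-time guard (not part of this change) that would enforce that could look like:

    /* Hypothetical sketch: fail the build if a MAX_VERSION define falls out of sync. */
    #if CONTROLLER_SBIOS_TABLE_MAX_VERSION != CONTROLLER_SBIOS_TABLE_VERSION_25
    #error "CONTROLLER_SBIOS_TABLE_MAX_VERSION is out of sync with the newest version define"
    #endif
    #if NVPCF_CONTROLLER_STATIC_TABLE_MAX_VERSION != NVPCF_CONTROLLER_STATIC_TABLE_VERSION_25
    #error "NVPCF_CONTROLLER_STATIC_TABLE_MAX_VERSION is out of sync with the newest version define"
    #endif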

View File

@@ -24,7 +24,6 @@
#define __NV_HASH_H__
#include "conftest.h"
#include "nv-list-helpers.h"
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>

View File

@@ -26,8 +26,10 @@
#define NV_IOCTL_H
#include <nv-ioctl-numbers.h>
#include <nv-ioctl-numa.h>
#include <nvtypes.h>
typedef struct {
NvU32 domain; /* PCI domain number */
NvU8 bus; /* PCI bus number */
@@ -113,7 +115,7 @@ typedef struct nv_ioctl_query_device_intr
{
NvU32 intrStatus NV_ALIGN_BYTES(4);
NvU32 status;
} nv_ioctl_query_device_intr;
} nv_ioctl_query_device_intr_t;
/* system parameters that the kernel driver may use for configuration */
typedef struct nv_ioctl_sys_params

View File

@@ -102,17 +102,6 @@
#include <linux/dma-buf.h>
#endif
#if defined(NV_DRM_AVAILABLE)
#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#include <drm/drm_gem.h>
#endif /* NV_DRM_AVAILABLE */
/* task and signal-related items */
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
@@ -141,8 +130,6 @@
#include <asm/bitops.h> /* __set_bit() */
#include <linux/time.h> /* FD_SET() */
#include "nv-list-helpers.h"
/*
* Use current->cred->euid, instead of calling current_euid().
* The latter can pull in the GPL-only debug_lockdep_rcu_enabled()
@@ -274,14 +261,8 @@ extern int nv_pat_mode;
user_function, NULL, args)
#endif
#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_PREEMPT_RT_FULL)
#define NV_CONFIG_PREEMPT_RT 1
#endif
#define NV_PAGE_COUNT(page) \
((unsigned int)page_count(page))
#define NV_GET_PAGE_FLAGS(page_ptr) \
(NV_GET_PAGE_STRUCT(page_ptr->phys_addr)->flags)
#if !defined(DEBUG) && defined(__GFP_NOWARN)
#define NV_GFP_KERNEL (GFP_KERNEL | __GFP_NOWARN)
@@ -298,9 +279,9 @@ extern int nv_pat_mode;
* such as Linux/x86-64; the alternative is to use an IOMMU such
* as the one implemented with the K8 GART, if available.
*/
#define NV_GFP_DMA32 (NV_GFP_KERNEL | GFP_DMA32)
#define NV_GFP_DMA32 (GFP_DMA32)
#else
#define NV_GFP_DMA32 (NV_GFP_KERNEL)
#define NV_GFP_DMA32 0
#endif
#if defined(NVCPU_AARCH64) || defined(NVCPU_RISCV64)
@@ -388,11 +369,7 @@ static inline void nv_vfree(void *ptr, NvU64 size)
static inline void *nv_ioremap(NvU64 phys, NvU64 size)
{
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_PRESENT)
void *ptr = ioremap_driver_hardened(phys, size);
#else
void *ptr = ioremap(phys, size);
#endif
NV_MEMDBG_ADD(ptr, size);
return ptr;
}
@@ -405,9 +382,7 @@ static inline void *nv_ioremap_nocache(NvU64 phys, NvU64 size)
static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
{
void *ptr = NULL;
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_CACHE_SHARED_PRESENT)
ptr = ioremap_cache_shared(phys, size);
#elif defined(NV_IOREMAP_CACHE_PRESENT)
#if defined(NV_IOREMAP_CACHE_PRESENT)
ptr = ioremap_cache(phys, size);
#else
return nv_ioremap(phys, size);
@@ -421,9 +396,7 @@ static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
static inline void *nv_ioremap_wc(NvU64 phys, NvU64 size)
{
void *ptr = NULL;
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_WC_PRESENT)
ptr = ioremap_driver_hardened_wc(phys, size);
#elif defined(NV_IOREMAP_WC_PRESENT)
#if defined(NV_IOREMAP_WC_PRESENT)
ptr = ioremap_wc(phys, size);
#else
return nv_ioremap_nocache(phys, size);
@@ -465,13 +438,7 @@ static NvBool nv_numa_node_has_memory(int node_id)
NV_MEMDBG_ADD(ptr, size); \
}
#if defined(__GFP_RETRY_MAYFAIL)
#define NV_GFP_NO_OOM (NV_GFP_KERNEL | __GFP_RETRY_MAYFAIL)
#elif defined(__GFP_NORETRY)
#define NV_GFP_NO_OOM (NV_GFP_KERNEL | __GFP_NORETRY)
#else
#define NV_GFP_NO_OOM (NV_GFP_KERNEL)
#endif
#define NV_KMALLOC_NO_OOM(ptr, size) \
{ \
@@ -528,22 +495,12 @@ static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot)
#endif
#endif
#define NV_GET_CURRENT_PROCESS() current->tgid
#define NV_IN_ATOMIC() in_atomic()
#define NV_COPY_TO_USER(to, from, n) copy_to_user(to, from, n)
#define NV_COPY_FROM_USER(to, from, n) copy_from_user(to, from, n)
#define NV_IS_SUSER() capable(CAP_SYS_ADMIN)
#define NV_CLI() local_irq_disable()
#define NV_SAVE_FLAGS(eflags) local_save_flags(eflags)
#define NV_RESTORE_FLAGS(eflags) local_irq_restore(eflags)
#define NV_MAY_SLEEP() (!irqs_disabled() && !in_interrupt() && !NV_IN_ATOMIC())
#define NV_MAY_SLEEP() (!irqs_disabled() && !in_interrupt() && !in_atomic())
#define NV_MODULE_PARAMETER(x) module_param(x, int, 0)
#define NV_MODULE_STRING_PARAMETER(x) module_param(x, charp, 0)
#undef MODULE_PARM
#define NV_NUM_CPUS() num_possible_cpus()
#define NV_HAVE_MEMORY_ENCRYPT_DECRYPT 0
#if defined(NVCPU_X86_64) && \
@@ -596,7 +553,6 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
#endif
}
#define NV_GET_OFFSET_IN_PAGE(phys_page) offset_in_page(phys_page)
#define NV_GET_PAGE_STRUCT(phys_page) virt_to_page(__va(phys_page))
#define NV_VMA_PGOFF(vma) ((vma)->vm_pgoff)
#define NV_VMA_SIZE(vma) ((vma)->vm_end - (vma)->vm_start)
@@ -693,9 +649,9 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
#define NV_PRINT_AT(nv_debug_level,at) \
{ \
nv_printf(nv_debug_level, \
"NVRM: VM: %s:%d: 0x%p, %d page(s), count = %d, " \
"NVRM: VM: %s:%d: 0x%p, %d page(s), count = %lld, " \
"page_table = 0x%p\n", __FUNCTION__, __LINE__, at, \
at->num_pages, NV_ATOMIC_READ(at->usage_count), \
at->num_pages, (long long)atomic64_read(&at->usage_count), \
at->page_table); \
}
@@ -711,13 +667,6 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
# define minor(x) MINOR(x)
#endif
#if !defined(PCI_COMMAND_SERR)
#define PCI_COMMAND_SERR 0x100
#endif
#if !defined(PCI_COMMAND_INTX_DISABLE)
#define PCI_COMMAND_INTX_DISABLE 0x400
#endif
#ifndef PCI_CAP_ID_EXP
#define PCI_CAP_ID_EXP 0x10
#endif
@@ -970,7 +919,7 @@ struct nv_dma_buf
typedef struct nv_alloc_s {
struct nv_alloc_s *next;
struct device *dev;
atomic_t usage_count;
atomic64_t usage_count;
struct {
NvBool contig : 1;
NvBool guest : 1;
@@ -983,6 +932,7 @@ typedef struct nv_alloc_s {
NvBool unencrypted : 1;
NvBool coherent : 1;
NvBool carveout : 1;
NvBool pool : 1;
} flags;
unsigned int cache_type;
unsigned int num_pages;
@@ -1143,14 +1093,18 @@ typedef struct nv_dma_map_s {
i++, sm = &dm->mapping.discontig.submaps[i])
/*
* On 4K ARM kernels, use max submap size a multiple of 64K to keep nv-p2p happy.
* Despite 4K OS pages, we still use 64K P2P pages due to dependent modules still using 64K.
* Instead of using (4G-4K), use max submap size as (4G-64K) since the mapped IOVA range
* must be aligned at 64K boundary.
* On 4K ARM kernels, use max submap size a multiple of 2M to avoid breaking up 2M page size
* sysmem allocations.
*
* Instead of using (4G-4K), use max submap size as (4G-2M) since the mapped IOVA range
* must be aligned at 2M boundary.
*
* Bug 5401803: Tracks migrating away from making IOMMU mappings using submaps in favor of
* using sg_chain() to chain a single large scatterlist.
*/
#if defined(CONFIG_ARM64_4K_PAGES)
#define NV_DMA_U32_MAX_4K_PAGES ((NvU32)((NV_U32_MAX >> PAGE_SHIFT) + 1))
#define NV_DMA_SUBMAP_MAX_PAGES ((NvU32)(NV_DMA_U32_MAX_4K_PAGES - 16))
#define NV_DMA_SUBMAP_MAX_PAGES ((NvU32)(NV_DMA_U32_MAX_4K_PAGES - 512))
#else
#define NV_DMA_SUBMAP_MAX_PAGES ((NvU32)(NV_U32_MAX >> PAGE_SHIFT))
#endif
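A quick sanity check of the new constant, derived only from the defines above (4K pages, PAGE_SHIFT = 12):

    NV_DMA_U32_MAX_4K_PAGES = (0xFFFFFFFF >> 12) + 1 = 0x100000 pages   (4 GiB of 4K pages)
    NV_DMA_SUBMAP_MAX_PAGES = 0x100000 - 512         = 0xFFE00 pages    (4 GiB - 2 MiB)

Subtracting 512 pages (512 x 4 KiB = 2 MiB) instead of the previous 16 pages (64 KiB) keeps each full submap a multiple of 2 MiB, matching the updated comment about 2M-aligned IOVA ranges.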
@@ -1294,7 +1248,8 @@ struct nv_pci_tegra_devfreq_dev;
typedef struct nv_linux_state_s {
nv_state_t nv_state;
atomic_t usage_count;
atomic64_t usage_count;
NvU32 suspend_count;
struct device *dev;
@@ -1470,6 +1425,8 @@ typedef struct nv_linux_state_s {
int (*devfreq_suspend)(struct device *dev);
int (*devfreq_resume)(struct device *dev);
int (*devfreq_enable_boost)(struct device *dev, unsigned int duration);
int (*devfreq_disable_boost)(struct device *dev);
#endif
} nv_linux_state_t;
@@ -1640,6 +1597,7 @@ extern NvU32 NVreg_EnableUserNUMAManagement;
extern NvU32 NVreg_RegisterPCIDriver;
extern NvU32 NVreg_RegisterPlatformDeviceDriver;
extern NvU32 NVreg_EnableResizableBar;
extern NvU32 NVreg_TegraGpuPgMask;
extern NvU32 NVreg_EnableNonblockingOpen;
extern NvU32 num_probed_nv_devices;
@@ -1669,9 +1627,9 @@ static inline NvBool nv_alloc_release(nv_linux_file_private_t *nvlfp, nv_alloc_t
{
NV_PRINT_AT(NV_DBG_MEMINFO, at);
if (NV_ATOMIC_DEC_AND_TEST(at->usage_count))
if (atomic64_dec_and_test(&at->usage_count))
{
NV_ATOMIC_INC(at->usage_count);
atomic64_inc(&at->usage_count);
at->next = nvlfp->free_list;
nvlfp->free_list = at;
@@ -1697,10 +1655,7 @@ static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
return flags;
}
#define MODULE_BASE_NAME "nvidia"
#define MODULE_INSTANCE_NUMBER 0
#define MODULE_INSTANCE_STRING ""
#define MODULE_NAME MODULE_BASE_NAME MODULE_INSTANCE_STRING
#define MODULE_NAME "nvidia"
NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32, const char*);
NV_STATUS nv_imp_icc_get(nv_state_t *nv);

View File

@@ -26,45 +26,12 @@
#include <linux/list.h>
#include "conftest.h"
/*
* list_first_entry_or_null added by commit 6d7581e62f8b ("list: introduce
* list_first_entry_or_null") in v3.10 (2013-05-29).
*/
#if !defined(list_first_entry_or_null)
#define list_first_entry_or_null(ptr, type, member) \
(!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL)
#endif
/*
* Added by commit 93be3c2eb337 ("list: introduce list_last_entry(), use
* list_{first,last}_entry()") in v3.13 (2013-11-12).
*/
#if !defined(list_last_entry)
#define list_last_entry(ptr, type, member) \
list_entry((ptr)->prev, type, member)
#endif
/* list_last_entry_or_null() doesn't actually exist in the kernel */
#if !defined(list_last_entry_or_null)
#define list_last_entry_or_null(ptr, type, member) \
(!list_empty(ptr) ? list_last_entry(ptr, type, member) : NULL)
#endif
/*
* list_prev_entry() and list_next_entry added by commit 008208c6b26f
* ("list: introduce list_next_entry() and list_prev_entry()") in
* v3.13 (2013-11-12).
*/
#if !defined(list_prev_entry)
#define list_prev_entry(pos, member) \
list_entry((pos)->member.prev, typeof(*(pos)), member)
#endif
#if !defined(list_next_entry)
#define list_next_entry(pos, member) \
list_entry((pos)->member.next, typeof(*(pos)), member)
#endif
#if !defined(NV_LIST_IS_FIRST_PRESENT)
static inline int list_is_first(const struct list_head *list,
const struct list_head *head)

View File

@@ -32,18 +32,6 @@
#include <linux/semaphore.h>
#include <linux/sched/signal.h> /* signal_pending */
#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_PREEMPT_RT_FULL)
typedef raw_spinlock_t nv_spinlock_t;
#define NV_DEFINE_SPINLOCK(lock) DEFINE_RAW_SPINLOCK(lock)
#define NV_SPIN_LOCK_INIT(lock) raw_spin_lock_init(lock)
#define NV_SPIN_LOCK_IRQ(lock) raw_spin_lock_irq(lock)
#define NV_SPIN_UNLOCK_IRQ(lock) raw_spin_unlock_irq(lock)
#define NV_SPIN_LOCK_IRQSAVE(lock,flags) raw_spin_lock_irqsave(lock,flags)
#define NV_SPIN_UNLOCK_IRQRESTORE(lock,flags) raw_spin_unlock_irqrestore(lock,flags)
#define NV_SPIN_LOCK(lock) raw_spin_lock(lock)
#define NV_SPIN_UNLOCK(lock) raw_spin_unlock(lock)
#define NV_SPIN_UNLOCK_WAIT(lock) raw_spin_unlock_wait(lock)
#else
typedef spinlock_t nv_spinlock_t;
#define NV_DEFINE_SPINLOCK(lock) DEFINE_SPINLOCK(lock)
#define NV_SPIN_LOCK_INIT(lock) spin_lock_init(lock)
@@ -54,7 +42,6 @@ typedef spinlock_t nv_spinlock_t;
#define NV_SPIN_LOCK(lock) spin_lock(lock)
#define NV_SPIN_UNLOCK(lock) spin_unlock(lock)
#define NV_SPIN_UNLOCK_WAIT(lock) spin_unlock_wait(lock)
#endif
#define NV_INIT_MUTEX(mutex) sema_init(mutex, 1)

View File

@@ -196,14 +196,33 @@ static inline struct rw_semaphore *nv_mmap_get_lock(struct mm_struct *mm)
* Commit 45ad9f5290dc updated vma_start_write() to call __vma_start_write().
*/
void nv_vma_start_write(struct vm_area_struct *);
static inline void nv_vma_flags_set_word(struct vm_area_struct *vma, unsigned long flags)
{
nv_vma_start_write(vma);
#if defined(NV_VMA_FLAGS_SET_WORD_PRESENT)
vma_flags_set_word(&vma->flags, flags);
#else
ACCESS_PRIVATE(vma, __vm_flags) |= flags;
#endif
}
static inline void nv_vma_flags_clear_word(struct vm_area_struct *vma, unsigned long flags)
{
nv_vma_start_write(vma);
#if defined(NV_VMA_FLAGS_SET_WORD_PRESENT)
vma_flags_clear_word(&vma->flags, flags);
#else
ACCESS_PRIVATE(vma, __vm_flags) &= ~flags;
#endif
}
#endif // !NV_CAN_CALL_VMA_START_WRITE
static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
{
#if !NV_CAN_CALL_VMA_START_WRITE
nv_vma_start_write(vma);
ACCESS_PRIVATE(vma, __vm_flags) |= flags;
#elif defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
nv_vma_flags_set_word(vma, flags);
#elif defined(NV_VM_FLAGS_SET_PRESENT)
vm_flags_set(vma, flags);
#else
vma->vm_flags |= flags;
@@ -213,9 +232,8 @@ static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
static inline void nv_vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags)
{
#if !NV_CAN_CALL_VMA_START_WRITE
nv_vma_start_write(vma);
ACCESS_PRIVATE(vma, __vm_flags) &= ~flags;
#elif defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)
nv_vma_flags_clear_word(vma, flags);
#elif defined(NV_VM_FLAGS_SET_PRESENT)
vm_flags_clear(vma, flags);
#else
vma->vm_flags &= ~flags;

View File

@@ -47,9 +47,6 @@ void NV_API_CALL nv_init_msi (nv_state_t *);
void NV_API_CALL nv_init_msix (nv_state_t *);
NvS32 NV_API_CALL nv_request_msix_irq (nv_linux_state_t *);
#define NV_PCI_MSIX_FLAGS 2
#define NV_PCI_MSIX_FLAGS_QSIZE 0x7FF
static inline void nv_free_msix_irq(nv_linux_state_t *nvl)
{
int i;
@@ -67,17 +64,8 @@ static inline int nv_get_max_irq(struct pci_dev *pci_dev)
NvU16 ctrl;
cap_ptr = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
/*
* The 'PCI_MSIX_FLAGS' was added in 2.6.21-rc3 by:
* 2007-03-05 f5f2b13129a6541debf8851bae843cbbf48298b7
*/
#if defined(PCI_MSIX_FLAGS)
pci_read_config_word(pci_dev, cap_ptr + PCI_MSIX_FLAGS, &ctrl);
nvec = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1;
#else
pci_read_config_word(pci_dev, cap_ptr + NV_PCI_MSIX_FLAGS, &ctrl);
nvec = (ctrl & NV_PCI_MSIX_FLAGS_QSIZE) + 1;
#endif
return nvec;
}

View File

@@ -73,31 +73,22 @@ extern NvBool nvos_is_chipset_io_coherent(void);
#define NV_PGPROT_UNCACHED_DEVICE(old_prot) pgprot_noncached(old_prot)
#if defined(NVCPU_AARCH64)
#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
PTE_ATTRINDX(MT_DEVICE_nGnRE))
#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
__pgprot_modify(old_prot, PTE_ATTRINDX_MASK, NV_PROT_WRITE_COMBINED_DEVICE)
#define NV_PGPROT_WRITE_COMBINED(old_prot) NV_PGPROT_UNCACHED(old_prot)
#define NV_PGPROT_READ_ONLY(old_prot) \
__pgprot_modify(old_prot, 0, PTE_RDONLY)
#elif defined(NVCPU_X86_64)
#define NV_PGPROT_UNCACHED_WEAK(old_prot) pgprot_noncached_weak(old_prot)
#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
pgprot_modify_writecombine(old_prot)
#define NV_PGPROT_WRITE_COMBINED(old_prot) \
NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot)
pgprot_modify_writecombine(old_prot)
#define NV_PGPROT_READ_ONLY(old_prot) \
__pgprot(pgprot_val((old_prot)) & ~_PAGE_RW)
#elif defined(NVCPU_RISCV64)
#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
#define NV_PGPROT_WRITE_COMBINED(old_prot) \
pgprot_writecombine(old_prot)
/* Don't attempt to mark sysmem pages as write combined on riscv */
#define NV_PGPROT_WRITE_COMBINED(old_prot) old_prot
#define NV_PGPROT_READ_ONLY(old_prot) \
__pgprot(pgprot_val((old_prot)) & ~_PAGE_WRITE)
#else
/* Writecombine is not supported */
#undef NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot)
#undef NV_PGPROT_WRITE_COMBINED(old_prot)
#define NV_PGPROT_READ_ONLY(old_prot)
#endif

View File

@@ -57,8 +57,6 @@ int nv_uvm_init (void);
void nv_uvm_exit (void);
NV_STATUS nv_uvm_suspend (void);
NV_STATUS nv_uvm_resume (void);
void nv_uvm_notify_start_device (const NvU8 *uuid);
void nv_uvm_notify_stop_device (const NvU8 *uuid);
NV_STATUS nv_uvm_event_interrupt (const NvU8 *uuid);
NV_STATUS nv_uvm_drain_P2P (const NvU8 *uuid);
NV_STATUS nv_uvm_resume_P2P (const NvU8 *uuid);

View File

@@ -36,6 +36,19 @@
#define NV_MAX_ISR_DELAY_MS (NV_MAX_ISR_DELAY_US / 1000)
#define NV_NSECS_TO_JIFFIES(nsec) ((nsec) * HZ / 1000000000)
/*
* in_hardirq() was added in v5.11-rc1 (2020-12-15) to replace in_irq().
* Fall back to in_irq() for older kernels that don't have in_hardirq().
*/
static inline NvBool nv_in_hardirq(void)
{
#if defined(in_hardirq)
return in_hardirq();
#else
return in_irq();
#endif
}
#if !defined(NV_KTIME_GET_RAW_TS64_PRESENT)
static inline void ktime_get_raw_ts64(struct timespec64 *ts64)
{
@@ -82,7 +95,7 @@ static inline NV_STATUS nv_sleep_us(unsigned int us)
ktime_get_raw_ts64(&tm1);
#endif
if (in_irq() && (us > NV_MAX_ISR_DELAY_US))
if (nv_in_hardirq() && (us > NV_MAX_ISR_DELAY_US))
return NV_ERR_GENERIC;
mdelay_safe_msec = us / 1000;
@@ -127,7 +140,7 @@ static inline NV_STATUS nv_sleep_ms(unsigned int ms)
tm_start = tm_aux;
#endif
if (in_irq() && (ms > NV_MAX_ISR_DELAY_MS))
if (nv_in_hardirq() && (ms > NV_MAX_ISR_DELAY_MS))
{
return NV_ERR_GENERIC;
}

View File

@@ -86,6 +86,8 @@ extern const NvBool nv_is_rm_firmware_supported_os;
#define NV_RM_DEVICE_INTR_ADDRESS 0x100
#define NV_TEGRA_PCI_IGPU_PG_MASK_DEFAULT 0xFFFFFFFF
/*
* Clock domain identifier, which is used for fetching the engine
* load backed by the specified clock domain for Tegra platforms
@@ -413,6 +415,7 @@ typedef struct nv_soc_irq_info_s {
#define NV_MAX_SOC_IRQS 10
#define NV_MAX_DPAUX_NUM_DEVICES 4
#define NV_MAX_DPAUX_DEV_NAME_SIZE 10
#define NV_MAX_SOC_DPAUX_NUM_DEVICES 4
@@ -429,6 +432,12 @@ typedef struct nv_phys_addr_range
NvU64 len;
} nv_phys_addr_range_t;
typedef struct
{
char vbios_version[15];
char firmware_version[64];
} nv_cached_gpu_info_t;
typedef struct nv_state_t
{
void *priv; /* private data */
@@ -465,6 +474,7 @@ typedef struct nv_state_t
NvU32 num_dpaux_instance;
NvU32 interrupt_line;
NvU32 dpaux_irqs[NV_MAX_DPAUX_NUM_DEVICES];
char dpaux_devname[NV_MAX_DPAUX_NUM_DEVICES][NV_MAX_DPAUX_DEV_NAME_SIZE];
nv_soc_irq_info_t soc_irq_info[NV_MAX_SOC_IRQS];
NvS32 current_soc_irq;
NvU32 num_soc_irqs;
@@ -481,6 +491,7 @@ typedef struct nv_state_t
NvBool is_tegra_pci_igpu;
NvBool supports_tegra_igpu_rg;
NvBool is_tegra_pci_igpu_rg_enabled;
NvU32 tegra_pci_igpu_pg_mask;
NvBool primary_vga;
@@ -588,8 +599,12 @@ typedef struct nv_state_t
/* Console is managed by drm drivers or NVKMS */
NvBool client_managed_console;
/* Bool to check if power management is unsupported */
/* Struct to cache the gpu info details */
nv_cached_gpu_info_t cached_gpu_info;
/* Bool to check if power management is supported */
NvBool is_pm_unsupported;
} nv_state_t;
#define NVFP_TYPE_NONE 0x0
@@ -651,7 +666,7 @@ typedef struct UvmGpuPagingChannelInfo_tag *nvgpuPagingChannelInfo_t;
typedef enum UvmPmaGpuMemoryType_tag nvgpuGpuMemoryType_t;
typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU64, NvU64 *, NvU32, NvU64, NvU64, nvgpuGpuMemoryType_t);
typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemoryType_t);
typedef struct UvmGpuAccessBitsBufferAlloc_tag *nvgpuAccessBitBufferAlloc_t;
/*
* flags
*/
@@ -988,6 +1003,7 @@ NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);
void NV_API_CALL nv_get_updated_emu_seg(NvU32 *start, NvU32 *end);
void NV_API_CALL nv_get_screen_info(nv_state_t *, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64 *);
void NV_API_CALL nv_set_gpu_pg_mask(nv_state_t *);
struct dma_buf;
typedef struct nv_dma_buf nv_dma_buf_t;
@@ -1107,16 +1123,15 @@ NvBool NV_API_CALL rm_isr (nvidia_stack_t *, nv_state_t *
void NV_API_CALL rm_isr_bh (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_isr_bh_unlocked (nvidia_stack_t *, nv_state_t *);
NvBool NV_API_CALL rm_is_msix_allowed (nvidia_stack_t *, nv_state_t *);
NvBool NV_API_CALL rm_wait_for_bar_firewall (nvidia_stack_t *, NvU32 domain, NvU8 bus, NvU8 device, NvU8 function, NvU16 devId, NvU16 subsystemId);
NV_STATUS NV_API_CALL rm_pmu_perfmon_get_load (nvidia_stack_t *, nv_state_t *, NvU32 *, TEGRASOC_DEVFREQ_CLK);
NV_STATUS NV_API_CALL rm_power_management (nvidia_stack_t *, nv_state_t *, nv_pm_action_t);
NV_STATUS NV_API_CALL rm_stop_user_channels (nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_restart_user_channels (nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_save_low_res_mode (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_get_vbios_version (nvidia_stack_t *, nv_state_t *, char *);
char* NV_API_CALL rm_get_gpu_uuid (nvidia_stack_t *, nv_state_t *);
const NvU8* NV_API_CALL rm_get_gpu_uuid_raw (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_set_rm_firmware_requested(nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_get_firmware_version (nvidia_stack_t *, nv_state_t *, char *, NvLength);
void NV_API_CALL rm_cleanup_file_private (nvidia_stack_t *, nv_state_t *, nv_file_private_t *);
void NV_API_CALL rm_unbind_lock (nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_read_registry_dword (nvidia_stack_t *, nv_state_t *, const char *, NvU32 *);

View File

@@ -33,6 +33,12 @@ typedef NvU32 MIGDeviceId;
#define NO_MIG_DEVICE 0L
/* Convert a MIGDeviceId into a 0-based per-GPU subdevice index. */
#define MIG_DEVICE_ID_SUBDEV_MASK 0xf0000000
#define MIG_DEVICE_ID_SUBDEV_SHIFT 28
#define MIG_DEVICE_ID_TO_SUBDEV(migDeviceId) (((migDeviceId) & MIG_DEVICE_ID_SUBDEV_MASK) >> MIG_DEVICE_ID_SUBDEV_SHIFT)
#ifdef __cplusplus
}
#endif
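A small worked example of the new conversion macro, assuming the MIGDeviceId typedef and the defines from the hunk above (the ID value is arbitrary and purely illustrative):

    /* Worked example: the subdevice index lives in the top nibble of the ID. */
    static NvU32 exampleSubdevIndex(void)
    {
        MIGDeviceId id = 0x30000005;            /* hypothetical example value */
        return MIG_DEVICE_ID_TO_SUBDEV(id);     /* (0x30000005 & 0xf0000000) >> 28 == 3 */
    }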

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2013-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2013-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -29,7 +29,7 @@
#define _NV_UVM_INTERFACE_H_
// Forward references, to break circular header file dependencies:
struct UvmOpsUvmEvents;
struct UvmEventsLinux;
#if defined(NVIDIA_UVM_ENABLED)
@@ -1008,6 +1008,65 @@ NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(UvmGpuFaultInfo *pFaultInfo,
*/
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo,
NvBool bEnable);
/*******************************************************************************
nvUvmInterfaceAccessBitsBufAlloc
This function allocates a buffer for access bits.
Arguments:
device[IN] - Device handle associated with the gpu
pAccessBitsInfo[OUT] - Information provided by RM for access bits handling
Error codes:
NV_ERR_INVALID_ARGUMENT - If the parameter/s is invalid.
NV_ERR_NO_MEMORY - If the memory allocation fails.
NV_ERR_GENERIC - Unexpected error. We try hard to
avoid returning this error code,
because it is not very informative.
*/
NV_STATUS nvUvmInterfaceAccessBitsBufAlloc(uvmGpuDeviceHandle device,
UvmGpuAccessBitsBufferAlloc* pAccessBitsInfo);
/*******************************************************************************
nvUvmInterfaceAccessBitsBufFree
This function frees the buffer used for access bits.
Arguments:
device[IN] - Device handle associated with the gpu
pAccessBitsInfo[IN] - Information containing the access bits buffer handle to be freed
Error codes:
NV_ERR_INVALID_ARGUMENT - If the parameter/s is invalid.
NV_ERR_GENERIC - Unexpected error. We try hard to
avoid returning this error code,
because it is not very informative.
*/
NV_STATUS nvUvmInterfaceAccessBitsBufFree(uvmGpuDeviceHandle device,
UvmGpuAccessBitsBufferAlloc* pAccessBitsInfo);
/*******************************************************************************
nvUvmInterfaceAccessBitsDump
This function gets the access bits information in accordance with the mode
requested and stores it in the buffer provided by the client.
Arguments:
device[IN] - Device handle associated with the gpu
pAccessBitsInfo[IN/OUT] - Information containing the access bits buffer
handle to be used for dumping the access bits
and the buffer where the dumped data will be stored
mode[IN] - Mode in which the access bits are dumped
Error codes:
NV_ERR_INVALID_ARGUMENT - If the parameter/s is invalid.
NV_ERR_GENERIC - Unexpected error. We try hard to
avoid returning this error code,
because it is not very informative.
*/
NV_STATUS nvUvmInterfaceAccessBitsDump(uvmGpuDeviceHandle device,
UvmGpuAccessBitsBufferAlloc* pAccessBitsInfo,
UVM_ACCESS_BITS_DUMP_MODE mode);
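Taken together, the three new entry points suggest an alloc / dump / free flow. A minimal sketch, assuming a valid uvmGpuDeviceHandle and the UvmGpuAccessBitsBufferAlloc and UVM_ACCESS_BITS_DUMP_MODE definitions added elsewhere in this change:

    /* Minimal sketch: allocate, dump once in aggregate mode, then free. */
    static void dumpAccessBitsOnce(uvmGpuDeviceHandle device)
    {
        UvmGpuAccessBitsBufferAlloc accessBits = { 0 };

        if (nvUvmInterfaceAccessBitsBufAlloc(device, &accessBits) != NV_OK)
            return;

        /* Pull an aggregate snapshot of the access bits into the buffer. */
        (void)nvUvmInterfaceAccessBitsDump(device, &accessBits,
                                           UVM_ACCESS_BITS_DUMP_MODE_AGGREGATE);

        nvUvmInterfaceAccessBitsBufFree(device, &accessBits);
    }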
/*******************************************************************************
nvUvmInterfaceInitAccessCntrInfo
@@ -1086,20 +1145,20 @@ NV_STATUS nvUvmInterfaceDisableAccessCntr(uvmGpuDeviceHandle device,
UvmGpuAccessCntrInfo *pAccessCntrInfo);
//
// Called by the UVM driver to register operations with RM. Only one set of
// Called by the UVM driver to register event callbacks with RM. Only one set of
// callbacks can be registered by any driver at a time. If another set of
// callbacks was already registered, NV_ERR_IN_USE is returned.
//
NV_STATUS nvUvmInterfaceRegisterUvmCallbacks(struct UvmOpsUvmEvents *importedUvmOps);
NV_STATUS nvUvmInterfaceRegisterUvmEvents(struct UvmEventsLinux *importedEvents);
//
// Counterpart to nvUvmInterfaceRegisterUvmCallbacks. This must only be called
// if nvUvmInterfaceRegisterUvmCallbacks returned NV_OK.
// Counterpart to nvUvmInterfaceRegisterUvmEvents. This must only be called if
// nvUvmInterfaceRegisterUvmEvents returned NV_OK.
//
// Upon return, the caller is guaranteed that any outstanding callbacks are done
// and no new ones will be invoked.
//
void nvUvmInterfaceDeRegisterUvmOps(void);
void nvUvmInterfaceDeRegisterUvmEvents(void);
/*******************************************************************************
nvUvmInterfaceGetNvlinkInfo

View File

@@ -221,9 +221,11 @@ typedef struct UvmGpuChannelInstanceInfo_tag
// Ampere+ GPUs
volatile NvU32 *pChramChannelRegister;
// Out: Address of the Runlist PRI Base Register required to ring the
// doorbell after clearing the faulted bit.
volatile NvU32 *pRunlistPRIBaseRegister;
// Out: Address of the doorbell.
volatile NvU32 *workSubmissionOffset;
// Out: channel handle required to ring the doorbell.
NvU32 workSubmissionToken;
// Out: SMC engine id to which the GR channel is bound, or zero if the GPU
// does not support SMC or it is a CE channel
@@ -365,6 +367,9 @@ typedef struct
// True if the CE supports encryption
NvBool secure:1;
// True if the CE can be used for fast scrub
NvBool scrub:1;
// Mask of physical CEs assigned to this LCE
//
// The value returned by RM for this field may change when a GPU is
@@ -383,6 +388,7 @@ typedef enum
{
UVM_LINK_TYPE_NONE,
UVM_LINK_TYPE_PCIE,
UVM_LINK_TYPE_PCIE_BAR1,
UVM_LINK_TYPE_NVLINK_1,
UVM_LINK_TYPE_NVLINK_2,
UVM_LINK_TYPE_NVLINK_3,
@@ -539,6 +545,12 @@ typedef struct UvmGpuP2PCapsParams_tag
// Size is 0 if bar1 p2p is not supported.
NvU64 bar1DmaAddress[2];
NvU64 bar1DmaSize[2];
// True if GPU i can use PCIe atomics on locations in GPU[i-1]
// BAR1. This implies that GPU[i] can issue PCIe atomics,
// GPU[i-1] can accept PCIe atomics, and the bus interconnect
// between the two GPUs can correctly route PCIe atomics.
NvBool bar1PcieAtomics[2];
} UvmGpuP2PCapsParams;
// Platform-wide information
@@ -830,11 +842,7 @@ typedef NV_STATUS (*uvmEventServiceInterrupt_t) (void *pDeviceObject,
NV_OK if the UVM driver handled the interrupt
NV_ERR_NO_INTR_PENDING if the interrupt is not for the UVM driver
*/
#if defined (__linux__)
typedef NV_STATUS (*uvmEventIsrTopHalf_t) (const NvProcessorUuid *pGpuUuidStruct);
#else
typedef void (*uvmEventIsrTopHalf_t) (void);
#endif
/*******************************************************************************
uvmEventDrainP2P
@@ -871,20 +879,24 @@ typedef NV_STATUS (*uvmEventDrainP2P_t) (const NvProcessorUuid *pGpuUuidStruct);
*/
typedef NV_STATUS (*uvmEventResumeP2P_t) (const NvProcessorUuid *pGpuUuidStruct);
struct UvmOpsUvmEvents
struct UvmEventsLinux
{
uvmEventIsrTopHalf_t isrTopHalf;
uvmEventSuspend_t suspend;
uvmEventResume_t resume;
uvmEventDrainP2P_t drainP2P;
uvmEventResumeP2P_t resumeP2P;
};
struct UvmEventsWindows
{
uvmEventSuspend_t suspend;
uvmEventResume_t resume;
uvmEventStartDevice_t startDevice;
uvmEventStopDevice_t stopDevice;
uvmEventIsrTopHalf_t isrTopHalf;
uvmEventStopDevice_t stopDevice;
#if defined (_WIN32)
uvmEventWddmResetDuringTimeout_t wddmResetDuringTimeout;
uvmEventWddmRestartAfterTimeout_t wddmRestartAfterTimeout;
uvmEventServiceInterrupt_t serviceInterrupt;
#endif
uvmEventDrainP2P_t drainP2P;
uvmEventResumeP2P_t resumeP2P;
};
#define UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES 32
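As a rough sketch of how the renamed registration API from this header might be used: the handler names below are hypothetical, and their signatures are assumed to follow the uvmEvent*_t typedefs above.

    /* Hypothetical UVM-side handlers wired into the renamed Linux events struct. */
    static struct UvmEventsLinux uvmEvents =
    {
        .isrTopHalf = uvmIsrTopHalfEntry,
        .suspend    = uvmSuspendEntry,
        .resume     = uvmResumeEntry,
        .drainP2P   = uvmDrainP2PEntry,
        .resumeP2P  = uvmResumeP2PEntry,
    };

    static NV_STATUS uvmRegisterWithRm(void)
    {
        /* Only one set of callbacks may be registered at a time (NV_ERR_IN_USE otherwise). */
        return nvUvmInterfaceRegisterUvmEvents(&uvmEvents);
    }

    static void uvmUnregisterFromRm(void)
    {
        /* Must only be called if registration returned NV_OK. */
        nvUvmInterfaceDeRegisterUvmEvents();
    }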
@@ -1043,6 +1055,22 @@ typedef struct UvmGpuAccessCntrConfig_tag
NvU32 threshold;
} UvmGpuAccessCntrConfig;
typedef enum
{
UVM_ACCESS_BITS_DUMP_MODE_AGGREGATE = 0,
UVM_ACCESS_BITS_DUMP_MODE_DIFF = 1,
UVM_ACCESS_BITS_DUMP_MODE_CURRENT = 2,
} UVM_ACCESS_BITS_DUMP_MODE;
typedef struct UvmGpuAccessBitsBufferAlloc_tag
{
NvHandle accessBitsBufferHandle;
NvBool bDirtyBits;
NvU32 granularity;
NV_DECLARE_ALIGNED(NvU64 enabledMask[64], 8);
NV_DECLARE_ALIGNED(NvU64 currentBits[64], 8);
} UvmGpuAccessBitsBufferAlloc;
//
// When modifying this enum, make sure they are compatible with the mirrored
// MEMORY_PROTECTION enum in phys_mem_allocator.h.
@@ -1080,6 +1108,7 @@ typedef UvmGpuPagingChannel *gpuPagingChannelHandle;
typedef UvmGpuPagingChannelInfo gpuPagingChannelInfo;
typedef UvmGpuPagingChannelAllocParams gpuPagingChannelAllocParams;
typedef UvmPmaAllocationOptions gpuPmaAllocationOptions;
typedef UvmGpuAccessBitsBufferAlloc gpuAccessBitsBufferAlloc;
typedef struct UvmCslIv
{

View File

@@ -142,6 +142,8 @@ typedef enum {
UvmGpuCompressionTypeCount = 2
} UvmGpuCompressionType;
#define UVM_PMA_MAX_LOCALIZED_REGION_COUNT 2
//
// Mirrored in PMA (PMA_STATS)
//
@@ -153,6 +155,9 @@ typedef struct UvmPmaStatistics_tag
volatile NvU64 numPages2mProtected; // PMA-wide 2MB pages count in protected memory
volatile NvU64 numFreePages64kProtected; // PMA-wide free 64KB page count in protected memory
volatile NvU64 numFreePages2mProtected; // PMA-wide free 2MB pages count in protected memory
volatile NvU64 numPages2mLocalizable[UVM_PMA_MAX_LOCALIZED_REGION_COUNT]; // Localizable 2MB pages count per-uGPU
volatile NvU64 numFreePages64kLocalizable[UVM_PMA_MAX_LOCALIZED_REGION_COUNT]; // Localizable free 64KB page count per-uGPU
volatile NvU64 numFreePages2mLocalizable[UVM_PMA_MAX_LOCALIZED_REGION_COUNT]; // Localizable free 2MB pages count per-uGPU
} UvmPmaStatistics;
typedef enum

View File

@@ -174,10 +174,7 @@ struct NvKmsKapiDeviceResourcesInfo {
NvBool supportsSyncpts;
NvBool requiresVrrSemaphores;
NvBool supportsInputColorRange;
NvBool supportsInputColorSpace;
NvBool contiguousPhysicalMappings;
} caps;
NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
@@ -451,7 +448,6 @@ struct NvKmsKapiHeadReplyConfig {
struct NvKmsKapiModeSetReplyConfig {
enum NvKmsFlipResult flipResult;
NvBool vrrFlip;
NvS32 vrrSemaphoreIndex;
struct NvKmsKapiHeadReplyConfig
headReplyConfig[NVKMS_KAPI_MAX_HEADS];
};
@@ -1550,22 +1546,6 @@ struct NvKmsKapiFunctionsTable {
NvU32 semaphoreIndex
);
/*!
* Signal the VRR semaphore at the specified index from the CPU.
* If device does not support VRR semaphores, this is a no-op.
* Returns true if signal is success or no-op, otherwise returns false.
*
* \param [in] device A device allocated using allocateDevice().
*
* \param [in] index The VRR semaphore index to be signalled.
*/
NvBool
(*signalVrrSemaphore)
(
struct NvKmsKapiDevice *device,
NvS32 index
);
/*!
* Check or wait on a head's LUT notifier.
*

View File

@@ -213,6 +213,7 @@ int NV_API_CALL os_nv_cap_validate_and_dup_fd (const nv_cap_t *, int
void NV_API_CALL os_nv_cap_close_fd (int);
NvS32 NV_API_CALL os_imex_channel_get (NvU64);
NvS32 NV_API_CALL os_imex_channel_count (void);
NV_STATUS NV_API_CALL os_tegra_igpu_perf_boost (void *, NvBool, NvU32);
NV_STATUS NV_API_CALL os_get_tegra_platform (NvU32 *);
enum os_pci_req_atomics_type {

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -83,6 +83,9 @@ NV_STATUS NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvg
NV_STATUS NV_API_CALL rm_gpu_ops_own_access_cntr_intr(nvidia_stack_t *, nvgpuSessionHandle_t, nvgpuAccessCntrInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, const nvgpuAccessCntrConfig_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_disable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_access_bits_buffer_alloc(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessBitBufferAlloc_t);
NV_STATUS NV_API_CALL rm_gpu_ops_access_bits_buffer_free(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessBitBufferAlloc_t);
NV_STATUS NV_API_CALL rm_gpu_ops_access_bits_dump(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessBitBufferAlloc_t, UVM_ACCESS_BITS_DUMP_MODE);
NV_STATUS NV_API_CALL rm_gpu_ops_set_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, unsigned, NvBool, NvU32, NvU64 *);
NV_STATUS NV_API_CALL rm_gpu_ops_unset_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_get_nvlink_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuNvlinkInfo_t);

View File

@@ -669,50 +669,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_IOREMAP_WC_PRESENT" "" "functions"
;;
ioremap_driver_hardened)
#
# Determine if the ioremap_driver_hardened() function is present.
# It does not exist on all architectures.
# TODO: Update the commit ID once the API is upstreamed.
#
CODE="
#include <asm/io.h>
void conftest_ioremap_driver_hardened(void) {
ioremap_driver_hardened();
}"
compile_check_conftest "$CODE" "NV_IOREMAP_DRIVER_HARDENED_PRESENT" "" "functions"
;;
ioremap_driver_hardened_wc)
#
# Determine if the ioremap_driver_hardened_wc() function is present.
# It does not exist on all architectures.
# TODO: Update the commit ID once the API is upstreamed.
#
CODE="
#include <asm/io.h>
void conftest_ioremap_driver_hardened_wc(void) {
ioremap_driver_hardened_wc();
}"
compile_check_conftest "$CODE" "NV_IOREMAP_DRIVER_HARDENED_WC_PRESENT" "" "functions"
;;
ioremap_cache_shared)
#
# Determine if the ioremap_cache_shared() function is present.
# It does not exist on all architectures.
# TODO: Update the commit ID once the API is upstreamed.
#
CODE="
#include <asm/io.h>
void conftest_ioremap_cache_shared(void) {
ioremap_cache_shared();
}"
compile_check_conftest "$CODE" "NV_IOREMAP_CACHE_SHARED_PRESENT" "" "functions"
;;
dom0_kernel_present)
# Add config parameter if running on DOM0.
if [ -n "$VGX_BUILD" ]; then
@@ -1330,33 +1286,54 @@ compile_test() {
compile_check_conftest "$CODE" "NV_EVENTFD_SIGNAL_HAS_COUNTER_ARG" "" "types"
;;
drm_available)
# Determine if the DRM subsystem is usable
get_dev_pagemap_has_pgmap_arg)
#
# Determine if the get_dev_pagemap() function has an additional
# 'pgmap' argument.
#
# This argument was removed by commit dd57f5feb19a
# (mm/memremap: remove unused get_dev_pagemap() parameter)
# in linux-next, expected in v6.18.
#
CODE="
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#include <linux/memremap.h>
#include <drm/drm_drv.h>
struct dev_pagemap *get_dev_pagemap_has_pgmap_arg(void) {
struct dev_pagemap *pgmap;
#if !defined(CONFIG_DRM) && !defined(CONFIG_DRM_MODULE) && !defined(__FreeBSD__)
#error DRM not enabled
#endif
void conftest_drm_available(void) {
struct drm_driver drv;
/* 2013-10-02 1bb72532ac260a2d3982b40bdd4c936d779d0d16 */
(void)drm_dev_alloc;
/* 2013-10-02 c22f0ace1926da399d9a16dfaf09174c1b03594c */
(void)drm_dev_register;
/* 2013-10-02 c3a49737ef7db0bdd4fcf6cf0b7140a883e32b2a */
(void)drm_dev_unregister;
get_dev_pagemap(0, pgmap);
}"
compile_check_conftest "$CODE" "NV_DRM_AVAILABLE" "" "generic"
compile_check_conftest "$CODE" "NV_GET_DEV_PAGEMAP_HAS_PGMAP_ARG" "" "types"
;;
drm_sysfs_connector_property_event)
#
# Determine if drm_sysfs_connector_property_event() is present.
#
# Commit 0cf8d292ba5e ("drm/sysfs: rename drm_sysfs_connector_status_event()")
# renamed drm_sysfs_connector_status_event() to
# drm_sysfs_connector_property_event() in Linux v6.5.
#
CODE="
#include <drm/drm_sysfs.h>
void conftest_drm_sysfs_connector_property_event(void) {
drm_sysfs_connector_property_event();
}"
compile_check_conftest "$CODE" "NV_DRM_SYSFS_CONNECTOR_PROPERTY_EVENT_PRESENT" "" "functions"
;;
drm_sysfs_connector_status_event)
#
# Determine if drm_sysfs_connector_status_event() is present.
#
#
CODE="
#include <drm/drm_sysfs.h>
void conftest_drm_sysfs_connector_status_event(void) {
drm_sysfs_connector_status_event();
}"
compile_check_conftest "$CODE" "NV_DRM_SYSFS_CONNECTOR_STATUS_EVENT_PRESENT" "" "functions"
;;
pde_data)
@@ -1437,71 +1414,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_VMF_INSERT_PFN_PROT_PRESENT" "" "functions"
;;
drm_atomic_available)
#
# Determine if the DRM atomic modesetting subsystem is usable
#
# Added by commit 036ef5733ba4
# ("drm/atomic: Allow drivers to subclass drm_atomic_state, v3") in
# v4.2 (2018-05-18).
#
# Make conftest more robust by adding test for
# drm_atomic_set_mode_prop_for_crtc(), this function added by
# commit 955f3c334f0f ("drm/atomic: Add MODE_ID property") in v4.2
# (2015-05-25). If the DRM atomic modesetting subsystem is
# back ported to Linux kernel older than v4.2, then commit
# 955f3c334f0f must be back ported in order to get NVIDIA-DRM KMS
# support.
# Commit 72fdb40c1a4b ("drm: extract drm_atomic_uapi.c") in v4.20
# (2018-09-05), moved drm_atomic_set_mode_prop_for_crtc() function
# prototype from drm/drm_atomic.h to drm/drm_atomic_uapi.h.
#
echo "$CONFTEST_PREAMBLE
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#include <drm/drm_atomic.h>
#if !defined(CONFIG_DRM) && !defined(CONFIG_DRM_MODULE) && !defined(__FreeBSD__)
#error DRM not enabled
#endif
void conftest_drm_atomic_modeset_available(void) {
size_t a;
a = offsetof(struct drm_mode_config_funcs, atomic_state_alloc);
}" > conftest$$.c;
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "$CONFTEST_PREAMBLE
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#include <drm/drm_atomic.h>
#if defined(NV_DRM_DRM_ATOMIC_UAPI_H_PRESENT)
#include <drm/drm_atomic_uapi.h>
#endif
void conftest_drm_atomic_set_mode_prop_for_crtc(void) {
drm_atomic_set_mode_prop_for_crtc();
}" > conftest$$.c;
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#undef NV_DRM_ATOMIC_MODESET_AVAILABLE" | append_conftest "generic"
else
echo "#define NV_DRM_ATOMIC_MODESET_AVAILABLE" | append_conftest "generic"
fi
else
echo "#undef NV_DRM_ATOMIC_MODESET_AVAILABLE" | append_conftest "generic"
fi
;;
drm_driver_has_legacy_dev_list)
#
# Determine if the 'drm_driver' structure has a 'legacy_dev_list' field.
@@ -2202,6 +2114,35 @@ compile_test() {
compile_check_conftest "$CODE" "NV_GET_BACKLIGHT_DEVICE_BY_NAME_PRESENT" "" "functions"
;;
dma_map_ops_has_map_phys)
#
# Determine if .map_phys exists in struct dma_map_ops.
#
# Commit 14cb413af00c ("dma-mapping: remove unused mapping resource callbacks")
# removed .map_resource operation and replaced it with .map_phys.
#
echo "$CONFTEST_PREAMBLE
#include <linux/dma-map-ops.h>
int conftest_dma_map_ops_has_map_phys(void) {
return offsetof(struct dma_map_ops, map_phys);
}
int conftest_dma_map_ops_has_unmap_phys(void) {
return offsetof(struct dma_map_ops, unmap_phys);
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
echo "#define NV_DMA_MAP_OPS_HAS_MAP_PHYS" | append_conftest "types"
rm -f conftest$$.o
return
else
echo "#undef NV_DMA_MAP_OPS_HAS_MAP_PHYS" | append_conftest "types"
return
fi
;;
dma_buf_ops_has_map)
#
# Determine if .map exists in dma_buf_ops.
@@ -2330,6 +2271,7 @@ compile_test() {
# drm_helper_mode_fill_fb_struct()") in linux-next
# (2025-07-16)
CODE="
#include <linux/stddef.h>
#include <drm/drm_modeset_helper.h>
void conftest_drm_fill_fb_struct_takes_format_info(void) {
@@ -2444,6 +2386,23 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PM_RUNTIME_AVAILABLE" "" "generic"
;;
pm_domain_available)
#
# Determine whether dev_pm_genpd_suspend() exists.
#
# This was added to the kernel in commit fc51989062138
# ("PM: domains: Rename pm_genpd_syscore_poweroff|poweron()")
# in v5.11-rc1 (2020-11-10),
#
CODE="
#include <linux/pm_domain.h>
void pm_domain_conftest(void) {
dev_pm_genpd_suspend();
}"
compile_check_conftest "$CODE" "NV_PM_DOMAIN_AVAILABLE" "" "functions"
;;
dma_direct_map_resource)
#
# Determine whether dma_is_direct() exists.
@@ -2617,31 +2576,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS" "" "types"
;;
drm_format_num_planes)
#
# Determine if drm_format_num_planes() function is present.
#
# The drm_format_num_planes() function was added by commit
# d0d110e09629 drm: Add drm_format_num_planes() utility function in
# v3.3 (2011-12-20). Prototype was moved from drm_crtc.h to
# drm_fourcc.h by commit ae4df11a0f53 (drm: Move format-related
# helpers to drm_fourcc.c) in v4.8 (2016-06-09).
# drm_format_num_planes() has been removed by commit 05c452c115bf
# (drm: Remove users of drm_format_num_planes) removed v5.3
# (2019-05-16).
#
CODE="
#include <drm/drm_crtc.h>
#include <drm/drm_fourcc.h>
void conftest_drm_format_num_planes(void) {
drm_format_num_planes();
}
"
compile_check_conftest "$CODE" "NV_DRM_FORMAT_NUM_PLANES_PRESENT" "" "functions"
;;
drm_gem_object_has_resv)
#
# Determine if the 'drm_gem_object' structure has a 'resv' field.
@@ -3712,6 +3646,90 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DEVM_CLK_BULK_GET_ALL_PRESENT" "" "functions"
;;
thermal_zone_for_each_trip)
#
# Determine if thermal_zone_for_each_trip() function is present
#
# Added by commit a56cc0a83385 ("thermal: core: Add function to
# walk trips under zone lock") in v6.6-rc3
#
CODE="
#include <linux/thermal.h>
void conftest_thermal_zone_for_each_trip(void)
{
thermal_zone_for_each_trip();
}
"
compile_check_conftest "$CODE" "NV_THERMAL_ZONE_FOR_EACH_TRIP_PRESENT" "" "functions"
;;
thermal_bind_cdev_to_trip)
#
# Determine if thermal_bind_cdev_to_trip() function is present
#
# Added by commit d069ed6b752f ("thermal: core: Allow trip
# pointers to be used for cooling device binding") in v6.6-rc3
#
CODE="
#include <linux/thermal.h>
void conftest_thermal_bind_cdev_to_trip(void)
{
thermal_bind_cdev_to_trip();
}
"
compile_check_conftest "$CODE" "NV_THERMAL_BIND_CDEV_TO_TRIP_PRESENT" "" "functions"
;;
thermal_unbind_cdev_from_trip)
#
# Determine if thermal_unbind_cdev_from_trip() function is present
#
# Added by commit d069ed6b752f ("thermal: core: Allow trip
# pointers to be used for cooling device binding") in v6.6-rc3
#
CODE="
#include <linux/thermal.h>
void conftest_thermal_unbind_cdev_from_trip(void)
{
thermal_unbind_cdev_from_trip();
}
"
compile_check_conftest "$CODE" "NV_THERMAL_UNBIND_CDEV_FROM_TRIP_PRESENT" "" "functions"
;;
update_devfreq)
#
# Determine if update_devfreq() function is present
#
# Added by commit b596d895fa29 ("PM / devfreq: Make update_devfreq()
# public") in v4.20
#
CODE="
#include <linux/devfreq.h>
void conftest_update_devfreq(void)
{
update_devfreq();
}
"
compile_check_conftest "$CODE" "NV_UPDATE_DEVFREQ_PRESENT" "" "functions"
;;
devfreq_dev_profile_has_is_cooling_device)
#
# Determine if the 'devfreq_dev_profile' structure has 'is_cooling_device'
#
# Added by commit 1224451bb6f93 ("PM / devfreq: Register devfreq as a cooling device
# on demand") in v5.12-rc1
#
CODE="
#include <linux/devfreq.h>
int conftest_devfreq_dev_profile_has_is_cooling_device(void) {
return offsetof(struct devfreq_dev_profile, is_cooling_device);
}
"
compile_check_conftest "$CODE" "NV_DEVFREQ_DEV_PROFILE_HAS_IS_COOLING_DEVICE" "" "types"
;;
devfreq_has_freq_table)
#
# Determine if the 'devfreq' structure has 'freq_table'
@@ -3729,6 +3747,38 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DEVFREQ_HAS_FREQ_TABLE" "" "types"
;;
devfreq_has_suspend_freq)
#
# Determine if the 'devfreq' structure has 'suspend_freq'
#
# Commit 83f8ca45afbf ("PM / devfreq: add support for
# suspend/resume of a devfreq device") updated the devfreq
# structure and added the suspend_freq field in v5.0.
#
CODE="
#include <linux/devfreq.h>
int conftest_devfreq_has_suspend_freq(void) {
return offsetof(struct devfreq, suspend_freq);
}
"
compile_check_conftest "$CODE" "NV_DEVFREQ_HAS_SUSPEND_FREQ" "" "types"
;;
bpmp_mrq_has_strap_set)
#
# Determine if STRAP_SET is present in the bpmp MRQ ABI.
#
# STRAP_SET was added by commit 4bef358c9071 ("soc/tegra:
# bpmp: Update ABI header") in v5.0.
#
CODE="
#include <stdint.h>
#include <soc/tegra/bpmp-abi.h>
int bpmp_mrq_has_strap = STRAP_SET;
"
compile_check_conftest "$CODE" "NV_BPMP_MRQ_HAS_STRAP_SET" "" "types"
;;
dma_resv_add_fence)
#
# Determine if the dma_resv_add_fence() function is present.
@@ -3917,6 +3967,27 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PCI_REBAR_GET_POSSIBLE_SIZES_PRESENT" "" "functions"
;;
pci_resize_resource_has_exclude_bars_arg)
#
# Determine if pci_resize_resource() has exclude_bars argument.
#
# exclude_bars argument was added to pci_resize_resource by commit
# 337b1b566db0 (11/14/2025) ("PCI: Fix restoring BARs on BAR resize rollback path")
# in linux-next.
#
CODE="
#include <linux/pci.h>
typeof(pci_resize_resource) conftest_pci_resize_resource_has_exclude_bars_arg;
int __must_check conftest_pci_resize_resource_has_exclude_bars_arg(struct pci_dev *dev,
int i, int size,
int exclude_bars) {
return 0;
}"
compile_check_conftest "$CODE" "NV_PCI_RESIZE_RESOURCE_HAS_EXCLUDE_BARS_ARG" "" "types"
;;
drm_connector_has_override_edid)
#
# Determine if 'struct drm_connector' has an 'override_edid' member.
@@ -3955,22 +4026,39 @@ compile_test() {
compile_check_conftest "$CODE" "NV_IOMMU_SVA_BIND_DEVICE_HAS_DRVDATA_ARG" "" "types"
;;
vm_area_struct_has_const_vm_flags)
vm_flags_set)
#
# Determine if the 'vm_area_struct' structure has
# const 'vm_flags'.
# Determine if the vm_flags_set() function is present. The
# presence of this function indicates that the vm_flags_clear()
# function is also present.
#
# A union of '__vm_flags' and 'const vm_flags' was added by
# The functions vm_flags_set()/ vm_flags_clear() were added by
# commit bc292ab00f6c ("mm: introduce vma->vm_flags wrapper
# functions") in v6.3.
# functions") in v6.3-rc1 (2023-02-09).
#
CODE="
#include <linux/mm_types.h>
int conftest_vm_area_struct_has_const_vm_flags(void) {
return offsetof(struct vm_area_struct, __vm_flags);
#include <linux/mm.h>
void conftest_vm_flags_set(void) {
vm_flags_set();
}"
compile_check_conftest "$CODE" "NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS" "" "types"
compile_check_conftest "$CODE" "NV_VM_FLAGS_SET_PRESENT" "" "functions"
;;
vma_flags_set_word)
#
# Determine if the vma_flags_set_word() function is present.
#
# Added by commit c3f7c506e8f1 ("mm: introduce VMA flags bitmap type")
# in v6.19-rc1.
#
CODE="
#include <linux/mm.h>
void conftest_vma_flags_set_word(void) {
vma_flags_set_word();
}"
compile_check_conftest "$CODE" "NV_VMA_FLAGS_SET_WORD_PRESENT" "" "functions"
;;
drm_driver_has_dumb_destroy)
@@ -4856,6 +4944,46 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_HELPER_FUNCS_MODE_VALID_HAS_CONST_MODE_ARG" "" "types"
;;
register_shrinker_has_format_arg)
# Determine if register_shrinker() takes a printf-style 'fmt'
# argument that is used to name the shrinker.
#
# The format argument was added in v6.0, when shrinkers gained
# names; older kernels take only the 'struct shrinker *' argument.
#
CODE="
#include <linux/mm.h>
void conftest_register_shrinker_has_format_arg(void) {
register_shrinker(NULL, \"%d\", 0);
}"
compile_check_conftest "$CODE" "NV_REGISTER_SHRINKER_HAS_FMT_ARG" "" "types"
;;
shrinker_alloc)
# Determine if the shrinker_alloc() function is present.
#
# shrinker_alloc() is part of the dynamically allocated shrinker
# API added in v6.7, which supersedes register_shrinker().
#
CODE="
#include <linux/mm.h>
void conftest_shrinker_alloc(void) {
shrinker_alloc();
}"
compile_check_conftest "$CODE" "NV_SHRINKER_ALLOC_PRESENT" "" "functions"
;;
memory_device_coherent_present)
#
# Determine if MEMORY_DEVICE_COHERENT support is present or not

View File

@@ -2,6 +2,9 @@
# corresponding #define will be generated in conftest/headers.h.
NV_HEADER_PRESENCE_TESTS = \
asm/system.h \
drm/drm_hdcp.h \
drm/display/drm_hdcp.h \
drm/display/drm_hdcp_helper.h \
drm/drmP.h \
drm/drm_aperture.h \
drm/drm_atomic_state_helper.h \

View File

@@ -22,7 +22,6 @@
*/
#include "nv-kthread-q.h"
#include "nv-list-helpers.h"
#include <linux/kthread.h>
#include <linux/interrupt.h>
@@ -43,17 +42,6 @@
// into the queue, and those functions will be run in the context of the
// queue's kthread.
#ifndef WARN
// Only *really* old kernels (2.6.9) end up here. Just use a simple printk
// to implement this, because such kernels won't be supported much longer.
#define WARN(condition, format...) ({ \
int __ret_warn_on = !!(condition); \
if (unlikely(__ret_warn_on)) \
printk(KERN_ERR format); \
unlikely(__ret_warn_on); \
})
#endif
#define NVQ_WARN(fmt, ...) \
do { \
if (in_interrupt()) { \

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -20,8 +20,8 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef _UAPI_NVIDIA_DRM_IOCTL_H_
#define _UAPI_NVIDIA_DRM_IOCTL_H_
#ifndef _NV_DRM_COMMON_IOCTL_H_
#define _NV_DRM_COMMON_IOCTL_H_
#include <drm/drm.h>
@@ -396,4 +396,4 @@ struct drm_nvidia_get_drm_file_unique_id_params {
uint64_t id; /* OUT Unique ID of the DRM file */
};
#endif /* _UAPI_NVIDIA_DRM_IOCTL_H_ */
#endif /* _NV_DRM_COMMON_IOCTL_H_ */

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -31,8 +31,8 @@
#include "nvidia-drm-encoder.h"
#include "nvidia-drm-utils.h"
#include "nvidia-drm-fb.h"
#include "nvidia-drm-ioctl.h"
#include "nvidia-drm-format.h"
#include "nv_drm_common_ioctl.h"
#include "nvmisc.h"
#include "nv_common_utils.h"
@@ -1286,15 +1286,10 @@ plane_req_config_update(struct drm_plane *plane,
if ((nv_drm_plane_state->input_colorspace == NV_DRM_INPUT_COLOR_SPACE_NONE) &&
nv_drm_format_is_yuv(plane_state->fb->format->format)) {
if (nv_plane->supportsColorProperties) {
req_config->config.inputColorSpace =
nv_drm_color_encoding_to_nvkms_colorspace(plane_state->color_encoding);
req_config->config.inputColorRange =
nv_drm_color_range_to_nvkms_color_range(plane_state->color_range);
} else {
req_config->config.inputColorSpace = NVKMS_INPUT_COLOR_SPACE_NONE;
req_config->config.inputColorRange = NVKMS_INPUT_COLOR_RANGE_DEFAULT;
}
req_config->config.inputColorSpace =
nv_drm_color_encoding_to_nvkms_colorspace(plane_state->color_encoding);
req_config->config.inputColorRange =
nv_drm_color_range_to_nvkms_color_range(plane_state->color_range);
req_config->config.inputTf = NVKMS_INPUT_TF_LINEAR;
} else {
#endif
@@ -1559,7 +1554,7 @@ static int __nv_drm_cursor_atomic_check(struct drm_plane *plane,
WARN_ON(nv_plane->layer_idx != NVKMS_KAPI_LAYER_INVALID_IDX);
nv_drm_for_each_crtc_in_state(plane_state->state, crtc, crtc_state, i) {
for_each_new_crtc_in_state(plane_state->state, crtc, crtc_state, i) {
struct nv_drm_crtc_state *nv_crtc_state = to_nv_crtc_state(crtc_state);
struct NvKmsKapiHeadRequestedConfig *head_req_config =
&nv_crtc_state->req_config;
@@ -1605,7 +1600,7 @@ static int nv_drm_plane_atomic_check(struct drm_plane *plane,
WARN_ON(nv_plane->layer_idx == NVKMS_KAPI_LAYER_INVALID_IDX);
nv_drm_for_each_crtc_in_state(plane_state->state, crtc, crtc_state, i) {
for_each_new_crtc_in_state(plane_state->state, crtc, crtc_state, i) {
struct nv_drm_crtc_state *nv_crtc_state = to_nv_crtc_state(crtc_state);
struct NvKmsKapiHeadRequestedConfig *head_req_config =
&nv_crtc_state->req_config;
@@ -2435,7 +2430,7 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,
req_config->flags.displaysChanged = NV_TRUE;
nv_drm_for_each_connector_in_state(crtc_state->state,
for_each_new_connector_in_state(crtc_state->state,
connector, connector_state, j) {
if (connector_state->crtc != crtc) {
continue;
@@ -2844,26 +2839,16 @@ nv_drm_plane_create(struct drm_device *dev,
}
#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
if (pResInfo->caps.supportsInputColorSpace &&
pResInfo->caps.supportsInputColorRange) {
nv_plane->supportsColorProperties = true;
drm_plane_create_color_properties(
plane,
NVBIT(DRM_COLOR_YCBCR_BT601) |
NVBIT(DRM_COLOR_YCBCR_BT709) |
NVBIT(DRM_COLOR_YCBCR_BT2020),
NVBIT(DRM_COLOR_YCBCR_FULL_RANGE) |
NVBIT(DRM_COLOR_YCBCR_LIMITED_RANGE),
DRM_COLOR_YCBCR_BT709,
DRM_COLOR_YCBCR_FULL_RANGE
);
} else {
nv_plane->supportsColorProperties = false;
}
#else
nv_plane->supportsColorProperties = false;
drm_plane_create_color_properties(
plane,
NVBIT(DRM_COLOR_YCBCR_BT601) |
NVBIT(DRM_COLOR_YCBCR_BT709) |
NVBIT(DRM_COLOR_YCBCR_BT2020),
NVBIT(DRM_COLOR_YCBCR_FULL_RANGE) |
NVBIT(DRM_COLOR_YCBCR_LIMITED_RANGE),
DRM_COLOR_YCBCR_BT709,
DRM_COLOR_YCBCR_FULL_RANGE
);
#endif
drm_plane_helper_add(plane, &nv_plane_helper_funcs);

View File

@@ -191,13 +191,6 @@ struct nv_drm_plane {
*/
uint32_t layer_idx;
/**
* @supportsColorProperties
*
* If true, supports the COLOR_ENCODING and COLOR_RANGE properties.
*/
bool supportsColorProperties;
struct NvKmsLUTCaps ilut_caps;
struct NvKmsLUTCaps tmo_caps;
};

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -40,7 +40,7 @@
#if defined(NV_DRM_AVAILABLE)
#include "nvidia-drm-ioctl.h"
#include "nv_drm_common_ioctl.h"
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
@@ -326,8 +326,8 @@ done:
static int nv_drm_disp_cmp (const void *l, const void *r)
{
struct nv_drm_mst_display_info *l_info = (struct nv_drm_mst_display_info *)l;
struct nv_drm_mst_display_info *r_info = (struct nv_drm_mst_display_info *)r;
const struct nv_drm_mst_display_info *l_info = (const struct nv_drm_mst_display_info *)l;
const struct nv_drm_mst_display_info *r_info = (const struct nv_drm_mst_display_info *)r;
return strcmp(l_info->dpAddress, r_info->dpAddress);
}
@@ -743,6 +743,8 @@ static int nv_drm_dev_load(struct drm_device *dev)
nv_dev->hasVideoMemory = resInfo.caps.hasVideoMemory;
nv_dev->contiguousPhysicalMappings = resInfo.caps.contiguousPhysicalMappings;
nv_dev->genericPageKind = resInfo.caps.genericPageKind;
// Fermi-Volta use generation 0, Turing+ uses generation 2.
@@ -762,8 +764,6 @@ static int nv_drm_dev_load(struct drm_device *dev)
resInfo.caps.numDisplaySemaphores;
nv_dev->display_semaphores.next_index = 0;
nv_dev->requiresVrrSemaphores = resInfo.caps.requiresVrrSemaphores;
nv_dev->vtFbBaseAddress = resInfo.vtFbBaseAddress;
nv_dev->vtFbSize = resInfo.vtFbSize;
@@ -1717,6 +1717,11 @@ static long nv_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return retcode;
}
static int nv_drm_load_noop(struct drm_device *dev, unsigned long flags)
{
return 0;
}
static const struct file_operations nv_drm_fops = {
.owner = THIS_MODULE,
@@ -1899,6 +1904,8 @@ static struct drm_driver nv_drm_driver = {
.gem_prime_res_obj = nv_drm_gem_prime_res_obj,
#endif
.load = nv_drm_load_noop,
.postclose = nv_drm_postclose,
.open = nv_drm_open,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,12 +25,12 @@
#if defined(NV_DRM_AVAILABLE)
#include "nvidia-drm-priv.h"
#include "nvidia-drm-ioctl.h"
#include "nvidia-drm-fb.h"
#include "nvidia-drm-utils.h"
#include "nvidia-drm-gem.h"
#include "nvidia-drm-helper.h"
#include "nvidia-drm-format.h"
#include "nv_drm_common_ioctl.h"
#include <drm/drm_crtc_helper.h>

View File

@@ -29,10 +29,10 @@
#endif
#include "nvidia-drm-priv.h"
#include "nvidia-drm-ioctl.h"
#include "nvidia-drm-gem.h"
#include "nvidia-drm-fence.h"
#include "nvidia-dma-resv-helper.h"
#include "nv_drm_common_ioctl.h"
#include <linux/dma-fence.h>

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -35,15 +35,22 @@
static const u32 nvkms_to_drm_format[] = {
/* RGB formats */
[NvKmsSurfaceMemoryFormatA1R5G5B5] = DRM_FORMAT_ARGB1555,
[NvKmsSurfaceMemoryFormatX1R5G5B5] = DRM_FORMAT_XRGB1555,
[NvKmsSurfaceMemoryFormatR5G6B5] = DRM_FORMAT_RGB565,
[NvKmsSurfaceMemoryFormatA8R8G8B8] = DRM_FORMAT_ARGB8888,
[NvKmsSurfaceMemoryFormatX8R8G8B8] = DRM_FORMAT_XRGB8888,
[NvKmsSurfaceMemoryFormatX8B8G8R8] = DRM_FORMAT_XBGR8888,
[NvKmsSurfaceMemoryFormatA2B10G10R10] = DRM_FORMAT_ABGR2101010,
[NvKmsSurfaceMemoryFormatX2B10G10R10] = DRM_FORMAT_XBGR2101010,
[NvKmsSurfaceMemoryFormatA8B8G8R8] = DRM_FORMAT_ABGR8888,
[NvKmsSurfaceMemoryFormatA1R5G5B5] = DRM_FORMAT_ARGB1555,
[NvKmsSurfaceMemoryFormatX1R5G5B5] = DRM_FORMAT_XRGB1555,
[NvKmsSurfaceMemoryFormatR5G6B5] = DRM_FORMAT_RGB565,
[NvKmsSurfaceMemoryFormatA8R8G8B8] = DRM_FORMAT_ARGB8888,
[NvKmsSurfaceMemoryFormatX8R8G8B8] = DRM_FORMAT_XRGB8888,
[NvKmsSurfaceMemoryFormatX8B8G8R8] = DRM_FORMAT_XBGR8888,
[NvKmsSurfaceMemoryFormatA2B10G10R10] = DRM_FORMAT_ABGR2101010,
[NvKmsSurfaceMemoryFormatX2B10G10R10] = DRM_FORMAT_XBGR2101010,
[NvKmsSurfaceMemoryFormatA8B8G8R8] = DRM_FORMAT_ABGR8888,
#if defined(DRM_FORMAT_ABGR16161616)
/*
* DRM_FORMAT_ABGR16161616 was introduced by Linux kernel commit
* ff92ecf575a92 (v5.14).
*/
[NvKmsSurfaceMemoryFormatR16G16B16A16] = DRM_FORMAT_ABGR16161616,
#endif
#if defined(DRM_FORMAT_ABGR16161616F)
[NvKmsSurfaceMemoryFormatRF16GF16BF16AF16] = DRM_FORMAT_ABGR16161616F,
#endif
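As background for this table: a sparse designated-initializer array like nvkms_to_drm_format[] is typically consumed with a bounds check plus a zero test, since slots that were never initialized remain 0 (DRM_FORMAT_INVALID) and therefore mean "no DRM equivalent on this kernel". A minimal, hypothetical helper (the function name is illustrative, not taken from the driver):

    /* Illustrative only: look up the DRM fourcc for an NvKms format. */
    static uint32_t nv_drm_fourcc_from_nvkms_format(enum NvKmsSurfaceMemoryFormat format)
    {
        if (format >= ARRAY_SIZE(nvkms_to_drm_format)) {
            return 0; /* out of range: no mapping */
        }

        /* Uninitialized entries are 0 (DRM_FORMAT_INVALID): unsupported. */
        return nvkms_to_drm_format[format];
    }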

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -33,7 +33,7 @@
#include <drm/drm_drv.h>
#include "nvidia-drm-gem-dma-buf.h"
#include "nvidia-drm-ioctl.h"
#include "nv_drm_common_ioctl.h"
#include "linux/dma-buf.h"

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -26,7 +26,7 @@
#include "nvidia-drm-gem-nvkms-memory.h"
#include "nvidia-drm-helper.h"
#include "nvidia-drm-ioctl.h"
#include "nv_drm_common_ioctl.h"
#include <drm/drm_drv.h>
#include <drm/drm_prime.h>
@@ -161,6 +161,21 @@ static int __nv_drm_gem_nvkms_map(
goto done;
}
/*
* XXX Physical mapping currently broken in cases where we can't guarantee
* that the mapping is contiguous. Fail on platforms that don't have
* guaranteed contiguous physical mappings.
*/
if (!nv_dev->contiguousPhysicalMappings) {
NV_DRM_DEV_LOG_INFO(
nv_dev,
"Mapping vidmem NvKmsKapiMemory 0x%p is currently "
"unsupported on coherent GPU memory configurations",
pMemory);
ret = -ENOMEM;
goto done;
}
if (!nvKms->mapMemory(nv_dev->pDevice,
pMemory,
NVKMS_KAPI_MAPPING_TYPE_USER,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -28,7 +28,7 @@
#include "nvidia-drm-gem-user-memory.h"
#include "nvidia-drm-helper.h"
#include "nvidia-drm-ioctl.h"
#include "nv_drm_common_ioctl.h"
#include "linux/dma-buf.h"
#include "linux/mm.h"

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,7 +25,6 @@
#if defined(NV_DRM_AVAILABLE)
#include "nvidia-drm-priv.h"
#include "nvidia-drm-ioctl.h"
#include "nvidia-drm-fence.h"
#include "nvidia-drm-gem.h"
#include "nvidia-drm-gem-nvkms-memory.h"
@@ -34,6 +33,7 @@
#include "nvidia-drm-helper.h"
#include "nvidia-drm-gem-dma-buf.h"
#include "nvidia-drm-gem-nvkms-memory.h"
#include "nv_drm_common_ioctl.h"
#include <drm/drm_drv.h>
#include <drm/drm_prime.h>

View File

@@ -54,7 +54,7 @@
* drm_atomic_helper_disable_all() is copied from
* linux/drivers/gpu/drm/drm_atomic_helper.c and modified to use
* nv_drm_for_each_crtc instead of drm_for_each_crtc to loop over all crtcs,
* use nv_drm_for_each_*_in_state instead of for_each_connector_in_state to loop
* use for_each_new_*_in_state instead of for_each_connector_in_state to loop
* over all modeset object states, and use drm_atomic_state_free() if
* drm_atomic_state_put() is not available.
*
@@ -139,13 +139,13 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
plane_state->rotation = DRM_MODE_ROTATE_0;
}
nv_drm_for_each_connector_in_state(state, conn, conn_state, i) {
for_each_new_connector_in_state(state, conn, conn_state, i) {
ret = drm_atomic_set_crtc_for_connector(conn_state, NULL);
if (ret < 0)
goto free;
}
nv_drm_for_each_plane_in_state(state, plane, plane_state, i) {
for_each_new_plane_in_state(state, plane, plane_state, i) {
ret = drm_atomic_set_crtc_for_plane(plane_state, NULL);
if (ret < 0)
goto free;

View File

@@ -138,154 +138,6 @@ nv_drm_prime_pages_to_sg(struct drm_device *dev,
int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
struct drm_modeset_acquire_ctx *ctx);
/*
* for_each_connector_in_state(), for_each_crtc_in_state() and
* for_each_plane_in_state() were added by kernel commit
* df63b9994eaf942afcdb946d27a28661d7dfbf2a which was Signed-off-by:
* Ander Conselvan de Oliveira <ander.conselvan.de.oliveira@intel.com>
* Daniel Vetter <daniel.vetter@ffwll.ch>
*
* for_each_connector_in_state(), for_each_crtc_in_state() and
* for_each_plane_in_state() were copied from
* include/drm/drm_atomic.h @
* 21a01abbe32a3cbeb903378a24e504bfd9fe0648
* which has the following copyright and license information:
*
* Copyright (C) 2014 Red Hat
* Copyright (C) 2014 Intel Corp.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Rob Clark <robdclark@gmail.com>
* Daniel Vetter <daniel.vetter@ffwll.ch>
*/
/**
* nv_drm_for_each_connector_in_state - iterate over all connectors in an
* atomic update
* @__state: &struct drm_atomic_state pointer
* @connector: &struct drm_connector iteration cursor
* @connector_state: &struct drm_connector_state iteration cursor
* @__i: int iteration cursor, for macro-internal use
*
* This iterates over all connectors in an atomic update. Note that before the
* software state is committed (by calling drm_atomic_helper_swap_state(), this
* points to the new state, while afterwards it points to the old state. Due to
* this tricky confusion this macro is deprecated.
*/
#if !defined(for_each_connector_in_state)
#define nv_drm_for_each_connector_in_state(__state, \
connector, connector_state, __i) \
for ((__i) = 0; \
(__i) < (__state)->num_connector && \
((connector) = (__state)->connectors[__i].ptr, \
(connector_state) = (__state)->connectors[__i].state, 1); \
(__i)++) \
for_each_if (connector)
#else
#define nv_drm_for_each_connector_in_state(__state, \
connector, connector_state, __i) \
for_each_connector_in_state(__state, connector, connector_state, __i)
#endif
/**
* nv_drm_for_each_crtc_in_state - iterate over all CRTCs in an atomic update
* @__state: &struct drm_atomic_state pointer
* @crtc: &struct drm_crtc iteration cursor
* @crtc_state: &struct drm_crtc_state iteration cursor
* @__i: int iteration cursor, for macro-internal use
*
* This iterates over all CRTCs in an atomic update. Note that before the
* software state is committed (by calling drm_atomic_helper_swap_state(), this
* points to the new state, while afterwards it points to the old state. Due to
* this tricky confusion this macro is deprecated.
*/
#if !defined(for_each_crtc_in_state)
#define nv_drm_for_each_crtc_in_state(__state, crtc, crtc_state, __i) \
for ((__i) = 0; \
(__i) < (__state)->dev->mode_config.num_crtc && \
((crtc) = (__state)->crtcs[__i].ptr, \
(crtc_state) = (__state)->crtcs[__i].state, 1); \
(__i)++) \
for_each_if (crtc_state)
#else
#define nv_drm_for_each_crtc_in_state(__state, crtc, crtc_state, __i) \
for_each_crtc_in_state(__state, crtc, crtc_state, __i)
#endif
/**
* nv_drm_for_each_plane_in_state - iterate over all planes in an atomic update
* @__state: &struct drm_atomic_state pointer
* @plane: &struct drm_plane iteration cursor
* @plane_state: &struct drm_plane_state iteration cursor
* @__i: int iteration cursor, for macro-internal use
*
* This iterates over all planes in an atomic update. Note that before the
* software state is committed (by calling drm_atomic_helper_swap_state(), this
* points to the new state, while afterwards it points to the old state. Due to
* this tricky confusion this macro is deprecated.
*/
#if !defined(for_each_plane_in_state)
#define nv_drm_for_each_plane_in_state(__state, plane, plane_state, __i) \
for ((__i) = 0; \
(__i) < (__state)->dev->mode_config.num_total_plane && \
((plane) = (__state)->planes[__i].ptr, \
(plane_state) = (__state)->planes[__i].state, 1); \
(__i)++) \
for_each_if (plane_state)
#else
#define nv_drm_for_each_plane_in_state(__state, plane, plane_state, __i) \
for_each_plane_in_state(__state, plane, plane_state, __i)
#endif
/*
* for_each_new_plane_in_state() was added by kernel commit
* 581e49fe6b411f407102a7f2377648849e0fa37f which was Signed-off-by:
* Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
* Daniel Vetter <daniel.vetter@ffwll.ch>
*
* This commit also added the old_state and new_state pointers to
* __drm_planes_state. Because of this, the best that can be done on kernel
* versions without this macro is for_each_plane_in_state.
*/
/**
* nv_drm_for_each_new_plane_in_state - iterate over all planes in an atomic update
* @__state: &struct drm_atomic_state pointer
* @plane: &struct drm_plane iteration cursor
* @new_plane_state: &struct drm_plane_state iteration cursor for the new state
* @__i: int iteration cursor, for macro-internal use
*
* This iterates over all planes in an atomic update, tracking only the new
* state. This is useful in enable functions, where we need the new state the
* hardware should be in when the atomic commit operation has completed.
*/
#if !defined(for_each_new_plane_in_state)
#define nv_drm_for_each_new_plane_in_state(__state, plane, new_plane_state, __i) \
nv_drm_for_each_plane_in_state(__state, plane, new_plane_state, __i)
#else
#define nv_drm_for_each_new_plane_in_state(__state, plane, new_plane_state, __i) \
for_each_new_plane_in_state(__state, plane, new_plane_state, __i)
#endif
#include <drm/drm_auth.h>
#include <drm/drm_file.h>

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015, 2025, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -108,8 +108,11 @@ static bool __will_generate_flip_event(struct drm_crtc *crtc,
return false;
}
/* Find out whether primary & overlay flip done events will be generated. */
nv_drm_for_each_plane_in_state(old_crtc_state->state,
/*
* Find out whether primary & overlay flip done events will be generated.
* Only called after drm_atomic_helper_swap_state, so we use old state.
*/
for_each_old_plane_in_state(old_crtc_state->state,
plane, old_plane_state, i) {
if (old_plane_state->crtc != crtc) {
continue;
@@ -193,7 +196,7 @@ static int __nv_drm_convert_in_fences(
return 0;
}
nv_drm_for_each_new_plane_in_state(state, plane, plane_state, i) {
for_each_new_plane_in_state(state, plane, plane_state, i) {
if ((plane->type == DRM_PLANE_TYPE_CURSOR) ||
(plane_state->crtc != crtc) ||
(plane_state->fence == NULL)) {
@@ -334,7 +337,8 @@ static int __nv_drm_get_syncpt_data(
head_reply_config = &reply_config->headReplyConfig[nv_crtc->head];
nv_drm_for_each_plane_in_state(old_crtc_state->state, plane, old_plane_state, i) {
/* Use old state because this is only called after drm_atomic_helper_swap_state */
for_each_old_plane_in_state(old_crtc_state->state, plane, old_plane_state, i) {
struct nv_drm_plane *nv_plane = to_nv_plane(plane);
if (plane->type == DRM_PLANE_TYPE_CURSOR || old_plane_state->crtc != crtc) {
@@ -395,7 +399,7 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
&(to_nv_atomic_state(state)->config);
struct NvKmsKapiModeSetReplyConfig reply_config = { };
struct drm_crtc *crtc;
struct drm_crtc_state *crtc_state;
struct drm_crtc_state *old_crtc_state, *new_crtc_state;
int i;
int ret;
@@ -429,18 +433,10 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
memset(requested_config, 0, sizeof(*requested_config));
/* Loop over affected crtcs and construct NvKmsKapiRequestedModeSetConfig */
nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
/*
* When committing a state, the new state is already stored in
* crtc->state. When checking a proposed state, the proposed state is
* stored in crtc_state.
*/
struct drm_crtc_state *new_crtc_state =
commit ? crtc->state : crtc_state;
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
if (commit) {
struct drm_crtc_state *old_crtc_state = crtc_state;
struct nv_drm_crtc_state *nv_new_crtc_state =
to_nv_crtc_state(new_crtc_state);
@@ -497,20 +493,17 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
}
if (commit && nv_dev->supportsSyncpts) {
nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
/* commit is true so we check old state */
for_each_old_crtc_in_state(state, crtc, old_crtc_state, i) {
/*! loop over affected crtcs and get NvKmsKapiModeSetReplyConfig */
ret = __nv_drm_get_syncpt_data(
nv_dev, crtc, crtc_state, requested_config, &reply_config);
nv_dev, crtc, old_crtc_state, requested_config, &reply_config);
if (ret != 0) {
return ret;
}
}
}
if (commit && nv_dev->requiresVrrSemaphores && reply_config.vrrFlip) {
nvKms->signalVrrSemaphore(nv_dev->pDevice, reply_config.vrrSemaphoreIndex);
}
return 0;
}
@@ -523,12 +516,38 @@ int nv_drm_atomic_check(struct drm_device *dev,
struct drm_crtc_state *crtc_state;
int i;
nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
struct drm_plane *plane;
struct drm_plane_state *plane_state;
int j;
bool cursor_surface_changed;
bool cursor_only_commit;
for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
/*
* Committing cursor surface change without any other plane change can
* cause cursor surface in use by HW to be freed prematurely. Add all
* planes to the commit to avoid this. This is a workaround for bug 4966645.
*/
cursor_surface_changed = false;
cursor_only_commit = true;
for_each_new_plane_in_state(crtc_state->state, plane, plane_state, j) {
if (plane->type == DRM_PLANE_TYPE_CURSOR) {
if (plane_state->fb != plane->state->fb) {
cursor_surface_changed = true;
}
} else {
cursor_only_commit = false;
break;
}
}
/*
* if the color management changed on the crtc, we need to update the
* crtc's plane's CSC matrices, so add the crtc's planes to the commit
*/
if (crtc_state->color_mgmt_changed) {
if (crtc_state->color_mgmt_changed ||
(cursor_surface_changed && cursor_only_commit)) {
if ((ret = drm_atomic_add_affected_planes(state, crtc)) != 0) {
goto done;
}
@@ -619,7 +638,7 @@ int nv_drm_atomic_commit(struct drm_device *dev,
* Our system already implements such a queue, but due to
* bug 4054608, it is currently not used.
*/
nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
/*
@@ -726,7 +745,7 @@ int nv_drm_atomic_commit(struct drm_device *dev,
goto done;
}
nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
for_each_old_crtc_in_state(state, crtc, crtc_state, i) {
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
struct nv_drm_crtc_state *nv_new_crtc_state =
to_nv_crtc_state(crtc->state);

View File

@@ -30,6 +30,7 @@
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#include <drm/drm_print.h>
#include <drm/drm_device.h>
#include <drm/drm_gem.h>
@@ -141,8 +142,9 @@ struct nv_drm_device {
NvBool hasVideoMemory;
NvBool contiguousPhysicalMappings;
NvBool supportsSyncpts;
NvBool requiresVrrSemaphores;
NvBool subOwnershipGranted;
NvBool hasFramebufferConsole;

View File

@@ -62,6 +62,9 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_plane_create_color_properties
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_legacy_gamma_set
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_mixed
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_prime_mmap
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_sysfs_connector_property_event
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_sysfs_connector_status_event
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_flags_set
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_legacy_dev_list
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
@@ -91,7 +94,6 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += reservation_object_reserve_shared_has_num_fenc
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_has_override_edid
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_file_get_master
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_modeset_lock_all_end
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
NV_CONFTEST_TYPE_COMPILE_TESTS += fence_ops_use_64bit_seqno
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_framebuffers_has_driver_arg

View File

@@ -22,7 +22,6 @@
*/
#include "nv-kthread-q.h"
#include "nv-list-helpers.h"
#include <linux/kthread.h>
#include <linux/interrupt.h>
@@ -43,17 +42,6 @@
// into the queue, and those functions will be run in the context of the
// queue's kthread.
#ifndef WARN
// Only *really* old kernels (2.6.9) end up here. Just use a simple printk
// to implement this, because such kernels won't be supported much longer.
#define WARN(condition, format...) ({ \
int __ret_warn_on = !!(condition); \
if (unlikely(__ret_warn_on)) \
printk(KERN_ERR format); \
unlikely(__ret_warn_on); \
})
#endif
#define NVQ_WARN(fmt, ...) \
do { \
if (in_interrupt()) { \

View File

@@ -130,6 +130,15 @@ module_param_named(config_file, nvkms_conf, charp, 0400);
static atomic_t nvkms_alloc_called_count;
#define NV_SUPPORTS_PLATFORM_DEVICE_PUT NV_IS_EXPORT_SYMBOL_GPL_platform_device_put
#if defined(NV_LINUX_NVHOST_H_PRESENT) && NV_SUPPORTS_PLATFORM_DEVICE_PUT
#if defined(NV_LINUX_HOST1X_NEXT_H_PRESENT) || defined(CONFIG_TEGRA_GRHOST)
#define NVKMS_NVHOST_SYNCPT_SUPPORTED
struct platform_device *nvhost_platform_device = NULL;
#endif
#endif
NvBool nvkms_test_fail_alloc_core_channel(
enum FailAllocCoreChannelMethod method
)
@@ -206,21 +215,18 @@ NvBool nvkms_kernel_supports_syncpts(void)
* support for syncpts; callers must also check that the hardware
* supports syncpts.
*/
#if (defined(CONFIG_TEGRA_GRHOST) || defined(NV_LINUX_HOST1X_NEXT_H_PRESENT))
#if defined(NVKMS_NVHOST_SYNCPT_SUPPORTED)
return NV_TRUE;
#else
return NV_FALSE;
#endif
}
#define NVKMS_SYNCPT_STUBS_NEEDED
/*************************************************************************
* NVKMS interface for nvhost unit for sync point APIs.
*************************************************************************/
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
#undef NVKMS_SYNCPT_STUBS_NEEDED
#if defined(NVKMS_NVHOST_SYNCPT_SUPPORTED) && defined(CONFIG_TEGRA_GRHOST)
#include <linux/nvhost.h>
@@ -228,17 +234,21 @@ NvBool nvkms_syncpt_op(
enum NvKmsSyncPtOp op,
NvKmsSyncPtOpParams *params)
{
struct platform_device *pdev = nvhost_get_default_device();
if (nvhost_platform_device == NULL) {
nvkms_log(NVKMS_LOG_LEVEL_ERROR, NVKMS_LOG_PREFIX,
"Failed to get default nvhost device");
return NV_FALSE;
}
switch (op) {
case NVKMS_SYNCPT_OP_ALLOC:
params->alloc.id = nvhost_get_syncpt_client_managed(
pdev, params->alloc.syncpt_name);
nvhost_platform_device, params->alloc.syncpt_name);
break;
case NVKMS_SYNCPT_OP_PUT:
nvhost_syncpt_put_ref_ext(pdev, params->put.id);
nvhost_syncpt_put_ref_ext(nvhost_platform_device, params->put.id);
break;
case NVKMS_SYNCPT_OP_FD_TO_ID_AND_THRESH: {
@@ -272,7 +282,7 @@ NvBool nvkms_syncpt_op(
case NVKMS_SYNCPT_OP_ID_AND_THRESH_TO_FD:
nvhost_syncpt_create_fence_single_ext(
pdev,
nvhost_platform_device,
params->id_and_thresh_to_fd.id,
params->id_and_thresh_to_fd.thresh,
"nvkms-fence",
@@ -281,7 +291,7 @@ NvBool nvkms_syncpt_op(
case NVKMS_SYNCPT_OP_READ_MINVAL:
params->read_minval.minval =
nvhost_syncpt_read_minval(pdev, params->read_minval.id);
nvhost_syncpt_read_minval(nvhost_platform_device, params->read_minval.id);
break;
}
@@ -289,7 +299,7 @@ NvBool nvkms_syncpt_op(
return NV_TRUE;
}
#elif defined(NV_LINUX_HOST1X_NEXT_H_PRESENT) && defined(NV_LINUX_NVHOST_H_PRESENT)
#elif defined(NVKMS_NVHOST_SYNCPT_SUPPORTED) && defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)
#include <linux/dma-fence.h>
#include <linux/file.h>
@@ -305,24 +315,20 @@ NvBool nvkms_syncpt_op(
#include <linux/nvhost.h>
#undef NVKMS_SYNCPT_STUBS_NEEDED
NvBool nvkms_syncpt_op(
enum NvKmsSyncPtOp op,
NvKmsSyncPtOpParams *params)
{
struct host1x_syncpt *host1x_sp;
struct platform_device *pdev;
struct host1x *host1x;
pdev = nvhost_get_default_device();
if (pdev == NULL) {
if (nvhost_platform_device == NULL) {
nvkms_log(NVKMS_LOG_LEVEL_ERROR, NVKMS_LOG_PREFIX,
"Failed to get nvhost default pdev");
return NV_FALSE;
"Failed to get default nvhost device");
return NV_FALSE;
}
host1x = nvhost_get_host1x(pdev);
host1x = nvhost_get_host1x(nvhost_platform_device);
if (host1x == NULL) {
nvkms_log(NVKMS_LOG_LEVEL_ERROR, NVKMS_LOG_PREFIX,
"Failed to get host1x");
@@ -436,9 +442,7 @@ NvBool nvkms_syncpt_op(
return NV_TRUE;
}
#endif
#ifdef NVKMS_SYNCPT_STUBS_NEEDED
#else
/* Unsupported STUB for nvkms_syncpt APIs */
NvBool nvkms_syncpt_op(
enum NvKmsSyncPtOp op,
@@ -2091,6 +2095,14 @@ static int __init nvkms_init(void)
atomic_set(&nvkms_alloc_called_count, 0);
#if defined(NVKMS_NVHOST_SYNCPT_SUPPORTED)
/*
* nvhost_get_default_device() might return NULL; don't check it
* until we use it.
*/
nvhost_platform_device = nvhost_get_default_device();
#endif
ret = nvkms_alloc_rm();
if (ret != 0) {
@@ -2152,6 +2164,10 @@ static void __exit nvkms_exit(void)
struct nvkms_timer_t *timer, *tmp_timer;
unsigned long flags = 0;
#if defined(NVKMS_NVHOST_SYNCPT_SUPPORTED)
platform_device_put(nvhost_platform_device);
#endif
nvkms_proc_exit();
down(&nvkms_lock);

View File

@@ -107,6 +107,7 @@ typedef struct {
enum FailAllocCoreChannelMethod {
FAIL_ALLOC_CORE_CHANNEL_RM_SETUP_CORE_CHANNEL = 0,
FAIL_ALLOC_CORE_CHANNEL_RESTORE_CONSOLE = 1,
FAIL_ALLOC_CORE_CHANNEL_NO_CLASS = 2,
};
NvBool nvkms_test_fail_alloc_core_channel(enum FailAllocCoreChannelMethod method);

View File

@@ -100,4 +100,5 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_register_backlight
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_platform_device_put

View File

@@ -159,9 +159,6 @@ struct nvidia_p2p_page_table {
*
* This API only supports pinned, GPU-resident memory, such as that provided
* by cudaMalloc().
* This API does not support Coherent Driver-based Memory Management(CDMM) mode.
* CDMM allows coherent GPU memory to be managed by the driver and not the OS.
* This is done by the driver not onlining the memory as a NUMA node.
*
* This API may sleep.
*

View File

@@ -30,9 +30,11 @@
#define NVC8B5_SET_SEMAPHORE_B (0x00000244)
#define NVC8B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC8B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC8B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_SRC_PHYS_MODE_PEER_ID 8:6
#define NVC8B5_SET_SRC_PHYS_MODE_FLA 9:9
@@ -40,6 +42,7 @@
#define NVC8B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC8B5_LAUNCH_DMA (0x00000300)
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
@@ -66,6 +69,7 @@
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC8B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC8B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
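For readers unfamiliar with these autogenerated class headers: a define whose value looks like 12:12 or 1:0 names the HIGH:LOW bit range of a field inside a 32-bit method payload, and the accompanying *_TARGET_* / *_TYPE_* defines are the values placed into that range. The macros below are an illustrative sketch of the mask/shift arithmetic only; they are not the helpers the driver itself uses.

    /* Sketch: turn a HIGH:LOW bit range into a mask and a shifted value. */
    #include <stdint.h>

    #define FIELD_MASK(hi, lo)      ((uint32_t)(((((uint64_t)1) << ((hi) - (lo) + 1)) - 1) << (lo)))
    #define FIELD_VALUE(hi, lo, v)  (((uint32_t)(v) << (lo)) & FIELD_MASK(hi, lo))

    /*
     * Example: a LAUNCH_DMA payload with a physical source address and a
     * virtual destination address, using the ranges and values defined
     * above (SRC_TYPE is bits 12:12 with PHYSICAL = 1; DST_TYPE is bits
     * 13:13 with VIRTUAL = 0).
     */
    static uint32_t build_launch_dma_payload(void)
    {
        uint32_t payload = 0;

        payload |= FIELD_VALUE(12, 12, 0x1); /* SRC_TYPE = PHYSICAL */
        payload |= FIELD_VALUE(13, 13, 0x0); /* DST_TYPE = VIRTUAL  */

        return payload;
    }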

View File

@@ -46,4 +46,8 @@
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B (0x0000000B)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B (0x0000000B)
/* valid ARCHITECTURE_GB20x implementation values */
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB206 (0x00000006)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB207 (0x00000007)
#endif /* _ctrl2080mc_h_ */

View File

@@ -1,152 +1,28 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
/*
* SPDX-FileCopyrightText: Copyright (c) 2003-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __ga100_dev_runlist_h__
#define __ga100_dev_runlist_h__
/* This file is autogenerated. Do not edit */
#define NV_RUNLIST 0x000003ff:0x00000000 /* RW--D */
#define NV_CHRAM 0x00001fff:0x00000000 /* RW--D */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK 0x040 /* RW-4R */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION 3:0 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_DEFAULT_PRIV_LEVEL 15 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_ALL_LEVELS_ENABLED 0x0000000F /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_ONLY_LEVEL3_ENABLED 0x00000008 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0 0:0 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1 1:1 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2 2:2 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3 3:3 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION 7:4 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_DEFAULT_PRIV_LEVEL 8 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_ALL_LEVELS_ENABLED 0x0F /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_ONLY_LEVEL3_ENABLED 0x08 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0 4:4 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1 5:5 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2 6:6 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3 7:7 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_VIOLATION 8:8 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_VIOLATION 9:9 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL 10:10 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_BLOCKED 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_LOWERED 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL 11:11 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_BLOCKED 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_LOWERED 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_ENABLE 31:12 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_ENABLE_ALL_SOURCES_ENABLED 0x000FFFFF /* RWI-V */
#define NV_RUNLIST_INT_CYA_SPARE 0x044 /* RW-4R */
#define NV_RUNLIST_INT_CYA_SPARE__PRIV_LEVEL_MASK 0x040 /* */
#define NV_RUNLIST_INT_CYA_SPARE_DATA 31:0 /* RWIUF */
#define NV_RUNLIST_INT_CYA_SPARE_DATA_INIT 0x00000000 /* RWI-V */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE 0:0 /* */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE_VAL 1:1 /* */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE_VAL_1MTHD 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE_VAL_2MTHD 0x00000001 /* */
#define NV_RUNLIST_CONFIG 0x000 /* RW-4R */
#define NV_RUNLIST_CONFIG_SEM_ACQ_STRENGTH 0:0 /* RWIVF */
#define NV_RUNLIST_CONFIG_SEM_ACQ_STRENGTH_WEAK 0x00000000 /* RWI-V */
#define NV_RUNLIST_CONFIG_SEM_ACQ_STRENGTH_STRONG 0x00000001 /* RW--V */
#define NV_RUNLIST_CONFIG_SEM_REL_STRENGTH 4:4 /* RWIVF */
#define NV_RUNLIST_CONFIG_SEM_REL_STRENGTH_WEAK 0x00000000 /* RW--V */
#define NV_RUNLIST_CONFIG_SEM_REL_STRENGTH_STRONG 0x00000001 /* RWI-V */
#define NV_RUNLIST_CONFIG_L2_EVICT 9:8 /* RWIVF */
#define NV_RUNLIST_CONFIG_L2_EVICT_FIRST 0x00000000 /* RWI-V */
#define NV_RUNLIST_CONFIG_L2_EVICT_NORMAL 0x00000001 /* RW--V */
#define NV_RUNLIST_CONFIG_L2_EVICT_LAST 0x00000002 /* RW--V */
#define NV_RUNLIST_CONFIG_SUBCH4 10:10 /* RWXVF */
#define NV_RUNLIST_CONFIG_SUBCH4_INACTIVE 0x00000000 /* RW--V */
#define NV_RUNLIST_CONFIG_SUBCH4_ACTIVE 0x00000001 /* RW--V */
#define NV_RUNLIST_CHANNEL_CONFIG 0x004 /* R--4R */
#define NV_RUNLIST_CHANNEL_CONFIG_NUM_CHANNELS_LOG2 3:0 /* C--UF */
#define NV_RUNLIST_CHANNEL_CONFIG_NUM_CHANNELS_LOG2_2K 11 /* C---V */
#define NV_RUNLIST_CHANNEL_CONFIG_CHRAM_BAR0_OFFSET 31:4 /* R-XVF */
#define NV_RUNLIST_DOORBELL_CONFIG 0x008 /* R--4R */
#define NV_RUNLIST_DOORBELL_CONFIG_ID 31:16 /* R-XVF */
#define NV_RUNLIST_FB_CONFIG 0x00C /* R--4R */
#define NV_RUNLIST_FB_CONFIG_FB_THREAD_ID 7:0 /* R-XVF */
#define NV_RUNLIST_FB_CONFIG_ESC_ID 15:8 /* R-XVF */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG(i) (0x300+(i)*4) /* RW-4A */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG__SIZE_1 64 /* */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_MASK 11:0 /* */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_MASK_HW 10:0 /* RWIVF */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_MASK_HW_INIT 2047 /* RWI-V */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_SET 27:16 /* */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_SET_HW 26:16 /* RWIVF */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_SET_HW_INIT 0x0 /* RWI-V */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_PENDING_ENABLE 31:31 /* RWIVF */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_PENDING_ENABLE_TRUE 1 /* RW--V */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_PENDING_ENABLE_FALSE 0 /* RWI-V */
#define NV_RUNLIST_PBDMA_CONFIG(i) (0x010+(i)*4) /* R--4A */
#define NV_RUNLIST_PBDMA_CONFIG__SIZE_1 2 /* */
#define NV_RUNLIST_PBDMA_CONFIG_PBDMA_ID 7:0 /* R-XUF */
#define NV_RUNLIST_PBDMA_CONFIG_PBDMA_BAR0_OFFSET 25:10 /* R-XUF */
#define NV_RUNLIST_PBDMA_CONFIG_VALID 31:31 /* R-XUF */
#define NV_RUNLIST_PBDMA_CONFIG_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_PBDMA_CONFIG_VALID_FALSE 0x00000000 /* R---V */
#define NV_RUNLIST_ACQ_PRETEST 0x020 /* RW-4R */
#define NV_RUNLIST_ACQ_PRETEST_TIMEOUT 7:0 /* RWIUF */
#define NV_RUNLIST_ACQ_PRETEST_TIMEOUT_8 0x00000008 /* RWI-V */
#define NV_RUNLIST_ACQ_PRETEST_TIMESCALE 15:12 /* RWIUF */
#define NV_RUNLIST_ACQ_PRETEST_TIMESCALE_0 0x00000000 /* RWI-V */
#define NV_RUNLIST_ACQ_PRETEST_TIMESCALE_10 0x0000000a /* RW--V */
#define NV_RUNLIST_IDLE_FILTER 0x024 /* RW-4R */
#define NV_RUNLIST_IDLE_FILTER_PERIOD 7:0 /* RWIUF */
#define NV_RUNLIST_IDLE_FILTER_PERIOD_INIT 0x00000050 /* RWI-V */
#define NV_RUNLIST_IDLE_FILTER_PERIOD__PROD 0x00000064 /* RW--V */
#define NV_RUNLIST_IDLE_FILTER_PERIOD_8 0x00000008 /* RW--V */
#define NV_RUNLIST_IDLE_FILTER_PERIOD_32 0x00000020 /* RW--V */
#define NV_RUNLIST_USERD_WRITEBACK 0x028 /* RW-4R */
#define NV_RUNLIST_USERD_WRITEBACK_TIMER 7:0 /* RWIUF */
#define NV_RUNLIST_USERD_WRITEBACK_TIMER_DISABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_USERD_WRITEBACK_TIMER_SHORT 0x00000003 /* RW--V */
#define NV_RUNLIST_USERD_WRITEBACK_TIMER_100US 0x00000064 /* RWI-V */
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE 15:12 /* RWIUF */
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE_0 0x00000000 /* RWI-V */
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE_SHORT 0x00000000 /* */
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE_100US 0x00000000 /* */
#define NV_RUNLIST_ESCHED_CONFIG 0x02c /* C--4R */
#define NV_RUNLIST_ESCHED_CONFIG_ESCHED_CLASS_ID 15:0 /* C--UF */
#define NV_RUNLIST_ESCHED_CONFIG_ESCHED_CLASS_ID_VALUE 50543 /* C---V */
#define NV_CHRAM_CHANNEL(i) (0x000+(i)*4) /* RW-4A */
#define NV_CHRAM_CHANNEL__SIZE_1 2048 /* */
#define NV_CHRAM_CHANNEL_WRITE_CONTROL 0:0 /* -WIVF */
@@ -188,22 +64,6 @@
#define NV_CHRAM_CHANNEL_ACQUIRE_FAIL 12:12 /* RWIVF */
#define NV_CHRAM_CHANNEL_ACQUIRE_FAIL_FALSE 0x00000000 /* RWI-V */
#define NV_CHRAM_CHANNEL_ACQUIRE_FAIL_TRUE 0x00000001 /* RW--V */
#define NV_CHRAM_CHANNEL_STATUS 12:8 /* */
#define NV_CHRAM_CHANNEL_STATUS_IDLE 0x00000000 /* */
#define NV_CHRAM_CHANNEL_STATUS_PENDING 0x00000001 /* */
#define NV_CHRAM_CHANNEL_STATUS_PENDING_CTX_RELOAD 0x00000003 /* */
#define NV_CHRAM_CHANNEL_STATUS_PENDING_ACQUIRE_FAIL 0x00000011 /* */
#define NV_CHRAM_CHANNEL_STATUS_PENDING_ACQUIRE_FAIL_CTX_RELOAD 0x00000013 /* */
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY 0x00000004 /* */
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY_AND_ENG_BUSY 0x0000000C /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY 0x00000008 /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING_ACQUIRE_FAIL 0x00000019 /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING 0x00000009 /* */
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY_CTX_RELOAD 0x00000006 /* */
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY_ENG_BUSY_CTX_RELOAD 0x0000000E /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_CTX_RELOAD 0x0000000A /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING_CTX_RELOAD 0x0000000B /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING_ACQUIRE_FAIL_CTX_RELOAD 0x0000001B /* */
#define NV_CHRAM_CHANNEL_UPDATE 31:0 /* */
#define NV_CHRAM_CHANNEL_UPDATE_ENABLE_CHANNEL 0x00000002 /* */
#define NV_CHRAM_CHANNEL_UPDATE_DISABLE_CHANNEL 0x00000003 /* */
@@ -211,461 +71,10 @@
#define NV_CHRAM_CHANNEL_UPDATE_RESET_PBDMA_FAULTED 0x00000011 /* */
#define NV_CHRAM_CHANNEL_UPDATE_RESET_ENG_FAULTED 0x00000021 /* */
#define NV_CHRAM_CHANNEL_UPDATE_CLEAR_CHANNEL 0xFFFFFFFF /* */
#define NV_RUNLIST_SUBMIT_BASE_LO 0x080 /* RW-4R */
#define NV_RUNLIST_SUBMIT_BASE_LO_PTR_LO 31:12 /* RWIUF */
#define NV_RUNLIST_SUBMIT_BASE_LO_PTR_LO_NULL 0x00000000 /* RWI-V */
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET 1:0 /* RWIVF */
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET_VID_MEM 0x0 /* RWI-V */
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET_SYS_MEM_COHERENT 0x2 /* RW--V */
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET_SYS_MEM_NONCOHERENT 0x3 /* RW--V */
#define NV_RUNLIST_SUBMIT_BASE_LO_PTR_ALIGN_SHIFT 12 /* */
#define NV_RUNLIST_SUBMIT_BASE_HI 0x084 /* RW-4R */
#define NV_RUNLIST_SUBMIT_BASE_HI_PTR_HI 7:0 /* RWIUF */
#define NV_RUNLIST_SUBMIT_BASE_HI_PTR_HI_NULL 0x00000000 /* RWI-V */
#define NV_RUNLIST_SUBMIT 0x088 /* RW-4R */
#define NV_RUNLIST_SUBMIT_LENGTH 15:0 /* RWIUF */
#define NV_RUNLIST_SUBMIT_LENGTH_ZERO 0x00000000 /* RWI-V */
#define NV_RUNLIST_SUBMIT_LENGTH_MAX 0x0000ffff /* RW--V */
#define NV_RUNLIST_SUBMIT_OFFSET 31:16 /* RWIVF */
#define NV_RUNLIST_SUBMIT_OFFSET_ZERO 0x00000000 /* RWI-V */
#define NV_RUNLIST_SUBMIT_INFO 0x08C /* R--4R */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID 13:0 /* */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_HW 10:0 /* R-IUF */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_HW_DEFAULT 0x00000000 /* R-I-V */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_VALID 14:14 /* R-IUF */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_SUBMIT_INFO_PENDING 15:15 /* R-IVF */
#define NV_RUNLIST_SUBMIT_INFO_PENDING_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_SUBMIT_INFO_PENDING_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_OFFSET 31:16 /* R-IVF */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_OFFSET_ZERO 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK(i) (0x190+(i)*4) /* RW-4A */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION 3:0 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_DEFAULT_PRIV_LEVEL 15 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_ALL_LEVELS_ENABLED 0x0000000F /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_ONLY_LEVEL3_ENABLED 0x00000008 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0 0:0 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1 1:1 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2 2:2 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3 3:3 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION 7:4 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_DEFAULT_PRIV_LEVEL 15 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_ALL_LEVELS_ENABLED 0x0F /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_ONLY_LEVEL3_ENABLED 0x08 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0 4:4 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1 5:5 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2 6:6 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3 7:7 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_VIOLATION 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_VIOLATION 9:9 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL 10:10 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_BLOCKED 0x00000001 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_LOWERED 0x00000000 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL 11:11 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_BLOCKED 0x00000001 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_LOWERED 0x00000000 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_ENABLE 31:12 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_ENABLE_ALL_SOURCES_ENABLED 0x000FFFFF /* RWI-V */
#define NV_RUNLIST_INTR_VECTORID(i) (0x160+(i)*4) /* RW-4A */
#define NV_RUNLIST_INTR_VECTORID__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_VECTORID__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
#define NV_RUNLIST_INTR_VECTORID_VECTOR 11:0 /* RWXUF */
#define NV_RUNLIST_INTR_VECTORID_GSP 30:30 /* RWIUF */
#define NV_RUNLIST_INTR_VECTORID_GSP_DISABLE 0 /* RW--V */
#define NV_RUNLIST_INTR_VECTORID_GSP_ENABLE 1 /* RWI-V */
#define NV_RUNLIST_INTR_VECTORID_CPU 31:31 /* RWIUF */
#define NV_RUNLIST_INTR_VECTORID_CPU_DISABLE 0 /* RW--V */
#define NV_RUNLIST_INTR_VECTORID_CPU_ENABLE 1 /* RWI-V */
#define NV_RUNLIST_INTR_RETRIGGER(i) (0x180+(i)*4) /* -W-4A */
#define NV_RUNLIST_INTR_RETRIGGER__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_RETRIGGER__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
#define NV_RUNLIST_INTR_RETRIGGER_TRIGGER 0:0 /* -W-VF */
#define NV_RUNLIST_INTR_RETRIGGER_TRIGGER_TRUE 1 /* -W--V */
#define NV_RUNLIST_INTR_0 0x100 /* RW-4R */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0 0:0 /* RWIVF */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1 1:1 /* RWIVF */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2 2:2 /* RWIVF */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG(i) (i):(i) /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG__SIZE_1 3 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG_NOT_PENDING 0x00000000 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG_PENDING 0x00000001 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG_RESET 0x00000001 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE(i) ((i)+1):((i)+1) /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE_NOT_PENDING 0x00000000 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE_PENDING 0x00000001 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE_RESET 0x00000001 /* */
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE 4:4 /* RWIVF */
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE 5:5 /* RWXVF */
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE 6:6 /* RWXVF */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE 7:7 /* RWXVF */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_BAD_TSG 12:12 /* RWIVF */
#define NV_RUNLIST_INTR_0_BAD_TSG_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_BAD_TSG_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_BAD_TSG_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_0 16:16 /* R-XVF */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_0_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_0_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_0 17:17 /* R-XVF */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_0_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_0_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_1 18:18 /* R-XVF */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_1_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_1_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_1 19:19 /* R-XVF */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_1_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_1_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j(i,j) (16+(i)+(j)*2):(16+(i)+(j)*2) /* */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j__SIZE_2 2 /* */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j_NOT_PENDING 0x00000000 /* */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j_PENDING 0x00000001 /* */
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE 9:9 /* RWIVF */
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_RUNLIST_EVENT 9:9 /* */
#define NV_RUNLIST_INTR_0_MASK_SET 0x110 /* RW-4R */
#define NV_RUNLIST_INTR_0_MASK_SET_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_MASK_SET_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_MASK_SET_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_MASK_CLEAR 0x118 /* RW-4R */
#define NV_RUNLIST_INTR_0_MASK_CLEAR_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_MASK_CLEAR_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_MASK_CLEAR_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE(i) (0x120+(i)*8) /* RW-4A */
#define NV_RUNLIST_INTR_0_EN_SET_TREE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG0 0:0 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG1 1:1 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG2 2:2 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG2_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG2_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG(i) (i):(i) /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG__SIZE_1 3 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE(i) ((i)+1):((i)+1) /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_IDLE 4:4 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_AND_ENG_IDLE 5:5 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE 6:6 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE 7:7 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_PREEMPT_COMPLETE 9:9 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_BAD_TSG 12:12 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_BAD_TSG_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_BAD_TSG_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_0 16:16 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_0 17:17 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_1 18:18 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_1 19:19 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j(i,j) (16+(i)+(j)*2):(16+(i)+(j)*2) /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j__SIZE_2 2 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE(i) (0x140+(i)*8) /* RW-4A */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG0 0:0 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG1 1:1 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG2 2:2 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG2_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG2_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG(i) (i):(i) /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG__SIZE_1 3 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE(i) ((i)+1):((i)+1) /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_IDLE 4:4 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_AND_ENG_IDLE 5:5 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE 6:6 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE 7:7 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_PREEMPT_COMPLETE 9:9 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_BAD_TSG 12:12 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_BAD_TSG_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_BAD_TSG_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_0 16:16 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_0 17:17 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_1 18:18 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_1 19:19 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j(i,j) (16+(i)+(j)*2):(16+(i)+(j)*2) /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j__SIZE_2 2 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j_ENABLED 0x00000001 /* */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO(i) (0x224+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_PREV_TSGID 13:0 /* R-IUF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_PREV_TSGID_DEFAULT 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE 15:14 /* R-IUF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE_LOAD 0x00000001 /* R-I-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE_SAVE 0x00000002 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE_SWITCH 0x00000003 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_NEXT_TSGID 29:16 /* R-IUF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_NEXT_TSGID_DEFAULT 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS 31:30 /* R-IUF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_AWAITING_ACK 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_ENG_WAS_RESET 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_ACK_RECEIVED 0x00000002 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_DROPPED_TIMEOUT 0x00000003 /* R---V */
#define NV_RUNLIST_INFO 0x108 /* R--4R */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_INTR_ARM 0:0 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_INTR_ARM_ARMED 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_INTR_ARM 1:1 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_INTR_ARM_ARMED 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_AND_ENG_IDLE_INTR_ARM 4:4 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_AND_ENG_IDLE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_AND_ENG_IDLE_INTR_ARM_ARMED 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_AND_ENG_IDLE_INTR_ARM 5:5 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_AND_ENG_IDLE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_AND_ENG_IDLE_INTR_ARM_ARMED 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_ENG_IDLE 8:8 /* R-IUF */
#define NV_RUNLIST_INFO_ENG_IDLE_FALSE 0x00000000 /* R---V */
#define NV_RUNLIST_INFO_ENG_IDLE_TRUE 0x00000001 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_IDLE 9:9 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_FALSE 0x00000000 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_TRUE 0x00000001 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_STATUS 10:10 /* R-IVF */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_STATUS_IDLE 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_STATUS_BUSY 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_ACQUIRE_STILL_PENDING 12:12 /* R-IUF */
#define NV_RUNLIST_INFO_ACQUIRE_STILL_PENDING_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_ACQUIRE_STILL_PENDING_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_NACKED 13:13 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_NACKED_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_NACKED_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG 0x174 /* R--4R */
#define NV_RUNLIST_INTR_BAD_TSG_CODE 3:0 /* R-IVF */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_NO_ERROR 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_ZERO_LENGTH_TSG 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_MAX_LENGTH_EXCEEDED 0x00000002 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_RUNLIST_OVERFLOW 0x00000003 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_EXPECTED_A_CHID_ENTRY 0x00000004 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_EXPECTED_A_TSG_HEADER 0x00000005 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_INVALID_RUNQUEUE 0x00000006 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG(i) (0x220+(i)*64) /* RW-4A */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_PERIOD 30:0 /* RWIVF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_PERIOD_INIT 0x003fffff /* RWI-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_PERIOD_MAX 0x7fffffff /* RW--V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_DETECTION 31:31 /* RWIVF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_DETECTION_DISABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_DETECTION_ENABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG 0x050 /* RW-4R */
#define NV_RUNLIST_BLKCG_IDLE_CG_DLY_CNT 5:0 /* RWIVF */
#define NV_RUNLIST_BLKCG_IDLE_CG_DLY_CNT_INIT 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG_IDLE_CG_DLY_CNT__PROD 0x00000002 /* RW--V */
#define NV_RUNLIST_BLKCG_IDLE_CG_EN 6:6 /* RWIVF */
#define NV_RUNLIST_BLKCG_IDLE_CG_EN_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG_IDLE_CG_EN_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG_IDLE_CG_EN__PROD 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG_STATE_CG_EN 7:7 /* */
#define NV_RUNLIST_BLKCG_STATE_CG_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_STATE_CG_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_STATE_CG_EN__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_STALL_CG_DLY_CNT 13:8 /* */
#define NV_RUNLIST_BLKCG_STALL_CG_DLY_CNT_INIT 0x00000000 /* */
#define NV_RUNLIST_BLKCG_STALL_CG_DLY_CNT__PROD 0x00000002 /* */
#define NV_RUNLIST_BLKCG_STALL_CG_EN 14:14 /* RWIVF */
#define NV_RUNLIST_BLKCG_STALL_CG_EN_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG_STALL_CG_EN_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG_STALL_CG_EN__PROD 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN 15:15 /* */
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN__PROD 0x00000001 /* */
#define NV_RUNLIST_BLKCG_WAKEUP_DLY_CNT 19:16 /* RWIVF */
#define NV_RUNLIST_BLKCG_WAKEUP_DLY_CNT_INIT 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG_WAKEUP_DLY_CNT__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT 23:20 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT_INIT 0x0000000f /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT_FULLSPEED 0x0000000f /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_DI_DT_SKEW_VAL 27:24 /* */
#define NV_RUNLIST_BLKCG_DI_DT_SKEW_VAL_INIT 0x00000000 /* */
#define NV_RUNLIST_BLKCG_DI_DT_SKEW_VAL__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_EN 28:28 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_EN__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER 29:29 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER_EN 0x00000001 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER_DIS 0x00000000 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN 30:30 /* */
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_HALT_CG_EN 31:31 /* */
#define NV_RUNLIST_BLKCG_HALT_CG_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_HALT_CG_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_HALT_CG_EN__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG1 0x054 /* RW-4R */
#define NV_RUNLIST_BLKCG1_MONITOR_CG_EN 0:0 /* RWIVF */
#define NV_RUNLIST_BLKCG1_MONITOR_CG_EN_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG1_MONITOR_CG_EN_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG 16:1 /* */
#define NV_RUNLIST_BLKCG1_SLCG_ENABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG1_SLCG_DISABLED 0x0000FFFF /* */
#define NV_RUNLIST_BLKCG1_SLCG__PROD 0x00000001 /* */
#define NV_RUNLIST_BLKCG1_SLCG_RLP 1:1 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_RLP_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_RLP_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_RLP__PROD 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_EVH 3:3 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_EVH_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_EVH_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_EVH__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_EISM 7:7 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_EISM_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_EISM_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_EISM__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_LB 8:8 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_LB_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_LB_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_LB__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL 9:9 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP 10:10 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB 11:11 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PRI 13:13 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_PRI_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PRI_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_PRI__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_CHSW 14:14 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_CHSW_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_CHSW_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_CHSW__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_XBAR 15:15 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_XBAR_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_XBAR_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_XBAR__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_SLCG_MISC 0x05C /* RW-4R */
#define NV_RUNLIST_SLCG_MISC_EXTRA_BUSY_CLKS 3:0 /* RWIVF */
#define NV_RUNLIST_SLCG_MISC_EXTRA_BUSY_CLKS_ZERO 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTERNAL_DOORBELL 0x090 /* -W-4R */
#define NV_RUNLIST_INTERNAL_DOORBELL_CHID 11:0 /* */
#define NV_RUNLIST_INTERNAL_DOORBELL_CHID_HW 10:0 /* -WXUF */
#define NV_RUNLIST_INTERNAL_DOORBELL_GFID 21:16 /* -WXUF */
#define NV_RUNLIST_SCHED_DISABLE 0x094 /* RW-4R */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST 0:0 /* RWIVF */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_ENABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_DISABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_FALSE 0x00000000 /* */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_TRUE 0x00000001 /* */
#define NV_RUNLIST_PREEMPT 0x098 /* RW-4R */
#define NV_RUNLIST_PREEMPT_ID 11:0 /* */
#define NV_RUNLIST_PREEMPT_ID_HW 10:0 /* RWIUF */
@@ -679,104 +88,4 @@
#define NV_RUNLIST_PREEMPT_TYPE 25:24 /* RWIVF */
#define NV_RUNLIST_PREEMPT_TYPE_RUNLIST 0x00000000 /* RWI-V */
#define NV_RUNLIST_PREEMPT_TYPE_TSG 0x00000001 /* RW--V */
#define NV_RUNLIST_ENGINE_STATUS0(i) (0x200+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS0__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS0_TSGID 11:0 /* */
#define NV_RUNLIST_ENGINE_STATUS0_TSGID_HW 10:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS 15:13 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_INVALID 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_VALID 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_CTXSW_SAVE 0x00000005 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_CTXSW_LOAD 0x00000006 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_CTXSW_SWITCH 0x00000007 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX 13:13 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_INVALID 0x00000000 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_VALID 0x00000001 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXLOAD 14:14 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXLOAD_NOT_IN_PROGRESS 0x00000000 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXLOAD_IN_PROGRESS 0x00000001 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXSW 15:15 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXSW_NOT_IN_PROGRESS 0x00000000 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXSW_IN_PROGRESS 0x00000001 /* */
#define NV_RUNLIST_ENGINE_STATUS0_NEXT_TSGID 27:16 /* */
#define NV_RUNLIST_ENGINE_STATUS0_NEXT_TSGID_HW 26:16 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS0_ENG_RELOAD 29:29 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS0_ENG_RELOAD_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS0_ENG_RELOAD_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_FAULTED 30:30 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS0_FAULTED_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS0_FAULTED_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_ENGINE 31:31 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS0_ENGINE_IDLE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS0_ENGINE_BUSY 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS1(i) (0x204+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS1__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS1_GFID 5:0 /* R-XVF */
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID 13:8 /* R-XVF */
#define NV_RUNLIST_ENGINE_STATUS1_INTR_ID 20:16 /* R-XVF */
#define NV_RUNLIST_ENGINE_STATUS1_GFID_VALID 30:30 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS1_GFID_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS1_GFID_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID_VALID 31:31 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL(i,j) (0x208+(i)*64+(j)*4) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL__SIZE_2 2 /* */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_CHID 11:0 /* */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_CHID_HW 10:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_VALID 15:15 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_CHID 27:16 /* */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_CHID_HW 26:16 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_VALID 31:31 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG(i) (0x228+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_IF_EN 0:0 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_IF_EN_DISABLED 0x00000000 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_IF_EN_ENABLED 0x00000001 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_NO_CREDITS 8:8 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_NO_CREDITS_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_NO_CREDITS_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_WFI 12:12 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_WFI_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_WFI_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_NO_CREDITS 16:16 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_NO_CREDITS_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_NO_CREDITS_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_WFI 20:20 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_WFI_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_WFI_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_ENGINE_ID 29:24 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_INST(i) (0x210+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_INST__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET 1:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET_VID_MEM 0x00000000 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET_SYS_MEM_COHERENT 0x00000002 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET_SYS_MEM_NONCOHERENT 0x00000003 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_INST_VALID 11:11 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_INST_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_INST_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_INST_PTR_LO 31:12 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_INST_HI(i) (0x214+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_INST_HI__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_INST_HI_PTR_HI 31:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_INST_HI_PTR_HI_ZERO 0x00000000 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST(i) (0x218+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET 1:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET_VID_MEM 0x00000000 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET_SYS_MEM_COHERENT 0x00000002 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET_SYS_MEM_NONCOHERENT 0x00000003 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_VALID 11:11 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_PTR_LO 31:12 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI(i) (0x21C+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI_PTR_HI 31:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI_PTR_HI_ZERO 0x00000000 /* R---V */
#endif // __ga100_dev_runlist_h__
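//
// Editor's sketch, not part of the original header: the "high:low" field
// definitions above are normally consumed through DRF/HWVALUE-style helpers.
// The minimal stand-ins below are hypothetical and self-contained; only the
// register offsets and field positions come from the manual text above. The
// ?: trick turns a "25:24"-style token into its low and high bit positions.
//
#define SKETCH_FLD_LO(f)      (0 ? f)                 // "hi:lo" -> lo
#define SKETCH_FLD_HI(f)      (1 ? f)                 // "hi:lo" -> hi
#define SKETCH_FLD_MASK(f)    (0xFFFFFFFFU >> (31 - SKETCH_FLD_HI(f) + SKETCH_FLD_LO(f)))
#define SKETCH_FLD_NUM(f, v)  (((v) & SKETCH_FLD_MASK(f)) << SKETCH_FLD_LO(f))

// Hypothetical usage: request a TSG-scoped preempt for 'tsgid' on the runlist
// whose PRI window is mapped at 'runlist_base'.
static inline void sketch_runlist_preempt_tsg(volatile NvU8 *runlist_base, NvU32 tsgid)
{
    NvU32 preempt = SKETCH_FLD_NUM(NV_RUNLIST_PREEMPT_ID_HW, tsgid) |
                    SKETCH_FLD_NUM(NV_RUNLIST_PREEMPT_TYPE, NV_RUNLIST_PREEMPT_TYPE_TSG);

    *(volatile NvU32 *)(runlist_base + NV_RUNLIST_PREEMPT) = preempt;
}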

View File

@@ -22,7 +22,6 @@
*/
#include "nv-kthread-q.h"
#include "nv-list-helpers.h"
#include <linux/kthread.h>
#include <linux/interrupt.h>
@@ -43,17 +42,6 @@
// into the queue, and those functions will be run in the context of the
// queue's kthread.
#ifndef WARN
// Only *really* old kernels (2.6.9) end up here. Just use a simple printk
// to implement this, because such kernels won't be supported much longer.
#define WARN(condition, format...) ({ \
int __ret_warn_on = !!(condition); \
if (unlikely(__ret_warn_on)) \
printk(KERN_ERR format); \
unlikely(__ret_warn_on); \
})
#endif
#define NVQ_WARN(fmt, ...) \
do { \
if (in_interrupt()) { \

View File

@@ -78,6 +78,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_host.c

View File

@@ -61,6 +61,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_arch_invalidate_secondary_tlb
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_vma_added_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_device_range
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += get_dev_pagemap_has_pgmap_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present

View File

@@ -2354,7 +2354,9 @@ NV_STATUS UvmDisableReadDuplication(void *base,
//
// When a page is in its preferred location, a fault from another processor will
// not cause a migration if a mapping for that page from that processor can be
// established without migrating the page.
// established without migrating the page. Individual faulting pages will still
// migrate to service immediate access needs, but prefetch operations will not
// pull additional pages away from their preferred location.
//
// If the specified processor is a GPU and the GPU is not a NUMA node and the
// input range is system-allocated pageable memory and the system supports
@@ -2951,619 +2953,6 @@ NV_STATUS UvmIs8Supported(NvU32 *is8Supported);
// Tools API
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// UvmDebugGetVersion
//
// Returns the version number of the UVM debug library
// See uvm_types.h for valid version numbers, e.g. UVM_DEBUG_V1
//
//------------------------------------------------------------------------------
unsigned UvmDebugVersion(void);
//------------------------------------------------------------------------------
// UvmDebugCreateSession
//
// Creates a handle for a debugging session.
//
// When the client initializes, it will pass in a process handle and get a
// session ID for itself. Subsequent calls to the UVM API will take in that
// session ID.
//
// There are security requirements to this call.
// One of the following must be true:
// 1. The session owner must be running as an elevated user
// 2. The session owner and target must belong to the same user and the
// session owner is at least as privileged as the target.
//
// For CUDA 6.0 we can create at most 64 sessions per debugger process.
//
// Arguments:
// pid: (INPUT)
// Process id for which the debugging session will be created
//
// session: (OUTPUT)
// Handle to the debugging session associated to that pid.
//
// Error codes:
// NV_ERR_PID_NOT_FOUND:
// pid is invalid/ not associated with UVM.
//
// NV_ERR_INSUFFICIENT_PERMISSIONS:
// Function fails the security check.
//
// NV_ERR_INSUFFICIENT_RESOURCES:
// Attempt is made to allocate more than 64 sessions per process.
//
// NV_ERR_BUSY_RETRY:
// internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugCreateSession(unsigned pid,
UvmDebugSession *session);
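// Editor's illustration, not part of the original header: a minimal sketch
// that attaches a debug session to a target pid and tears it down again with
// UvmDebugDestroySession() (declared just below). 'target_pid' is a
// placeholder supplied by the caller.
static NV_STATUS sketch_debug_session_example(unsigned target_pid)
{
    UvmDebugSession session;
    NV_STATUS status = UvmDebugCreateSession(target_pid, &session);

    if (status != NV_OK)
        return status;

    // ... use 'session' with the counter and event queue APIs below ...

    return UvmDebugDestroySession(session);
}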
//------------------------------------------------------------------------------
// UvmDebugDestroySession
//
// Destroys a debugging session.
//
// Arguments:
// session: (INPUT)
// Handle to the debugging session associated to that pid.
//
// Error codes:
// NV_ERR_INVALID_ARGUMENT:
// session is invalid.
//
// NV_ERR_BUSY_RETRY:
// Debug session is in use by some other thread.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugDestroySession(UvmDebugSession session);
//------------------------------------------------------------------------------
// UvmDebugCountersEnable
//
// Enables the counters following the user specified configuration.
//
// The user must fill a list with the configuration of the counters it needs to
// either enable or disable. It can only enable one counter per line.
//
// The structure (UvmCounterConfig) has several fields:
// - scope: Please see the UvmCounterScope enum (above), for details.
// - name: Name of the counter. Please check UvmCounterName for list.
// - gpuid: Identifies the GPU for which the counter will be enabled/disabled
// This parameter is ignored in AllGpu scopes.
// - state: A value of 0 will disable the counter, a value of 1 will enable
// the counter.
//
// Note: All counters are refcounted, which means that a counter will only be
// disabled when its refcount reaches zero.
//
// Arguments:
// session: (INPUT)
// Handle to the debugging session.
//
// config: (INPUT)
// pointer to configuration list as per above.
//
// count: (INPUT)
// number of entries in the config list.
//
// Error codes:
// NV_ERR_INSUFFICIENT_PERMISSIONS:
// Function fails the security check
//
// RM_INVALID_ARGUMENT:
// debugging session is invalid or one of the counter lines is invalid.
// If the call returns this value, no action specified by the config list
// will have taken effect.
//
// NV_ERR_NOT_SUPPORTED:
// UvmCounterScopeGlobalSingleGpu is not supported for CUDA 6.0
//
// NV_ERR_BUSY_RETRY:
// the debug session is in use by some other thread.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugCountersEnable(UvmDebugSession session,
UvmCounterConfig *config,
unsigned count);
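// Editor's illustration, not part of the original header: enable a single
// counter using the fields described above. The member names follow the
// description ('scope', 'name', 'gpuid', 'state'); the NvProcessorUuid type
// assumed for 'gpuid' is a guess based on the other counter APIs, so treat
// this purely as a sketch.
static NV_STATUS sketch_enable_one_counter(UvmDebugSession session,
                                           UvmCounterScope scope,
                                           UvmCounterName name,
                                           NvProcessorUuid gpuid)
{
    UvmCounterConfig config;

    config.scope = scope;   // see the UvmCounterScope enum
    config.name  = name;    // see UvmCounterName for the list
    config.gpuid = gpuid;   // ignored in AllGpu scopes
    config.state = 1;       // 1 = enable, 0 = disable (counters are refcounted)

    // Exactly one counter per config entry; pass a longer array to toggle more.
    return UvmDebugCountersEnable(session, &config, 1);
}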
//------------------------------------------------------------------------------
// UvmDebugGetCounterHandle
//
// Returns a handle to a particular counter. This is an opaque handle that the
// implementation uses to find your counter later. This handle can be
// used in subsequent calls to UvmDebugGetCounterVal().
//
// Arguments:
// session: (INPUT)
// Handle to the debugging session.
//
// scope: (INPUT)
// Scope that will be mapped.
//
// counterName: (INPUT)
// Name of the counter in that scope.
//
// gpu: (INPUT)
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition of the scoped GPU.
// This parameter is ignored in AllGpu scopes.
//
// pCounterHandle: (OUTPUT)
// Handle to the counter address.
//
// Error codes:
// NV_ERR_INVALID_ARGUMENT:
// Specified scope/gpu pair or session id is invalid
//
// NV_ERR_NOT_SUPPORTED:
// UvmCounterScopeGlobalSingleGpu is not supported for CUDA 6.0
//
// NV_ERR_BUSY_RETRY:
// debug session is in use by some other thread.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugGetCounterHandle(UvmDebugSession session,
UvmCounterScope scope,
UvmCounterName counterName,
NvProcessorUuid gpu,
NvUPtr *pCounterHandle);
//------------------------------------------------------------------------------
// UvmDebugGetCounterVal
//
// Returns the counter value specified by the counter name.
//
// Arguments:
// session: (INPUT)
// Handle to the debugging session.
//
// counterHandleArray: (INPUT)
// Array of counter handles
//
// handleCount: (INPUT)
// Number of handles in the counterHandleArray array.
//
// counterValArray: (OUTPUT)
// Array of counter values corresponding to the handles.
//
// Error codes:
// NV_ERR_INVALID_ARGUMENT:
// one of the specified handles is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugGetCounterVal(UvmDebugSession session,
NvUPtr *counterHandleArray,
unsigned handleCount,
unsigned long long *counterValArray);
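// Editor's illustration, not part of the original header: look up a counter
// handle and read its current value in a single-element batch. Argument types
// mirror the two declarations above.
static NV_STATUS sketch_read_counter(UvmDebugSession session,
                                     UvmCounterScope scope,
                                     UvmCounterName name,
                                     NvProcessorUuid gpu,
                                     unsigned long long *value)
{
    NvUPtr handle;
    NV_STATUS status = UvmDebugGetCounterHandle(session, scope, name, gpu, &handle);

    if (status != NV_OK)
        return status;

    // Larger arrays of handles read several counters in one call.
    return UvmDebugGetCounterVal(session, &handle, 1, value);
}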
//------------------------------------------------------------------------------
// UvmEventQueueCreate
//
// This call creates an event queue of the given size.
// No events are added in the queue until they are enabled by the user.
// Event queue data is visible to the user even after the target process dies
// if the session is active and queue is not freed.
//
// User doesn't need to serialize multiple UvmEventQueueCreate calls as
// each call creates a new queue state associated with the returned queue
// handle.
//
// Arguments:
// sessionHandle: (INPUT)
// Handle to the debugging session.
//
// queueHandle: (OUTPUT)
// Handle to created queue.
//
// queueSize: (INPUT)
// Size of the event queue buffer in units of UvmEventEntry's.
// This quantity must be > 1.
//
// notificationCount: (INPUT)
// Number of entries after which the user should be notified that
// there are events to fetch.
// User is notified when queueEntries >= notification count.
//
// Error codes:
// NV_ERR_INSUFFICIENT_PERMISSIONS:
// Function fails the security check.
//
// NV_ERR_INVALID_ARGUMENT:
// One of the arguments is invalid.
//
// NV_ERR_INSUFFICIENT_RESOURCES:
// it's not possible to allocate a queue of requested size.
//
// NV_ERR_BUSY_RETRY:
// internal resources are blocked by other threads.
//
// NV_ERR_PID_NOT_FOUND:
// queue create call is made on a session after the target dies.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventQueueCreate(UvmDebugSession sessionHandle,
UvmEventQueueHandle *queueHandle,
NvS64 queueSize,
NvU64 notificationCount,
UvmEventTimeStampType timeStampType);
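// Editor's illustration, not part of the original header: create a queue that
// notifies the user once 64 entries are pending. The queue size and
// notification count are arbitrary placeholder values.
static NV_STATUS sketch_create_event_queue(UvmDebugSession session,
                                           UvmEventQueueHandle *queue,
                                           UvmEventTimeStampType timeStampType)
{
    // Queue size is in units of UvmEventEntry and must be > 1.
    return UvmEventQueueCreate(session, queue, 256, 64, timeStampType);
}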
//------------------------------------------------------------------------------
// UvmEventQueueDestroy
//
// This call frees all internal resources associated with the queue, including
// unpinning of the memory associated with that queue. Freeing the user buffer
// is the responsibility of the caller. The event queue might also be destroyed
// as a side effect of destroying a session associated with this queue.
//
// User needs to ensure that a queue handle is not deleted while some other
// thread is using the same queue handle.
//
// Arguments:
// sessionHandle: (INPUT)
// Handle to the debugging session.
//
// queueHandle: (INPUT)
// Handle to the queue which is to be freed
//
// Error codes:
// RM_ERR_NOT_PERMITTED:
// Function fails the security check.
//
// NV_ERR_INVALID_ARGUMENT:
// One of the arguments is invalid.
//
// NV_ERR_BUSY_RETRY:
// internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventQueueDestroy(UvmDebugSession sessionHandle,
UvmEventQueueHandle queueHandle);
//------------------------------------------------------------------------------
// UvmEventEnable
//
// This call enables a particular event type in the event queue.
// All events are disabled by default when a queue is created.
//
// This API does not access the queue state maintained in the user
// library so the user doesn't need to acquire a lock to protect the queue
// state.
//
// Arguments:
// sessionHandle: (INPUT)
// Handle to the debugging session.
//
// queueHandle: (INPUT)
// Handle to the queue where events are to be enabled
//
// eventTypeFlags: (INPUT)
// This field specifies the event types to be enabled. For example:
// To enable migration events and memory violations: pass flags
// "UVM_EVENT_ENABLE_MEMORY_VIOLATION |UVM_EVENT_ENABLE_MIGRATION"
//
// Error codes:
// RM_ERR_NOT_PERMITTED:
// Function fails the security check.
//
// NV_ERR_INVALID_ARGUMENT:
// One of the arguments is invalid.
//
// NV_ERR_PID_NOT_FOUND:
// this call is made after the target process dies
//
// NV_ERR_BUSY_RETRY:
// internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventEnable(UvmDebugSession sessionHandle,
UvmEventQueueHandle queueHandle,
unsigned eventTypeFlags);
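// Editor's illustration, not part of the original header: enable migration
// and memory violation events on a queue, using the flag names quoted in the
// comment above.
static NV_STATUS sketch_enable_migration_events(UvmDebugSession session,
                                                UvmEventQueueHandle queue)
{
    return UvmEventEnable(session,
                          queue,
                          UVM_EVENT_ENABLE_MEMORY_VIOLATION | UVM_EVENT_ENABLE_MIGRATION);
}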
//------------------------------------------------------------------------------
// UvmEventDisable
//
// This call disables a particular event type in the queue.
//
// This API does not access the queue state maintained in the user
// library so the user doesn't need to acquire a lock to protect the queue
// state.
//
// Arguments:
// sessionHandle: (INPUT)
// Handle to the debugging session.
//
// queueHandle: (INPUT)
// Handle to the queue where events are to be disabled
//
// eventTypeFlags: (INPUT)
// This field specifies the event types to be disabled.
// For example: To disable migration events and memory violations:
// pass "UVM_EVENT_ENABLE_MEMORY_VIOLATION | UVM_EVENT_ENABLE_MIGRATION"
// as flags
//
// Error codes:
// RM_ERR_NOT_PERMITTED:
// Function fails the security check.
//
// NV_ERR_INVALID_ARGUMENT:
// One of the arguments is invalid.
//
// NV_ERR_PID_NOT_FOUND:
// this call is made after the target process dies
//
// NV_ERR_BUSY_RETRY:
// internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventDisable(UvmDebugSession sessionHandle,
UvmEventQueueHandle queueHandle,
unsigned eventTypeFlags);
//------------------------------------------------------------------------------
// UvmEventWaitOnQueueHandles
//
// User is notified when queueEntries >= notification count.
// This call does a blocking wait for this notification. It returns when
// at least one of the queue handles has events to be fetched or if it times out.
//
// This API accesses constant data maintained in the queue state. Hence,
// the user doesn't need to acquire a lock to protect the queue state.
//
// Arguments:
// queueHandles: (INPUT)
// array of queue handles.
//
// arraySize: (INPUT)
// number of handles in array.
//
// timeout: (INPUT)
// timeout in msec
//
// pNotificationFlags: (OUTPUT)
// If a particular queue handle in the input array is notified then
// the respective bit flag is set in pNotificationFlags.
//
// Error codes:
// NV_ERR_INVALID_ARGUMENT:
// one of the queueHandles is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventWaitOnQueueHandles(UvmEventQueueHandle *queueHandleArray,
unsigned arraySize,
NvU64 timeout,
unsigned *pNotificationFlags);
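// Editor's illustration, not part of the original header: block for up to one
// second on a single queue. Bit 0 of the notification flags corresponds to
// index 0 of the handle array, per the description above.
static NV_STATUS sketch_wait_for_events(UvmEventQueueHandle queue, NvBool *notified)
{
    unsigned flags = 0;
    NV_STATUS status = UvmEventWaitOnQueueHandles(&queue, 1, 1000, &flags); // 1000 ms timeout

    *notified = (flags & 0x1) ? NV_TRUE : NV_FALSE;
    return status;
}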
//------------------------------------------------------------------------------
// UvmEventGetNotificationHandles
//
// User is notified when queueEntries >= notification count.
// The user can directly get the queue notification handles rather than using
// a UVM API to wait on queue handles. This helps the user to wait on other
// objects (apart from queue notification) along with queue notification
// handles in the same thread. The user can safely use this call along with the
// library supported wait call UvmEventWaitOnQueueHandles.
//
// This API reads constant data maintained in the queue state. Hence,
// the user doesn't need to acquire a lock to protect the queue state.
//
// Arguments:
// queueHandles: (INPUT)
// array of queue handles.
//
// arraySize: (INPUT)
// number of handles in array.
//
// notificationHandles: (OUTPUT)
// Windows: Output of this call contains an array of 'windows event
// handles' corresponding to the queue handles passed as input.
// Linux: All queues belonging to the same process share the same
// file descriptor(fd) for notification. If the user chooses to use
// UvmEventGetNotificationHandles then he should check all queues
// for new events (by calling UvmEventFetch) when notified on
// the fd.
//
// Error codes:
// NV_ERR_INVALID_ARGUMENT:
// One of the arguments is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventGetNotificationHandles(UvmEventQueueHandle *queueHandleArray,
unsigned arraySize,
void **notificationHandleArray);
//------------------------------------------------------------------------------
// UvmEventGetGpuUuidTable
//
// Each migration event entry contains the gpu index to/from where data is
// migrated. This index maps to a corresponding physical gpu UUID in the
// gpuUuidTable. Using indices saves on the size of each event entry. This API
// provides the gpuIndex to gpuUuid relation to the user.
//
// This API does not access the queue state maintained in the user
// library and so the user doesn't need to acquire a lock to protect the
// queue state.
//
// Arguments:
// gpuUuidTable: (OUTPUT)
// The return value is an array of physical GPU UUIDs. The array index
// is the corresponding gpuIndex. There can be at most 32 GPUs
// associated with UVM, so the array size is 32.
//
// validCount: (OUTPUT)
// The system doesn't normally contain 32 GPUs. This field gives the
// count of entries that are valid in the returned gpuUuidTable.
//
// Error codes:
// NV_ERR_BUSY_RETRY:
// internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventGetGpuUuidTable(NvProcessorUuid *gpuUuidTable,
unsigned *validCount);
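// Editor's illustration, not part of the original header: resolve the gpuIndex
// carried by migration event entries to a physical GPU UUID. The 32-entry
// bound comes from the comment above.
static NV_STATUS sketch_lookup_gpu_uuid(unsigned gpuIndex, NvProcessorUuid *uuid)
{
    NvProcessorUuid table[32];
    unsigned validCount = 0;
    NV_STATUS status = UvmEventGetGpuUuidTable(table, &validCount);

    if (status != NV_OK)
        return status;

    if (gpuIndex >= validCount)
        return NV_ERR_INVALID_ARGUMENT;

    *uuid = table[gpuIndex];
    return NV_OK;
}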
//------------------------------------------------------------------------------
// UvmEventFetch
//
// This call is used to fetch the queue entries in a user buffer.
//
// This API updates the queue state. Hence simultaneous calls to fetch/skip
// events should be avoided as that might corrupt the queue state.
//
// Arguments:
// sessionHandle: (INPUT)
// Handle to the debugging session.
//
// queueHandle: (INPUT)
// queue from where to fetch the events.
//
// pBuffer: (OUTPUT)
// Pointer to the buffer where the API will copy the events. The user
// shall ensure the buffer is large enough.
//
// nEntries: (INPUT/OUTPUT)
// It provides the maximum number of entries that will be fetched
// from the queue. If this number is larger than the size of the
// queue it will be internally capped to that value.
// As output it returns the actual number of entries copied to the
// buffer.
//
// Error codes:
// RM_ERR_NOT_PERMITTED:
// Function fails the security check.
//
// NV_ERR_INVALID_ARGUMENT:
// One of the arguments is invalid.
//
// NV_ERR_INVALID_INDEX:
// The indices of the queue have been corrupted.
//
// NV_ERR_BUFFER_TOO_SMALL:
// The event queue buffer provided by the caller was too small to
// contain all of the events that occurred during this run.
// Events were therefore dropped (not recorded).
// Please re-run with a larger buffer.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventFetch(UvmDebugSession sessionHandle,
UvmEventQueueHandle queueHandle,
UvmEventEntry *pBuffer,
NvU64 *nEntries);
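// Editor's illustration, not part of the original header: drain up to
// 'maxEntries' events into a caller-provided buffer. Fetch and skip calls
// must not run concurrently on the same queue, as noted above.
static NV_STATUS sketch_fetch_events(UvmDebugSession session,
                                     UvmEventQueueHandle queue,
                                     UvmEventEntry *buffer,
                                     NvU64 maxEntries,
                                     NvU64 *fetched)
{
    NvU64 nEntries = maxEntries;    // in: buffer capacity, out: entries copied
    NV_STATUS status = UvmEventFetch(session, queue, buffer, &nEntries);

    *fetched = (status == NV_OK) ? nEntries : 0;
    return status;
}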
//------------------------------------------------------------------------------
// UvmEventSkipAll
//
// This API drops all event entries from the queue.
//
// This API updates the queue state. Hence simultaneous calls to fetch/
// skip events should be avoided as that might corrupt the queue state.
//
// Arguments:
// sessionHandle: (INPUT)
// Handle to the debugging session.
//
// queueHandle: (INPUT)
// target queue.
//
// Error codes:
// RM_ERR_NOT_PERMITTED:
// Function fails the security check.
//
// NV_ERR_INVALID_ARGUMENT:
// One of the arguments is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventSkipAll(UvmDebugSession sessionHandle,
UvmEventQueueHandle queueHandle);
//------------------------------------------------------------------------------
// UvmEventQueryTimeStampType
//
// This API returns the type of time stamp used in an event entry for a given
// queue.
//
// This API reads constant data maintained in the queue state. Hence,
// the user doesn't need to acquire a lock to protect the queue state.
//
// Arguments:
// sessionHandle: (INPUT)
// Handle to the debugging session.
//
// queueHandle: (INPUT)
// target queue.
//
// timeStampType: (OUTPUT)
// type of time stamp used in the event entry. See UvmEventTimeStampType
// for supported types of time stamps.
//
// Error codes:
// RM_ERR_NOT_PERMITTED:
// Function fails the security check.
//
// NV_ERR_INVALID_ARGUMENT:
// One of the arguments is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventQueryTimeStampType(UvmDebugSession sessionHandle,
UvmEventQueueHandle queueHandle,
UvmEventTimeStampType *timeStampType);
//------------------------------------------------------------------------------
// UvmDebugAccessMemory
//
// This call can be used by the debugger to read/write a memory range. The UVM
// driver may not be aware of all the pages in this range. A bit per page is set
// by the driver if it is read/written by UVM.
//
// Arguments:
// session: (INPUT)
// Handle to the debugging session.
//
// baseAddress: (INPUT)
// base address from where memory is to be accessed
//
// sizeInBytes: (INPUT)
// Number of bytes to be accessed
//
// accessType: (INPUT)
// Read or write access request
//
// buffer: (INPUT/OUTPUT)
// This buffer would be read or written to by the driver.
// User needs to allocate a big enough buffer to fit sizeInBytes.
//
// isBitmaskSet: (INPUT/OUTPUT)
// Set to 1 if any bit in the bitmask is set
// NULL(INPUT) if unused
//
// bitmask: (INPUT/OUTPUT)
// One bit per page is set if UVM reads or writes to it.
// User should allocate a bitmask big enough to fit one bit per page
// covered by baseAddress + sizeInBytes:
// (baseAlignmentBytes + sizeInBytes + pageSize - 1)/pageSize number
// of bits.
// NULL(IN) if unused.
//
// Error codes:
// NV_ERR_INVALID_ARGUMENT:
// One of the arguments is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugAccessMemory(UvmDebugSession session,
void *baseAddress,
NvU64 sizeInBytes,
UvmDebugAccessType accessType,
void *buffer,
NvBool *isBitmaskSet,
NvU64 *bitmask);
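// Editor's illustration, not part of the original header: read a range of
// target memory. The UvmDebugAccessTypeRead enumerator name is an assumption
// of this sketch (only the UvmDebugAccessType type appears above), and the
// caller sizes 'bitmask' with the formula quoted in the comment.
static NV_STATUS sketch_read_target_memory(UvmDebugSession session,
                                           void *baseAddress,
                                           NvU64 sizeInBytes,
                                           void *buffer,
                                           NvU64 *bitmask)
{
    NvBool isBitmaskSet = NV_FALSE;

    return UvmDebugAccessMemory(session,
                                baseAddress,
                                sizeInBytes,
                                UvmDebugAccessTypeRead,  // assumed enumerator
                                buffer,
                                &isBitmaskSet,
                                bitmask);
}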
//
// Uvm Tools uvm API
//
//------------------------------------------------------------------------------
// UvmToolsCreateSession
//

View File

@@ -78,6 +78,10 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_serialize_clear_ops_by_type = false;
parent_gpu->access_bits_supported = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;
@@ -96,5 +100,7 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->ats.no_ats_range_required = false;
parent_gpu->ats.gmmu_pt_depth0_init_required = false;
parent_gpu->conf_computing.per_channel_key_rotation = false;
}

View File

@@ -82,6 +82,8 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_serialize_clear_ops_by_type = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;
@@ -94,16 +96,22 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->sparse_mappings_supported = true;
parent_gpu->access_bits_supported = false;
UVM_ASSERT(parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100);
if (parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA100 ||
parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA000)
parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA000) {
parent_gpu->map_remap_larger_page_promotion = true;
else
}
else {
parent_gpu->map_remap_larger_page_promotion = false;
}
parent_gpu->plc_supported = true;
parent_gpu->ats.no_ats_range_required = false;
parent_gpu->ats.gmmu_pt_depth0_init_required = false;
parent_gpu->conf_computing.per_channel_key_rotation = false;
}

View File

@@ -73,6 +73,7 @@ static NvU32 ce_aperture(uvm_aperture_t aperture)
return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB);
}
else {
UVM_ASSERT(uvm_aperture_is_peer(aperture));
return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) |
HWVALUE(C6B5, SET_SRC_PHYS_MODE, FLA, 0) |
HWVALUE(C6B5, SET_SRC_PHYS_MODE, PEER_ID, UVM_APERTURE_PEER_ID(aperture));

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2024 NVIDIA Corporation
Copyright (c) 2018-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -111,8 +111,6 @@ void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user
uvm_spin_loop_t spin;
NvU32 channel_faulted_mask = 0;
NvU32 clear_type_value = 0;
NvU32 doorbell_value = 0;
volatile NvU32 *doorbell_ptr;
UVM_ASSERT(!user_channel->gpu->parent->has_clear_faulted_channel_method);
@@ -129,12 +127,6 @@ void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user
uvm_mmu_engine_type_string(fault->fault_source.mmu_engine_type));
}
doorbell_ptr = (NvU32 *)((NvU8 *)user_channel->runlist_pri_base_register + NV_RUNLIST_INTERNAL_DOORBELL);
// GFID is not required since we clear faulted channel with a SW method on
// SRIOV. On baremetal, GFID is always zero.
doorbell_value = HWVALUE(_RUNLIST, INTERNAL_DOORBELL, CHID, user_channel->hw_channel_id);
// Wait for the channel to have the FAULTED bit set as this can race with
// interrupt notification
UVM_SPIN_WHILE(!(UVM_GPU_READ_ONCE(*user_channel->chram_channel_register) & channel_faulted_mask), &spin);
@@ -143,7 +135,7 @@ void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user
wmb();
UVM_GPU_WRITE_ONCE(*doorbell_ptr, doorbell_value);
UVM_GPU_WRITE_ONCE(*user_channel->work_submission_offset, user_channel->work_submission_token);
}
static NvU32 instance_ptr_aperture_type_to_hw_value(uvm_aperture_t aperture)

View File

@@ -601,7 +601,12 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
uvm_page_mask_zero(faults_serviced_mask);
uvm_page_mask_zero(reads_serviced_mask);
if (!(vma->vm_flags & VM_READ))
// If the VMA doesn't have read or write permissions then all faults are
// fatal so we exit early.
// TODO: Bug 5451843: This fix brings to light potential issues in the ATS
// fault handling path as described in the bug. Those need to be handled
// to avoid any potential permission issues.
if (!(vma->vm_flags & (VM_READ | VM_WRITE)))
return NV_OK;
if (!(vma->vm_flags & VM_WRITE)) {

View File

@@ -26,6 +26,7 @@
#include "uvm_gpu.h"
#include "uvm_mem.h"
#include "uvm_blackwell_fault_buffer.h"
#include "ctrl2080mc.h"
void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
{
@@ -81,6 +82,16 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_serialize_clear_ops_by_type = parent_gpu->rm_info.accessCntrBufferCount == 2;
// TODO: Bug 5262806: Remove this WAR once the bug is fixed.
// Before this override, accessCntrBufferCount has only been used to
// determine the support for access counters in uvm_gpu.c and the statement
// above. After the HAL init, it is used for buffer allocations, and must
// not change its value.
if (parent_gpu->rm_info.accessCntrBufferCount > 1)
parent_gpu->rm_info.accessCntrBufferCount = 1;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;
@@ -99,6 +110,10 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->ats.no_ats_range_required = true;
parent_gpu->ats.gmmu_pt_depth0_init_required = parent_gpu->ats.non_pasid_ats_enabled;
parent_gpu->access_bits_supported = false;
// Blackwell has a physical translation prefetcher, meaning SW must assume
// that any physical ATS translation can be fetched at any time. The
// specific behavior and impact differs with non-PASID ATS support, but
@@ -142,16 +157,26 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
// TODO: Bug 5023085: this should be queried from RM instead of determined
// by UVM.
if (parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100 &&
parent_gpu->rm_info.gpuImplementation ==
NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B) {
parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B) {
parent_gpu->is_integrated_gpu = true;
parent_gpu->access_bits_supported = false;
// GB10B has sticky L2 coherent cache lines.
// For details, refer to the comments in uvm_gpu.h
// where this field is declared.
parent_gpu->sticky_l2_coherent_cache_lines = true;
}
if (parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200 &&
parent_gpu->rm_info.gpuImplementation ==
NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B)
parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B) {
parent_gpu->is_integrated_gpu = true;
parent_gpu->access_bits_supported = false;
}
if (parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200 &&
(parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB206 ||
parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB207)) {
// TODO: Bug 3186788 : As reported in Bug 5309034, GB206
// and GB207 experience a GSP crash with VAB. Depending
// on whether RM fixes it or marks it as cannot fix, the
// below checks can be removed or retained.
parent_gpu->access_bits_supported = false;
}
}

View File

@@ -0,0 +1,77 @@
/*******************************************************************************
Copyright (c) 2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_hal.h"
#include "uvm_global.h"
#include "uvm_push.h"
#include "uvm_mem.h"
#include "uvm_conf_computing.h"
bool uvm_hal_blackwell_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (uvm_gpu_address_is_peer(gpu, src)) {
UVM_ERR_PRINT("Peer copy from peer address (0x%llx) is not allowed!", src.address);
return false;
}
if (push->channel && uvm_gpu_address_is_peer(gpu, dst) && !uvm_channel_is_p2p(push->channel)) {
UVM_ERR_PRINT("Peer copy from address (0x%llx) to address (0x%llx) should use designated p2p channels!",
src.address,
dst.address);
return false;
}
if (g_uvm_global.conf_computing_enabled) {
// Blackwell+ GPUs support secure P2P. In that case, memcopy using
// physical addresses is valid.
if (!uvm_aperture_is_peer(dst.aperture)) {
// In Confidential Computing, if a non-p2p memcopy uses physical
// addressing for either the destination or the source, then the
// corresponding aperture must be vidmem. If virtual addressing
// is used, and the backing storage is sysmem the access is only
// legal if the copy type is NONPROT2NONPROT, but the validation
// does not detect it.
if (!src.is_virtual && (src.aperture != UVM_APERTURE_VID))
return false;
if (!dst.is_virtual && dst.aperture != UVM_APERTURE_VID)
return false;
}
// The source and destination must be both unprotected, for sysmem
// copy, or both protected for p2p copy.
if (dst.is_unprotected != src.is_unprotected)
return false;
}
if (!gpu->parent->ce_phys_vidmem_write_supported && !dst.is_virtual && dst.aperture == UVM_APERTURE_VID) {
UVM_ERR_PRINT("Destination address of vidmem memcopy must be virtual, not physical: {%s, 0x%llx}\n",
uvm_gpu_address_aperture_string(dst),
dst.address);
return false;
}
return true;
}
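
The two Confidential Computing checks above (physical non-peer apertures must be vidmem, and the protection bits of source and destination must match) can be restated as a small standalone predicate. The sketch below is illustrative only; the struct and function names are not UVM types.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative restatement of the Confidential Computing rules above;
 * this struct is not a UVM type, just enough state for the checks. */
struct addr {
    bool is_virtual;
    bool is_vidmem;        /* physical aperture is vidmem */
    bool is_peer;          /* destination aperture is a peer aperture */
    bool is_unprotected;
};

static bool cc_memcopy_ok(struct addr dst, struct addr src)
{
    if (!dst.is_peer) {
        /* Physical addressing outside of p2p must target vidmem. */
        if (!src.is_virtual && !src.is_vidmem)
            return false;
        if (!dst.is_virtual && !dst.is_vidmem)
            return false;
    }
    /* Both ends unprotected (sysmem copy) or both protected (p2p copy). */
    return dst.is_unprotected == src.is_unprotected;
}

int main(void)
{
    struct addr vid = { .is_virtual = false, .is_vidmem = true };
    struct addr sys_phys = { .is_virtual = false, .is_vidmem = false };
    printf("%d %d\n", cc_memcopy_ok(vid, vid), cc_memcopy_ok(vid, sys_phys));
    return 0;
}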

View File

@@ -352,9 +352,8 @@ void uvm_hal_blackwell_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t apert
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU32 aperture_value;
if (!gpu->parent->is_integrated_gpu) {
if (!gpu->parent->is_integrated_gpu)
return uvm_hal_ampere_host_l2_invalidate(push, aperture);
}
switch (aperture) {
case UVM_APERTURE_SYS:
@@ -369,9 +368,9 @@ void uvm_hal_blackwell_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t apert
uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS);
// Flush dirty
NV_PUSH_4U(C96F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, 0,
MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, L2_FLUSH_DIRTY));
MEM_OP_B, 0,
MEM_OP_C, 0,
MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, L2_FLUSH_DIRTY));
// Invalidate
NV_PUSH_4U(C96F, MEM_OP_A, 0,
MEM_OP_B, 0,

View File

@@ -3140,13 +3140,25 @@ static void pick_ces_conf_computing(uvm_channel_manager_t *manager,
UVM_CHANNEL_TYPE_MEMOPS,
UVM_CHANNEL_TYPE_WLC };
static const uvm_channel_type_t types_p2p[] = { UVM_CHANNEL_TYPE_CPU_TO_GPU,
UVM_CHANNEL_TYPE_GPU_TO_CPU,
UVM_CHANNEL_TYPE_GPU_INTERNAL,
UVM_CHANNEL_TYPE_GPU_TO_GPU,
UVM_CHANNEL_TYPE_MEMOPS,
UVM_CHANNEL_TYPE_WLC };
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
pick_ces_for_channel_types(manager, ce_caps, types, ARRAY_SIZE(types), preferred_ce);
if (gpu->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_UNSUPPORTED) {
pick_ces_for_channel_types(manager, ce_caps, types, ARRAY_SIZE(types), preferred_ce);
// Direct transfers between GPUs are disallowed in Confidential Computing,
// but the preferred CE is still set to an arbitrary value for consistency.
preferred_ce[UVM_CHANNEL_TYPE_GPU_TO_GPU] = preferred_ce[UVM_CHANNEL_TYPE_GPU_TO_CPU];
// If direct transfers between GPUs are disallowed, the preferred
// CE is still set to an arbitrary value for consistency.
preferred_ce[UVM_CHANNEL_TYPE_GPU_TO_GPU] = preferred_ce[UVM_CHANNEL_TYPE_GPU_TO_CPU];
}
else {
pick_ces_for_channel_types(manager, ce_caps, types_p2p, ARRAY_SIZE(types_p2p), preferred_ce);
}
best_wlc_ce = preferred_ce[UVM_CHANNEL_TYPE_WLC];

View File

@@ -37,6 +37,16 @@
#define TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU 1024
#define TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU_EMU 64
// It is unsafe to destroy the GPU's channel manager of an active uvm_gpu_t
// object. We sync trackers to avoid having any of the GPU's channels in any
// trackers. We can only guarantee that because in these tests, we only allow
// a single reference to the GPU.
static void channel_manager_destroy(uvm_gpu_t *gpu)
{
uvm_parent_gpu_sync_trackers(gpu->parent);
uvm_channel_manager_destroy(gpu->channel_manager);
}
// Schedule pushes one after another on all GPUs and channel types that copy and
// increment a counter into an adjacent memory location in a buffer. And then
// verify that all the values are correct on the CPU.
@@ -177,7 +187,7 @@ static NV_STATUS test_unexpected_completed_values(uvm_va_space_t *va_space)
uvm_channel_update_progress_all(channel);
TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_INVALID_STATE);
uvm_channel_manager_destroy(gpu->channel_manager);
channel_manager_destroy(gpu);
// Destruction will hit the error again, so clear one more time.
uvm_global_reset_fatal_error();
@@ -306,7 +316,7 @@ static NV_STATUS test_rc(uvm_va_space_t *va_space)
test_status = uvm_test_rc_for_gpu(gpu);
g_uvm_global.disable_fatal_error_assert = false;
uvm_channel_manager_destroy(gpu->channel_manager);
channel_manager_destroy(gpu);
create_status = uvm_channel_manager_create(gpu, &gpu->channel_manager);
TEST_NV_CHECK_RET(test_status);
@@ -355,7 +365,10 @@ static NV_STATUS uvm_test_iommu_rc_for_gpu(uvm_gpu_t *gpu)
cpu_ptr = uvm_mem_get_cpu_addr_kernel(sysmem);
sysmem_dma_addr = uvm_mem_gpu_address_physical(sysmem, gpu, 0, data_size);
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Test memset to IOMMU mapped sysmem");
status = uvm_push_begin(gpu->channel_manager,
UVM_CHANNEL_TYPE_GPU_TO_CPU,
&push,
"Test memset to IOMMU mapped sysmem");
TEST_NV_CHECK_GOTO(status, done);
gpu->parent->ce_hal->memset_8(&push, sysmem_dma_addr, 0, data_size);
@@ -497,14 +510,15 @@ static NV_STATUS test_iommu(uvm_va_space_t *va_space)
NV_STATUS test_status, create_status;
// The GPU channel manager is destroyed and then re-created after
// testing ATS RC fault, so this test requires exclusive access to the GPU.
// testing ATS RC fault, so this test requires exclusive access to the
// GPU.
TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);
g_uvm_global.disable_fatal_error_assert = true;
test_status = uvm_test_iommu_rc_for_gpu(gpu);
g_uvm_global.disable_fatal_error_assert = false;
uvm_channel_manager_destroy(gpu->channel_manager);
channel_manager_destroy(gpu);
create_status = uvm_channel_manager_create(gpu, &gpu->channel_manager);
TEST_NV_CHECK_RET(test_status);
@@ -989,7 +1003,9 @@ static NV_STATUS test_channel_iv_rotation(uvm_va_space_t *va_space)
uvm_conf_computing_dma_buffer_t *cipher_text;
void *cipher_cpu_va, *plain_cpu_va, *tag_cpu_va;
uvm_gpu_address_t cipher_gpu_address, plain_gpu_address, tag_gpu_address;
uvm_channel_t *work_channel = uvm_channel_is_lcic(channel) ? uvm_channel_lcic_get_paired_wlc(channel) : channel;
uvm_channel_t *work_channel = uvm_channel_is_lcic(channel) ?
uvm_channel_lcic_get_paired_wlc(channel) :
channel;
plain_cpu_va = &status;
data_size = sizeof(status);
@@ -1037,8 +1053,8 @@ release:
if (status != NV_OK)
return status;
// All channels except SEC2 used at least a single IV to release tracking.
// SEC2 doesn't support decrypt direction.
// All channels except SEC2 used at least a single IV to release
// tracking. SEC2 doesn't support decrypt direction.
if (uvm_channel_is_sec2(channel))
TEST_CHECK_RET(before_rotation_dec == after_rotation_dec);
else
@@ -1557,7 +1573,7 @@ static NV_STATUS test_channel_pushbuffer_extension_base(uvm_va_space_t *va_space
TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);
gpu->uvm_test_force_upper_pushbuffer_segment = 1;
uvm_channel_manager_destroy(gpu->channel_manager);
channel_manager_destroy(gpu);
TEST_NV_CHECK_GOTO(uvm_channel_manager_create(gpu, &gpu->channel_manager), error);
gpu->uvm_test_force_upper_pushbuffer_segment = 0;

View File

@@ -157,7 +157,7 @@ void on_uvm_assert(void);
#define UVM_ASSERT_MSG_IGNORE(expr, fmt, ...) \
do { \
UVM_IGNORE_EXPR(expr); \
UVM_NO_PRINT(fmt, ##__VA_ARGS__); \
no_printk(fmt, ##__VA_ARGS__); \
} while (0)
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2019 NVIDIA Corporation
Copyright (c) 2019-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -27,8 +27,6 @@
#include "uvm_linux.h"
#include "uvm_forward_decl.h"
extern int uvm_enable_debug_procfs;
extern unsigned uvm_perf_map_remote_on_native_atomics_fault;
extern uvm_global_t g_uvm_global;

View File

@@ -98,6 +98,4 @@ typedef struct uvm_non_replayable_fault_buffer_struct uvm_non_replayable_fault_b
typedef struct uvm_access_counter_buffer_entry_struct uvm_access_counter_buffer_entry_t;
typedef struct uvm_access_counter_buffer_struct uvm_access_counter_buffer_t;
typedef struct uvm_access_counter_service_batch_context_struct uvm_access_counter_service_batch_context_t;
typedef struct uvm_reverse_map_struct uvm_reverse_map_t;
#endif //__UVM_FORWARD_DECL_H__

View File

@@ -146,12 +146,20 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
// Add the physical offset for peer mappings
if (uvm_aperture_is_peer(aperture)) {
if (uvm_parent_gpus_are_direct_connected(memory_mapping_gpu->parent, memory_owning_gpu->parent))
if (uvm_parent_gpus_are_nvlink_direct_connected(memory_mapping_gpu->parent, memory_owning_gpu->parent))
phys_offset += memory_owning_gpu->parent->peer_address_info.peer_gpa_memory_window_start;
else if (uvm_parent_gpus_are_nvswitch_connected(memory_mapping_gpu->parent, memory_owning_gpu->parent))
phys_offset += memory_owning_gpu->parent->nvswitch_info.fabric_memory_window_start;
}
// Add DMA offset for bar1 p2p.
if (uvm_aperture_is_sys(aperture) && !memory_info->sysmem) {
uvm_gpu_phys_address_t phys_address = uvm_gpu_peer_phys_address(memory_owning_gpu, memory_info->physAddr, memory_mapping_gpu);
UVM_ASSERT(uvm_aperture_is_sys(phys_address.aperture));
phys_offset += (phys_address.address - memory_info->physAddr);
}
for (index = 0; index < ext_mapping_info->numWrittenPtes; index++) {
pte = hal->make_pte(aperture,
@@ -159,6 +167,16 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
prot,
pte_flags);
if (pte != ext_mapping_info->pteBuffer[index * skip]) {
UVM_ERR_PRINT("PTE mismatch for %s->%s at %d (aperture: %s) %llx vs. %llx (address: %llx)\n",
uvm_parent_gpu_name(memory_mapping_gpu->parent),
uvm_parent_gpu_name(memory_owning_gpu->parent),
index,
uvm_aperture_string(aperture),
pte,
ext_mapping_info->pteBuffer[index * skip],
memory_info->physAddr);
}
TEST_CHECK_RET(pte == ext_mapping_info->pteBuffer[index * skip]);
phys_offset += page_size;

View File

@@ -42,24 +42,21 @@
#include "nv_uvm_interface.h"
uvm_global_t g_uvm_global;
static struct UvmOpsUvmEvents g_exported_uvm_ops;
static struct UvmEventsLinux g_exported_uvm_events;
static bool g_ops_registered = false;
static NV_STATUS uvm_register_callbacks(void)
{
NV_STATUS status = NV_OK;
g_exported_uvm_ops.suspend = uvm_suspend_entry;
g_exported_uvm_ops.resume = uvm_resume_entry;
g_exported_uvm_ops.startDevice = NULL;
g_exported_uvm_ops.stopDevice = NULL;
g_exported_uvm_ops.isrTopHalf = uvm_isr_top_half_entry;
g_exported_uvm_ops.drainP2P = uvm_suspend_and_drainP2P_entry;
g_exported_uvm_ops.resumeP2P = uvm_resumeP2P_entry;
g_exported_uvm_events.isrTopHalf = uvm_isr_top_half_entry;
g_exported_uvm_events.suspend = uvm_suspend_entry;
g_exported_uvm_events.resume = uvm_resume_entry;
g_exported_uvm_events.drainP2P = uvm_suspend_and_drainP2P_entry;
g_exported_uvm_events.resumeP2P = uvm_resumeP2P_entry;
// Register the UVM callbacks with the main GPU driver:
status = uvm_rm_locked_call(nvUvmInterfaceRegisterUvmCallbacks(&g_exported_uvm_ops));
status = uvm_rm_locked_call(nvUvmInterfaceRegisterUvmEvents(&g_exported_uvm_events));
if (status != NV_OK)
return status;
@@ -71,7 +68,7 @@ static NV_STATUS uvm_register_callbacks(void)
static void uvm_unregister_callbacks(void)
{
if (g_ops_registered) {
uvm_rm_locked_call_void(nvUvmInterfaceDeRegisterUvmOps());
uvm_rm_locked_call_void(nvUvmInterfaceDeRegisterUvmEvents());
g_ops_registered = false;
}
}

View File

@@ -408,14 +408,14 @@ static uvm_gpu_t *uvm_gpu_find_next_valid_gpu_in_parent(uvm_parent_gpu_t *parent
uvm_gpu_t *gpu = NULL;
uvm_gpu_id_t gpu_id;
NvU32 sub_processor_index;
NvU32 cur_sub_processor_index;
NvU32 start_search_index;
UVM_ASSERT(parent_gpu);
gpu_id = uvm_gpu_id_from_parent_gpu_id(parent_gpu->id);
cur_sub_processor_index = cur_gpu ? uvm_id_sub_processor_index(cur_gpu->id) : -1;
start_search_index = cur_gpu ? uvm_id_sub_processor_index(cur_gpu->id) + 1 : 0;
sub_processor_index = find_next_bit(parent_gpu->valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS, cur_sub_processor_index + 1);
sub_processor_index = find_next_bit(parent_gpu->valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS, start_search_index);
if (sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS) {
gpu = uvm_gpu_get(uvm_id_from_value(uvm_id_value(gpu_id) + sub_processor_index));
UVM_ASSERT(gpu != NULL);

View File

@@ -44,6 +44,7 @@
#include "uvm_conf_computing.h"
#include "uvm_linux.h"
#include "uvm_mmu.h"
#include "uvm_kvmalloc.h"
#define UVM_PROC_GPUS_PEER_DIR_NAME "peers"
@@ -67,6 +68,8 @@ static uvm_gpu_link_type_t get_gpu_link_type(UVM_LINK_TYPE link_type)
switch (link_type) {
case UVM_LINK_TYPE_PCIE:
return UVM_GPU_LINK_PCIE;
case UVM_LINK_TYPE_PCIE_BAR1:
return UVM_GPU_LINK_PCIE_BAR1;
case UVM_LINK_TYPE_NVLINK_1:
return UVM_GPU_LINK_NVLINK_1;
case UVM_LINK_TYPE_NVLINK_2:
@@ -107,18 +110,18 @@ static void fill_parent_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo
}
parent_gpu->nvswitch_info.is_nvswitch_connected = gpu_info->connectedToSwitch;
parent_gpu->peer_address_info.is_direct_connected = gpu_info->nvlDirectConnect;
parent_gpu->peer_address_info.is_nvlink_direct_connected = gpu_info->nvlDirectConnect;
// nvswitch is routed via physical pages, where the upper 13-bits of the
// 47-bit address space holds the routing information for each peer.
// Currently, this is limited to a 16GB framebuffer window size.
if (parent_gpu->nvswitch_info.is_nvswitch_connected) {
if (parent_gpu->peer_address_info.is_nvlink_direct_connected) {
parent_gpu->peer_address_info.peer_gpa_memory_window_start = gpu_info->nvlDirectConnectMemoryWindowStart;
}
else if (parent_gpu->nvswitch_info.is_nvswitch_connected) {
// nvswitch is routed via physical pages, where the upper 13-bits of the
// 47-bit address space holds the routing information for each peer.
// Currently, this is limited to a 16GB framebuffer window size.
parent_gpu->nvswitch_info.fabric_memory_window_start = gpu_info->nvswitchMemoryWindowStart;
parent_gpu->nvswitch_info.egm_fabric_memory_window_start = gpu_info->nvswitchEgmMemoryWindowStart;
}
else if (parent_gpu->peer_address_info.is_direct_connected) {
parent_gpu->peer_address_info.peer_gpa_memory_window_start = gpu_info->nvlDirectConnectMemoryWindowStart;
}
parent_gpu->ats.non_pasid_ats_enabled = gpu_info->nonPasidAtsSupport;
@@ -533,11 +536,12 @@ static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)
static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
{
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 8);
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 9);
switch (link_type) {
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_INVALID);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_PCIE);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_PCIE_BAR1);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_1);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_2);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_3);
@@ -666,14 +670,14 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
gpu->parent->isr.access_counters[i].stats.cpu_exec_count[cpu]);
}
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_buffer_entries %u\n",
gpu->parent->access_counter_buffer[i].max_notifications);
gpu->parent->access_counters.buffer[i].max_notifications);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_get %u\n",
gpu->parent->access_counter_buffer[i].cached_get);
gpu->parent->access_counters.buffer[i].cached_get);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_put %u\n",
gpu->parent->access_counter_buffer[i].cached_put);
gpu->parent->access_counters.buffer[i].cached_put);
get = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferGet);
put = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferPut);
get = UVM_GPU_READ_ONCE(*gpu->parent->access_counters.buffer[i].rm_info.pAccessCntrBufferGet);
put = UVM_GPU_READ_ONCE(*gpu->parent->access_counters.buffer[i].rm_info.pAccessCntrBufferPut);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_get %u\n", get);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_put %u\n", put);
@@ -766,10 +770,10 @@ static void gpu_access_counters_print_common(uvm_parent_gpu_t *parent_gpu, struc
UVM_ASSERT(uvm_procfs_is_debug_enabled());
// procfs_files are created before gpu_init_isr, we need to check if the
// access_counter_buffer is allocated.
if (parent_gpu->access_counter_buffer) {
// access_counters.buffer is allocated.
if (parent_gpu->access_counters.buffer) {
for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++) {
uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[i];
uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counters.buffer[i];
num_pages_out = atomic64_read(&access_counters->stats.num_pages_out);
num_pages_in = atomic64_read(&access_counters->stats.num_pages_in);
@@ -885,6 +889,19 @@ static uvm_aperture_t parent_gpu_peer_aperture(uvm_parent_gpu_t *local,
else
peer_index = 1;
if (parent_peer_caps->link_type == UVM_GPU_LINK_PCIE_BAR1) {
// UVM_APERTURE_SYS can be used if either the local (accessing) GPU
// _DOES NOT_ use PCIE atomics, or the remote (owning) GPU _DOES_
// accept PCIE atomics. Moreover, the bus topology needs to support
// routing of PCIe atomics between the devices.
//
// If either of the above conditions is not met we need to use
// UVM_APERTURE_SYS_NON_COHERENT to prevent use of PCIe atomics.
// RM provides the consolidated information in P2P properties.
const bool enable_atomics = parent_peer_caps->bar1_p2p_pcie_atomics_enabled[peer_index];
return enable_atomics ? UVM_APERTURE_SYS : UVM_APERTURE_SYS_NON_COHERENT;
}
return UVM_APERTURE_PEER(parent_peer_caps->peer_ids[peer_index]);
}
@@ -1164,6 +1181,22 @@ static void deinit_semaphore_pools(uvm_gpu_t *gpu)
uvm_gpu_semaphore_pool_destroy(gpu->secure_semaphore_pool);
}
static void init_access_counters_serialize_clear_tracker(uvm_parent_gpu_t *parent)
{
NvU32 i;
for (i = 0; i < UVM_ACCESS_COUNTER_CLEAR_OP_COUNT; i++)
uvm_tracker_init(&parent->access_counters.serialize_clear_tracker[i]);
}
static void deinit_access_counters_serialize_clear_tracker(uvm_parent_gpu_t *parent)
{
NvU32 i;
for (i = 0; i < UVM_ACCESS_COUNTER_CLEAR_OP_COUNT; i++)
uvm_tracker_deinit(&parent->access_counters.serialize_clear_tracker[i]);
}
static NV_STATUS find_unused_gpu_id(uvm_parent_gpu_t *parent_gpu, uvm_gpu_id_t *out_id)
{
NvU32 i;
@@ -1209,9 +1242,11 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
uvm_uuid_copy(&parent_gpu->uuid, gpu_uuid);
uvm_sema_init(&parent_gpu->isr.replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_sema_init(&parent_gpu->isr.non_replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_mutex_init(&parent_gpu->access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
uvm_mutex_init(&parent_gpu->access_counters_clear_tracker_lock, UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS);
uvm_tracker_init(&parent_gpu->access_counters_clear_tracker);
uvm_mutex_init(&parent_gpu->access_counters.enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
uvm_mutex_init(&parent_gpu->access_counters.clear_tracker_lock, UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS);
uvm_mutex_init(&parent_gpu->access_counters.serialize_clear_lock, UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS);
uvm_tracker_init(&parent_gpu->access_counters.clear_tracker);
init_access_counters_serialize_clear_tracker(parent_gpu);
uvm_spin_lock_irqsave_init(&parent_gpu->isr.interrupts_lock, UVM_LOCK_ORDER_LEAF);
uvm_spin_lock_init(&parent_gpu->instance_ptr_table_lock, UVM_LOCK_ORDER_LEAF);
uvm_rb_tree_init(&parent_gpu->instance_ptr_table);
@@ -1229,7 +1264,8 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
return NV_OK;
cleanup:
uvm_tracker_deinit(&parent_gpu->access_counters_clear_tracker);
uvm_tracker_deinit(&parent_gpu->access_counters.clear_tracker);
deinit_access_counters_serialize_clear_tracker(parent_gpu);
uvm_kvfree(parent_gpu);
return status;
@@ -1686,24 +1722,41 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
}
// Sync the access counter clear tracker too.
if (parent_gpu->access_counters_supported && parent_gpu->access_counter_buffer) {
uvm_mutex_lock(&parent_gpu->access_counters_clear_tracker_lock);
status = uvm_tracker_wait(&parent_gpu->access_counters_clear_tracker);
uvm_mutex_unlock(&parent_gpu->access_counters_clear_tracker_lock);
if (parent_gpu->access_counters_supported && parent_gpu->access_counters.buffer) {
uvm_mutex_lock(&parent_gpu->access_counters.clear_tracker_lock);
status = uvm_tracker_wait(&parent_gpu->access_counters.clear_tracker);
uvm_mutex_unlock(&parent_gpu->access_counters.clear_tracker_lock);
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
if (parent_gpu->access_counters_serialize_clear_ops_by_type) {
uvm_access_counter_clear_op_t op;
uvm_mutex_lock(&parent_gpu->access_counters.serialize_clear_lock);
for (op = 0; op < UVM_ACCESS_COUNTER_CLEAR_OP_COUNT; op++) {
status = uvm_tracker_wait(&parent_gpu->access_counters.serialize_clear_tracker[op]);
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
}
uvm_mutex_unlock(&parent_gpu->access_counters.serialize_clear_lock);
}
}
}
void uvm_parent_gpu_sync_trackers(uvm_parent_gpu_t *parent_gpu)
{
sync_parent_gpu_trackers(parent_gpu,
parent_gpu->isr.replayable_faults.handling,
parent_gpu->isr.non_replayable_faults.handling);
}
// Remove all references the given GPU has to other GPUs, since one of those
// other GPUs is getting removed. This involves waiting for any unfinished
// trackers contained by this GPU.
static void remove_gpus_from_gpu(uvm_gpu_t *gpu)
{
sync_parent_gpu_trackers(gpu->parent,
gpu->parent->isr.replayable_faults.handling,
gpu->parent->isr.non_replayable_faults.handling);
uvm_parent_gpu_sync_trackers(gpu->parent);
// Sync all trackers in PMM
uvm_pmm_gpu_sync(&gpu->pmm);
@@ -1713,7 +1766,7 @@ static void remove_gpus_from_gpu(uvm_gpu_t *gpu)
}
// Remove all references to the given GPU from its parent, since it is being
// removed. This involves waiting for any unfinished trackers contained
// removed. This involves waiting for any unfinished trackers contained
// by the parent GPU.
static void remove_gpu_from_parent_gpu(uvm_gpu_t *gpu)
{
@@ -1823,7 +1876,8 @@ static void uvm_parent_gpu_destroy(nv_kref_t *nv_kref)
for_each_sub_processor_index(sub_processor_index)
UVM_ASSERT(!parent_gpu->gpus[sub_processor_index]);
uvm_tracker_deinit(&parent_gpu->access_counters_clear_tracker);
uvm_tracker_deinit(&parent_gpu->access_counters.clear_tracker);
deinit_access_counters_serialize_clear_tracker(parent_gpu);
uvm_kvfree(parent_gpu);
}
@@ -1960,7 +2014,7 @@ static void update_stats_migration_cb(uvm_va_space_t *va_space,
}
else if (is_access_counter) {
NvU32 index = event_data->migration.access_counters_buffer_index;
atomic64_add(pages, &gpu_dst->parent->access_counter_buffer[index].stats.num_pages_in);
atomic64_add(pages, &gpu_dst->parent->access_counters.buffer[index].stats.num_pages_in);
}
}
if (gpu_src) {
@@ -1973,7 +2027,7 @@ static void update_stats_migration_cb(uvm_va_space_t *va_space,
}
else if (is_access_counter) {
NvU32 index = event_data->migration.access_counters_buffer_index;
atomic64_add(pages, &gpu_src->parent->access_counter_buffer[index].stats.num_pages_out);
atomic64_add(pages, &gpu_src->parent->access_counters.buffer[index].stats.num_pages_out);
}
}
}
@@ -2114,11 +2168,19 @@ bool uvm_parent_gpus_are_nvswitch_connected(const uvm_parent_gpu_t *parent_gpu0,
return false;
}
bool uvm_parent_gpus_are_direct_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1)
bool uvm_parent_gpus_are_bar1_peers(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1)
{
if (parent_gpu0 != parent_gpu1)
return parent_gpu_peer_caps(parent_gpu0, parent_gpu1)->link_type == UVM_GPU_LINK_PCIE_BAR1;
return false;
}
bool uvm_parent_gpus_are_nvlink_direct_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1)
{
if (parent_gpu0 != parent_gpu1 &&
parent_gpu0->peer_address_info.is_direct_connected &&
parent_gpu1->peer_address_info.is_direct_connected)
parent_gpu0->peer_address_info.is_nvlink_direct_connected &&
parent_gpu1->peer_address_info.is_nvlink_direct_connected)
return true;
return false;
@@ -2419,6 +2481,17 @@ static NV_STATUS parent_peers_init(uvm_parent_gpu_t *parent_gpu0,
parent_peer_caps->optimalNvlinkWriteCEs[0] = p2p_caps_params.optimalNvlinkWriteCEs[0];
parent_peer_caps->optimalNvlinkWriteCEs[1] = p2p_caps_params.optimalNvlinkWriteCEs[1];
// Set IOMMU/DMA mappings for bar1 p2p
parent_peer_caps->bar1_p2p_dma_base_address[0] = p2p_caps_params.bar1DmaAddress[0];
parent_peer_caps->bar1_p2p_dma_base_address[1] = p2p_caps_params.bar1DmaAddress[1];
parent_peer_caps->bar1_p2p_dma_size[0] = p2p_caps_params.bar1DmaSize[0];
parent_peer_caps->bar1_p2p_dma_size[1] = p2p_caps_params.bar1DmaSize[1];
parent_peer_caps->bar1_p2p_pcie_atomics_enabled[0] = p2p_caps_params.bar1PcieAtomics[0];
parent_peer_caps->bar1_p2p_pcie_atomics_enabled[1] = p2p_caps_params.bar1PcieAtomics[1];
if (parent_peer_caps->bar1_p2p_dma_size[0] || parent_peer_caps->bar1_p2p_dma_size[1])
UVM_ASSERT(link_type == UVM_GPU_LINK_PCIE_BAR1);
return NV_OK;
cleanup:
@@ -2563,7 +2636,7 @@ static void peers_release(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
peers_destroy(gpu0, gpu1, peer_caps);
}
static void parent_peers_destroy_nvlink(uvm_parent_gpu_t *parent_gpu)
static void parent_peers_destroy_static_link(uvm_parent_gpu_t *parent_gpu)
{
uvm_parent_gpu_t *other_parent_gpu;
@@ -2585,7 +2658,7 @@ static void parent_peers_destroy_nvlink(uvm_parent_gpu_t *parent_gpu)
}
}
static NV_STATUS parent_peers_discover_nvlink(uvm_parent_gpu_t *parent_gpu)
static NV_STATUS parent_peers_discover_static_link(uvm_parent_gpu_t *parent_gpu)
{
uvm_parent_gpu_t *other_parent_gpu;
NV_STATUS status;
@@ -2617,12 +2690,12 @@ static NV_STATUS parent_peers_discover_nvlink(uvm_parent_gpu_t *parent_gpu)
return NV_OK;
cleanup:
parent_peers_destroy_nvlink(parent_gpu);
parent_peers_destroy_static_link(parent_gpu);
return status;
}
static void peers_destroy_nvlink(uvm_gpu_t *gpu)
static void peers_destroy_static_link(uvm_gpu_t *gpu)
{
uvm_parent_gpu_t *other_parent_gpu;
uvm_parent_gpu_t *parent_gpu;
@@ -2656,7 +2729,7 @@ static void peers_destroy_nvlink(uvm_gpu_t *gpu)
}
}
static NV_STATUS peers_discover_nvlink(uvm_gpu_t *gpu)
static NV_STATUS peers_discover_static_link(uvm_gpu_t *gpu)
{
uvm_parent_gpu_t *parent_gpu = gpu->parent;
uvm_parent_gpu_t *other_parent_gpu;
@@ -2688,11 +2761,26 @@ static NV_STATUS peers_discover_nvlink(uvm_gpu_t *gpu)
return NV_OK;
cleanup:
peers_destroy_nvlink(gpu);
peers_destroy_static_link(gpu);
return status;
}
static NV_STATUS uvm_gpu_init_access_bits(uvm_parent_gpu_t *parent_gpu)
{
return uvm_rm_locked_call(nvUvmInterfaceAccessBitsBufAlloc(parent_gpu->rm_device, &parent_gpu->vab_info));
}
static NV_STATUS uvm_gpu_update_access_bits(uvm_parent_gpu_t *parent_gpu, UVM_ACCESS_BITS_DUMP_MODE mode)
{
return nvUvmInterfaceAccessBitsDump(parent_gpu->rm_device, &parent_gpu->vab_info, mode);
}
static NV_STATUS uvm_gpu_deinit_access_bits(uvm_parent_gpu_t *parent_gpu)
{
return uvm_rm_locked_call(nvUvmInterfaceAccessBitsBufFree(parent_gpu->rm_device, &parent_gpu->vab_info));
}
// Remove a gpu and unregister it from RM
// Note that this is also used in most error paths in add_gpu()
static void remove_gpu(uvm_gpu_t *gpu)
@@ -2700,6 +2788,7 @@ static void remove_gpu(uvm_gpu_t *gpu)
NvU32 sub_processor_index;
uvm_parent_gpu_t *parent_gpu;
bool free_parent;
NV_STATUS status;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
@@ -2716,12 +2805,17 @@ static void remove_gpu(uvm_gpu_t *gpu)
free_parent = (parent_gpu->num_retained_gpus == 0);
if (free_parent && parent_gpu->access_bits_supported) {
status = uvm_gpu_deinit_access_bits(parent_gpu);
UVM_ASSERT(status == NV_OK);
}
// NVLINK peers must be removed and the relevant access counter buffers must
// be flushed before removing this GPU from the global table.
peers_destroy_nvlink(gpu);
peers_destroy_static_link(gpu);
if (free_parent)
parent_peers_destroy_nvlink(parent_gpu);
parent_peers_destroy_static_link(parent_gpu);
// uvm_mem_free and other uvm_mem APIs invoked by the Confidential Compute
// deinitialization must be called before the GPU is removed from the global
@@ -2865,21 +2959,27 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
if (alloc_parent) {
status = parent_peers_discover_nvlink(parent_gpu);
status = parent_peers_discover_static_link(parent_gpu);
if (status != NV_OK)
goto error_retained;
}
status = peers_discover_nvlink(gpu);
status = peers_discover_static_link(gpu);
if (status != NV_OK)
goto error_retained;
*gpu_out = gpu;
if (alloc_parent && parent_gpu->access_bits_supported) {
status = uvm_gpu_init_access_bits(parent_gpu);
if (status != NV_OK)
goto error_retained;
}
return NV_OK;
error_retained:
UVM_ERR_PRINT("Failed to discover NVLINK peers: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
UVM_ERR_PRINT("Failed to discover NVLINK/BAR1 peers: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
// Nobody can have retained the GPU yet, since we still hold the
// global lock.
@@ -2933,10 +3033,6 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
if (status != NV_OK)
goto error_unregister;
// TODO: Bug 5262806: Remove this WAR once the bug is fixed.
if (gpu_info->accessCntrBufferCount > 1)
gpu_info->accessCntrBufferCount = 1;
if (parent_gpu != NULL) {
// If the UUID has been seen before, and if SMC is enabled, then check
// if this specific partition has been seen previously. The UUID-based
@@ -3082,10 +3178,25 @@ uvm_gpu_phys_address_t uvm_gpu_peer_phys_address(uvm_gpu_t *owning_gpu, NvU64 ad
{
uvm_aperture_t aperture = uvm_gpu_peer_aperture(accessing_gpu, owning_gpu);
if (uvm_parent_gpus_are_direct_connected(accessing_gpu->parent, owning_gpu->parent))
if (uvm_parent_gpus_are_nvlink_direct_connected(accessing_gpu->parent, owning_gpu->parent)) {
UVM_ASSERT(uvm_aperture_is_peer(aperture));
address += owning_gpu->parent->peer_address_info.peer_gpa_memory_window_start;
else if (uvm_parent_gpus_are_nvswitch_connected(accessing_gpu->parent, owning_gpu->parent))
}
else if (uvm_parent_gpus_are_nvswitch_connected(accessing_gpu->parent, owning_gpu->parent)) {
UVM_ASSERT(uvm_aperture_is_peer(aperture));
address += owning_gpu->parent->nvswitch_info.fabric_memory_window_start;
}
else if (uvm_aperture_is_sys(aperture)) {
// BAR1 P2P can use either coherent or non-coherent sysmem,
// depending on atomic capabilities of the peer devices.
uvm_parent_gpu_peer_t *parent_peer_caps = parent_gpu_peer_caps(accessing_gpu->parent, owning_gpu->parent);
int peer_index = (uvm_id_cmp(accessing_gpu->id, owning_gpu->id) < 0) ? 0 : 1;
UVM_ASSERT(parent_peer_caps->link_type == UVM_GPU_LINK_PCIE_BAR1);
UVM_ASSERT(parent_peer_caps->bar1_p2p_dma_size[peer_index] != 0);
address += parent_peer_caps->bar1_p2p_dma_base_address[peer_index];
}
return uvm_gpu_phys_address(aperture, address);
}
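
The translation above follows one pattern for every link type: the owning GPU's local physical address plus a per-link window base, with per-pair values kept in two-element arrays indexed by GPU id order (index 0 belongs to the GPU with the smaller id). A minimal standalone sketch of that pattern, using illustrative names rather than the driver's:

#include <stdint.h>
#include <stdio.h>

enum link { LINK_NVLINK_DIRECT, LINK_NVSWITCH, LINK_PCIE_BAR1 };

struct pair_caps {
    uint64_t bar1_dma_base[2];   /* BAR1 DMA window base, one per direction */
};

/* By convention, index 0 of a per-pair array belongs to the GPU with the
 * smaller id in the pair. */
static int pair_index(int accessing_id, int owning_id)
{
    return accessing_id < owning_id ? 0 : 1;
}

/* Peer physical address = owning GPU's local address + per-link window base. */
static uint64_t peer_phys_address(enum link link,
                                  uint64_t local_addr,
                                  uint64_t nvlink_window_start,
                                  uint64_t nvswitch_window_start,
                                  const struct pair_caps *caps,
                                  int accessing_id,
                                  int owning_id)
{
    switch (link) {
    case LINK_NVLINK_DIRECT:
        return local_addr + nvlink_window_start;
    case LINK_NVSWITCH:
        return local_addr + nvswitch_window_start;
    case LINK_PCIE_BAR1:
        return local_addr + caps->bar1_dma_base[pair_index(accessing_id, owning_id)];
    }
    return local_addr;
}

int main(void)
{
    struct pair_caps caps = { { 0x100000000ull, 0x200000000ull } };
    /* GPU 0 accessing GPU 1 over BAR1 uses index 0 of the pair arrays. */
    unsigned long long a = peer_phys_address(LINK_PCIE_BAR1, 0x1000, 0, 0, &caps, 0, 1);
    printf("0x%llx\n", a);   /* prints 0x100001000 */
    return 0;
}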
@@ -3134,6 +3245,69 @@ NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
return gpu_peer_caps(gpu0, gpu1)->ref_count;
}
static bool gpu_address_is_coherent_peer(uvm_gpu_t *gpu, uvm_gpu_phys_address_t address)
{
bool is_peer = false;
uvm_parent_gpu_t *parent_gpu;
phys_addr_t phys_addr;
if (address.aperture != UVM_APERTURE_SYS)
return false;
// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
// either inline, or via ATS.
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);
// Exposed coherent vidmem can be accessed via sys aperture even without
// GPUs being explicit peers, so each parent GPU is a potential peer.
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
for_each_parent_gpu(parent_gpu) {
if (parent_gpu == gpu->parent)
continue;
if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
phys_addr <= parent_gpu->system_bus.memory_window_end) {
is_peer = true;
break;
}
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return is_peer;
}
static bool gpu_phys_address_is_bar1p2p_peer(uvm_gpu_t *gpu, uvm_gpu_phys_address_t address)
{
bool is_peer = false;
uvm_parent_processor_mask_t peer_parent_gpus;
uvm_parent_gpu_t *peer_parent_gpu;
// BAR1 P2P is accessed via sys aperture
if (!uvm_aperture_is_sys(address.aperture))
return false;
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
uvm_parent_gpus_from_processor_mask(&peer_parent_gpus, &gpu->peer_info.peer_gpu_mask);
for_each_parent_gpu_in_mask(peer_parent_gpu, &peer_parent_gpus) {
const uvm_parent_gpu_peer_t *peer_caps = parent_gpu_peer_caps(gpu->parent, peer_parent_gpu);
const int peer_index = (uvm_parent_id_cmp(gpu->parent->id, peer_parent_gpu->id) < 0) ? 0 : 1;
UVM_ASSERT(peer_caps->ref_count > 0);
if (peer_caps->link_type != UVM_GPU_LINK_PCIE_BAR1)
continue;
if (address.address >= peer_caps->bar1_p2p_dma_base_address[peer_index] &&
address.address < (peer_caps->bar1_p2p_dma_base_address[peer_index] + peer_caps->bar1_p2p_dma_size[peer_index])) {
is_peer = true;
break;
}
}
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return is_peer;
}
bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address)
{
if (address.is_virtual) {
@@ -3145,21 +3319,18 @@ bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address)
}
}
else {
uvm_parent_gpu_t *parent_gpu;
phys_addr_t phys_addr;
if (uvm_aperture_is_peer(address.aperture)) {
uvm_parent_processor_mask_t parent_gpus;
uvm_parent_gpu_t *parent_peer_gpu;
uvm_parent_processor_mask_t peer_parent_gpus;
uvm_parent_gpu_t *peer_parent_gpu;
// Local EGM accesses don't go over NVLINK
if (gpu->parent->egm.enabled && address.aperture == gpu->parent->egm.local_peer_id)
return false;
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
uvm_parent_gpus_from_processor_mask(&parent_gpus, &gpu->peer_info.peer_gpu_mask);
for_each_parent_gpu_in_mask(parent_peer_gpu, &parent_gpus) {
if (!parent_peer_gpu->egm.enabled)
uvm_parent_gpus_from_processor_mask(&peer_parent_gpus, &gpu->peer_info.peer_gpu_mask);
for_each_parent_gpu_in_mask(peer_parent_gpu, &peer_parent_gpus) {
if (!peer_parent_gpu->egm.enabled)
continue;
// EGM uses peer IDs but they are different from VIDMEM peer
@@ -3171,32 +3342,18 @@ bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address)
// when accessing EGM memory
// TODO: Bug: 5007527 [UVM] Extend STO recovery to EGM enabled
// systems
UVM_ASSERT(address.aperture != uvm_gpu_egm_peer_aperture(gpu->parent, parent_peer_gpu));
UVM_ASSERT(address.aperture != uvm_gpu_egm_peer_aperture(gpu->parent, peer_parent_gpu));
}
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return true;
} else if (address.aperture == UVM_APERTURE_SYS) {
bool is_peer = false;
// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
// either inline, or via ATS.
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);
// Exposed coherent vidmem can be accessed via sys aperture
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
for_each_parent_gpu(parent_gpu) {
if (parent_gpu == gpu->parent)
continue;
if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
phys_addr <= parent_gpu->system_bus.memory_window_end) {
is_peer = true;
}
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return is_peer;
}
else if (uvm_aperture_is_sys(address.aperture)) {
// SYS aperture is used for coherent peers or BAR1 P2P.
// SYS_NON_COHERENT aperture is used for BAR1 P2P.
uvm_gpu_phys_address_t phys_addr = uvm_gpu_phys_address(address.aperture, address.address);
return gpu_address_is_coherent_peer(gpu, phys_addr) || gpu_phys_address_is_bar1p2p_peer(gpu, phys_addr);
}
UVM_ASSERT(address.aperture == UVM_APERTURE_VID);
@@ -3927,3 +4084,50 @@ NV_STATUS uvm_test_get_gpu_time(UVM_TEST_GET_GPU_TIME_PARAMS *params, struct fil
return status;
}
NV_STATUS uvm_test_dump_access_bits(UVM_TEST_DUMP_ACCESS_BITS_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_gpu_t *gpu = NULL;
NV_STATUS status = NV_OK;
NvU64 granularity_size_kb = 0;
gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpu_uuid);
if (!gpu || !gpu->parent->access_bits_supported) {
status = NV_ERR_INVALID_DEVICE;
goto done;
}
if (!gpu->parent->vab_info.accessBitsBufferHandle) {
status = NV_ERR_INVALID_STATE;
goto done;
}
// See resman/interface/rmapi/finn/ctrl/ctrlc763.finn for 'granularity' enum values
granularity_size_kb = (NvU64)(64) << gpu->parent->vab_info.granularity;
params->granularity_size_kb = granularity_size_kb;
status = uvm_gpu_update_access_bits(gpu->parent, params->mode);
if (status != NV_OK)
goto done;
// If this is a length query, we are done after we set the length
if (params->current_bits_length == 0) {
params->current_bits_length = ARRAY_SIZE(gpu->parent->vab_info.currentBits);
goto done;
}
// Copy the bits to user space
if (copy_to_user(params->current_bits,
gpu->parent->vab_info.currentBits,
sizeof(NvU64) * params->current_bits_length)) {
status = NV_ERR_INVALID_ADDRESS;
goto done;
}
done:
if (gpu)
uvm_gpu_release(gpu);
return status;
}
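
The granularity value scales a 64KB base by a power of two, so the reported size is simply 64 << granularity kilobytes. A quick standalone check of that arithmetic; the printed values follow from the formula above, not from the ctrlc763.finn enum definition itself.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* granularity_size_kb = 64 << granularity, as computed above. */
    for (uint32_t granularity = 0; granularity < 4; granularity++)
        printf("granularity %u -> %llu KB\n",
               granularity,
               (unsigned long long)((uint64_t)64 << granularity));
    /* Prints 64, 128, 256, 512 KB for enum values 0..3. */
    return 0;
}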

View File

@@ -615,6 +615,7 @@ typedef enum
{
UVM_GPU_LINK_INVALID = 0,
UVM_GPU_LINK_PCIE,
UVM_GPU_LINK_PCIE_BAR1,
UVM_GPU_LINK_NVLINK_1,
UVM_GPU_LINK_NVLINK_2,
UVM_GPU_LINK_NVLINK_3,
@@ -996,6 +997,9 @@ struct uvm_parent_gpu_struct
// Total amount of physical memory available on the parent GPU.
NvU64 max_allocatable_address;
// Access bits buffer information
UvmGpuAccessBitsBufferAlloc vab_info;
#if UVM_IS_CONFIG_HMM() || defined(NV_MEMORY_DEVICE_COHERENT_PRESENT)
uvm_pmm_gpu_devmem_t *devmem;
#endif
@@ -1069,6 +1073,11 @@ struct uvm_parent_gpu_struct
bool access_counters_supported;
// True when HW does not allow mixing different clear types concurrently.
bool access_counters_serialize_clear_ops_by_type;
bool access_bits_supported;
bool fault_cancel_va_supported;
// True if the GPU has hardware support for scoped atomics
@@ -1209,15 +1218,25 @@ struct uvm_parent_gpu_struct
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
nv_kthread_q_t lazy_free_q;
// This is only valid if supports_access_counters is set to true. This array
// has rm_info.accessCntrBufferCount entries.
uvm_access_counter_buffer_t *access_counter_buffer;
uvm_mutex_t access_counters_enablement_lock;
struct
{
// This is only valid if supports_access_counters is set to true. This
// array has rm_info.accessCntrBufferCount entries.
uvm_access_counter_buffer_t *buffer;
uvm_mutex_t enablement_lock;
// Tracker used to aggregate access counters clear operations, needed for
// GPU removal. It is only used when supports_access_counters is set.
uvm_tracker_t access_counters_clear_tracker;
uvm_mutex_t access_counters_clear_tracker_lock;
// Tracker used to aggregate access counters clear operations, needed
// for GPU removal. It is used when supports_access_counters is set.
uvm_tracker_t clear_tracker;
uvm_mutex_t clear_tracker_lock;
// The following access_counters fields are used when
// access_counters_serialize_clear_ops_by_type is set.
// The serialize_clear_tracker is not the common case; its use is
// decoupled from the clear_tracker (above).
uvm_tracker_t serialize_clear_tracker[UVM_ACCESS_COUNTER_CLEAR_OP_COUNT];
uvm_mutex_t serialize_clear_lock;
} access_counters;
// Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
NvU32 utlb_per_gpc_count;
@@ -1335,6 +1354,14 @@ struct uvm_parent_gpu_struct
// only affects ATS systems.
bool no_ats_range_required : 1;
// Page tree initialization requires the initialization of the entire
// depth-0 allocated area, not only the HW supported entry count range.
// The GMMU page table walk cache operates at its own CL granularity
// (32B). We must have an allocated depth-0 page table of at least this
// size, regardless of how many entries are supported by HW.
// The allocation size is determined by MMU HAL allocation_size().
bool gmmu_pt_depth0_init_required : 1;
// See the comments on uvm_dma_map_invalidation_t
uvm_dma_map_invalidation_t dma_map_invalidation;
@@ -1371,7 +1398,7 @@ struct uvm_parent_gpu_struct
struct
{
// Is the GPU directly connected to peer GPUs.
bool is_direct_connected;
bool is_nvlink_direct_connected;
// 48-bit fabric memory physical offset that peer gpus need in order
// for accesses to be routed to the correct peer.
@@ -1467,6 +1494,22 @@ typedef struct
// iff max(gpu_id_1, gpu_id_2) is EGM-enabled.
NvU8 egm_peer_ids[2];
// IOMMU/DMA mappings of the peer vidmem via bar1. Accesses to this window
// are routed to peer GPU vidmem. The values are provided by RM and RM is
// responsible for creating IOMMU mappings if such mappings are required.
// RM is also responsible for querying PCIe bus topology and determining
// if PCIe atomics are supported between the peers.
// These fields are valid for link type UVM_GPU_LINK_PCIE_BAR1, and the
// address is only valid if size > 0.
// bar1_p2p_dma_base_address[i] provides DMA window used by GPU[i] to
// access bar1 region of GPU[1-i].
NvU64 bar1_p2p_dma_base_address[2];
NvU64 bar1_p2p_dma_size[2];
// True if GPU[i] can use PCIe atomic operations when accessing BAR1
// region of GPU[1-i].
bool bar1_p2p_pcie_atomics_enabled[2];
// The link type between the peer parent GPUs, currently either PCIe or
// NVLINK.
uvm_gpu_link_type_t link_type;
@@ -1580,9 +1623,10 @@ static NvU64 uvm_gpu_retained_count(uvm_gpu_t *gpu)
// Decrease the refcount on the parent GPU object, and actually delete the
// object if the refcount hits zero.
void uvm_parent_gpu_kref_put(uvm_parent_gpu_t *gpu);
void uvm_parent_gpu_kref_put(uvm_parent_gpu_t *parent_gpu);
// Returns a GPU peer pair index in the range [0 .. UVM_MAX_UNIQUE_GPU_PAIRS).
// Sync the parent GPU's trackers. This involves
// waiting for any unfinished trackers contained by the parent GPU.
void uvm_parent_gpu_sync_trackers(uvm_parent_gpu_t *parent_gpu);
static bool uvm_parent_gpu_supports_full_coherence(uvm_parent_gpu_t *parent_gpu)
{
@@ -1591,6 +1635,7 @@ static bool uvm_parent_gpu_supports_full_coherence(uvm_parent_gpu_t *parent_gpu)
return parent_gpu->is_integrated_gpu;
}
// Returns a GPU peer pair index in the range [0 .. UVM_MAX_UNIQUE_GPU_PAIRS).
NvU32 uvm_gpu_pair_index(const uvm_gpu_id_t id0, const uvm_gpu_id_t id1);
// Either retains an existing PCIe peer entry or creates a new one. In both
@@ -1633,7 +1678,9 @@ uvm_aperture_t uvm_gpu_egm_peer_aperture(uvm_parent_gpu_t *local_gpu, uvm_parent
bool uvm_parent_gpus_are_nvswitch_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);
bool uvm_parent_gpus_are_direct_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);
bool uvm_parent_gpus_are_bar1_peers(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);
bool uvm_parent_gpus_are_nvlink_direct_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);
static bool uvm_gpus_are_smc_peers(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
{
@@ -1700,7 +1747,7 @@ static uvm_gpu_identity_mapping_t *uvm_gpu_get_peer_mapping(uvm_gpu_t *gpu, uvm_
// Check whether the provided address points to peer memory:
// * Physical address using one of the PEER apertures
// * Physical address using SYS aperture that belongs to an exposed coherent
// memory
// memory, or a BAR1 P2P address
// * Virtual address in the region [peer_va_base, peer_va_base + peer_va_size)
bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address);

View File

@@ -126,15 +126,15 @@ static uvm_access_counter_buffer_t *parent_gpu_access_counter_buffer_get(uvm_par
{
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
UVM_ASSERT(parent_gpu->access_counter_buffer);
UVM_ASSERT(parent_gpu->access_counters.buffer);
return &parent_gpu->access_counter_buffer[notif_buf_index];
return &parent_gpu->access_counters.buffer[notif_buf_index];
}
static uvm_access_counter_buffer_t *parent_gpu_access_counter_buffer_get_or_null(uvm_parent_gpu_t *parent_gpu,
NvU32 notif_buf_index)
{
if (parent_gpu->access_counter_buffer)
if (parent_gpu->access_counters.buffer)
return parent_gpu_access_counter_buffer_get(parent_gpu, notif_buf_index);
return NULL;
}
@@ -221,9 +221,70 @@ static NV_STATUS parent_gpu_clear_tracker_wait(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status;
uvm_mutex_lock(&parent_gpu->access_counters_clear_tracker_lock);
status = uvm_tracker_wait(&parent_gpu->access_counters_clear_tracker);
uvm_mutex_unlock(&parent_gpu->access_counters_clear_tracker_lock);
uvm_mutex_lock(&parent_gpu->access_counters.clear_tracker_lock);
status = uvm_tracker_wait(&parent_gpu->access_counters.clear_tracker);
uvm_mutex_unlock(&parent_gpu->access_counters.clear_tracker_lock);
return status;
}
static NV_STATUS access_counters_push_begin(uvm_gpu_t *gpu, uvm_push_t *push, uvm_access_counter_clear_op_t clear_op)
{
NV_STATUS status;
uvm_tracker_t *pending_clear_op_tracker = NULL;
static const char *push_info_msg[2] = { "Clear access counter: batch",
"Clear access counter: all" };
if (gpu->parent->access_counters_serialize_clear_ops_by_type) {
// The following logic only works when we have 2 clear_op options.
// Otherwise, we must select the pending clear op tracker.
BUILD_BUG_ON(UVM_ACCESS_COUNTER_CLEAR_OP_COUNT != 2);
pending_clear_op_tracker = &gpu->parent->access_counters.serialize_clear_tracker[!clear_op];
// On push_begin (below) success, this lock is released in
// access_counters_push_end();
uvm_mutex_lock(&gpu->parent->access_counters.serialize_clear_lock);
}
// uvm_push_begin_acquire() is converted to uvm_push_begin() when
// pending_clear_op_tracker is NULL. Otherwise, it adds a semaphore acquire
// at the push prologue. The semaphore acquire waits until all pending clear
// ops of the other type are finished before this clear op is processed. The
// wait is a no-op if there are no pending clear ops in flight.
status = uvm_push_begin_acquire(gpu->channel_manager,
UVM_CHANNEL_TYPE_MEMOPS,
pending_clear_op_tracker,
push,
push_info_msg[clear_op]);
if (status != NV_OK && gpu->parent->access_counters_serialize_clear_ops_by_type)
uvm_mutex_unlock(&gpu->parent->access_counters.serialize_clear_lock);
return status;
}
static NV_STATUS access_counters_push_end(uvm_push_t *push, uvm_access_counter_clear_op_t clear_op)
{
NV_STATUS status = NV_OK;
uvm_push_end(push);
if (push->gpu->parent->access_counters_serialize_clear_ops_by_type) {
uvm_tracker_t *tracker = &push->gpu->parent->access_counters.serialize_clear_tracker[clear_op];
uvm_tracker_remove_completed(tracker);
status = uvm_tracker_add_push_safe(tracker, push);
// This lock is acquired in access_counters_push_begin();
uvm_mutex_unlock(&push->gpu->parent->access_counters.serialize_clear_lock);
if (status != NV_OK)
return status;
}
uvm_mutex_lock(&push->gpu->parent->access_counters.clear_tracker_lock);
uvm_tracker_remove_completed(&push->gpu->parent->access_counters.clear_tracker);
status = uvm_tracker_add_push_safe(&push->gpu->parent->access_counters.clear_tracker, push);
uvm_mutex_unlock(&push->gpu->parent->access_counters.clear_tracker_lock);
return status;
}
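
The begin/end pair above serializes the two clear-op flavours against each other: acquire the other flavour's tracker before submitting, and record the submission in this flavour's tracker afterwards. The sketch below models only that ordering rule with plain counters and illustrative names; it is not the UVM push/tracker API.

#include <stdio.h>

/* Illustrative model (not the driver's types): two clear-op flavours are
 * serialized against each other by waiting on the *other* flavour's tracker
 * before a push and recording the push in this flavour's tracker after it. */
enum clear_op { CLEAR_OP_TARGETED = 0, CLEAR_OP_ALL = 1, CLEAR_OP_COUNT = 2 };

static int pending[CLEAR_OP_COUNT];   /* pushes not yet known to be complete */

static void clear_push_begin(enum clear_op op)
{
    /* Stand-in for the semaphore acquire on the other type's tracker:
     * nothing of the other type may still be in flight when we start. */
    pending[!op] = 0;
    printf("begin %s clear (waited for %s clears)\n",
           op == CLEAR_OP_ALL ? "all" : "targeted",
           op == CLEAR_OP_ALL ? "targeted" : "all");
}

static void clear_push_end(enum clear_op op)
{
    pending[op]++;   /* future clears of the other type must wait for this */
}

int main(void)
{
    clear_push_begin(CLEAR_OP_TARGETED);
    clear_push_end(CLEAR_OP_TARGETED);
    clear_push_begin(CLEAR_OP_ALL);     /* waits for the targeted clear */
    clear_push_end(CLEAR_OP_ALL);
    return 0;
}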
@@ -235,28 +296,19 @@ static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu, uvm_access_counter_buf
NV_STATUS status;
uvm_push_t push;
status = uvm_push_begin(gpu->channel_manager,
UVM_CHANNEL_TYPE_MEMOPS,
&push,
"Clear access counter: all");
status = access_counters_push_begin(gpu, &push, UVM_ACCESS_COUNTER_CLEAR_OP_ALL);
if (status != NV_OK) {
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s, notif buf index %u\n",
nvstatusToString(status),
uvm_gpu_name(gpu),
access_counters->index);
return status;
}
gpu->parent->host_hal->access_counter_clear_all(&push);
uvm_push_end(&push);
uvm_mutex_lock(&gpu->parent->access_counters_clear_tracker_lock);
uvm_tracker_remove_completed(&gpu->parent->access_counters_clear_tracker);
status = uvm_tracker_add_push_safe(&gpu->parent->access_counters_clear_tracker, &push);
uvm_mutex_unlock(&gpu->parent->access_counters_clear_tracker_lock);
return status;
return access_counters_push_end(&push, UVM_ACCESS_COUNTER_CLEAR_OP_ALL);
}
// Clear the access counter notifications and add it to the per-GPU clear
@@ -277,26 +329,20 @@ static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu,
UVM_ASSERT(clear_op == UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED);
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch");
status = access_counters_push_begin(gpu, &push, UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED);
if (status != NV_OK) {
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s, notif buf index %u\n",
nvstatusToString(status),
uvm_gpu_name(gpu),
access_counters->index);
return status;
}
for (i = 0; i < num_notifications; i++)
gpu->parent->host_hal->access_counter_clear_targeted(&push, notification_start[i]);
uvm_push_end(&push);
uvm_mutex_lock(&gpu->parent->access_counters_clear_tracker_lock);
uvm_tracker_remove_completed(&gpu->parent->access_counters_clear_tracker);
status = uvm_tracker_add_push_safe(&gpu->parent->access_counters_clear_tracker, &push);
uvm_mutex_unlock(&gpu->parent->access_counters_clear_tracker_lock);
return status;
return access_counters_push_end(&push, UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED);
}
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu, NvU32 index)
@@ -603,7 +649,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
UVM_ASSERT(gpu->parent->access_counters_supported);
uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_lock(&gpu->parent->access_counters.enablement_lock);
if (uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
status = NV_OK;
@@ -631,7 +677,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
}
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_unlock(&gpu->parent->access_counters.enablement_lock);
return status;
@@ -646,7 +692,7 @@ cleanup:
uvm_access_counters_isr_unlock(access_counters);
}
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_unlock(&gpu->parent->access_counters.enablement_lock);
return status;
}
@@ -703,7 +749,7 @@ void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
UVM_ASSERT(gpu->parent->access_counters_supported);
uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_lock(&gpu->parent->access_counters.enablement_lock);
if (uvm_parent_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors,
gpu->parent->id)) {
@@ -719,7 +765,7 @@ void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
}
}
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_unlock(&gpu->parent->access_counters.enablement_lock);
}
static void write_get(uvm_access_counter_buffer_t *access_counters, NvU32 get)
@@ -2054,7 +2100,7 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
goto exit_release_gpu;
}
uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_lock(&gpu->parent->access_counters.enablement_lock);
for (notif_buf_index = 0; notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount; notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
@@ -2072,7 +2118,7 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
exit_ac_lock:
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_unlock(&gpu->parent->access_counters.enablement_lock);
exit_release_gpu:
uvm_gpu_release(gpu);
@@ -2101,15 +2147,15 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
goto exit_release_gpu;
}
uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_lock(&gpu->parent->access_counters.enablement_lock);
// Access counters not enabled. Nothing to reset
if (!uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_unlock(&gpu->parent->access_counters.enablement_lock);
goto exit_release_gpu;
}
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
uvm_mutex_unlock(&gpu->parent->access_counters.enablement_lock);
// Clear operations affect all notification buffers, we use the
// notif_buf_index = 0;
@@ -2233,10 +2279,10 @@ NV_STATUS uvm_test_query_access_counters(UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS *
goto exit_release_gpu;
}
buffer_size = gpu->parent->access_counter_buffer[0].rm_info.bufferSize;
buffer_size = gpu->parent->access_counters.buffer[0].rm_info.bufferSize;
for (index = 1; index < gpu->parent->rm_info.accessCntrBufferCount; index++)
UVM_ASSERT(gpu->parent->access_counter_buffer[index].rm_info.bufferSize == buffer_size);
UVM_ASSERT(gpu->parent->access_counters.buffer[index].rm_info.bufferSize == buffer_size);
params->num_notification_buffers = gpu->parent->rm_info.accessCntrBufferCount;
params->num_notification_entries = buffer_size / gpu->parent->access_counter_buffer_hal->entry_size(gpu->parent);


@@ -183,7 +183,7 @@ static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu, N
nv_kref_get(&parent_gpu->gpu_kref);
// Interrupts need to be disabled to avoid an interrupt storm
uvm_access_counters_intr_disable(&parent_gpu->access_counter_buffer[notif_buf_index]);
uvm_access_counters_intr_disable(&parent_gpu->access_counters.buffer[notif_buf_index]);
nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item);
@@ -327,12 +327,12 @@ static NV_STATUS uvm_isr_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU3
if (!block_context)
return NV_ERR_NO_MEMORY;
parent_gpu->access_counter_buffer[notif_buf_index].batch_service_context.block_service_context.block_context =
parent_gpu->access_counters.buffer[notif_buf_index].batch_service_context.block_service_context.block_context =
block_context;
nv_kthread_q_item_init(&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item,
access_counters_isr_bottom_half_entry,
&parent_gpu->access_counter_buffer[notif_buf_index]);
&parent_gpu->access_counters.buffer[notif_buf_index]);
// Access counters interrupts are initially disabled. They are
// dynamically enabled when the GPU is registered on a VA space.
@@ -431,9 +431,9 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_buffer)
return NV_ERR_NO_MEMORY;
parent_gpu->access_counter_buffer = uvm_kvmalloc_zero(sizeof(*parent_gpu->access_counter_buffer) *
index_count);
if (!parent_gpu->access_counter_buffer)
parent_gpu->access_counters.buffer = uvm_kvmalloc_zero(sizeof(*parent_gpu->access_counters.buffer) *
index_count);
if (!parent_gpu->access_counters.buffer)
return NV_ERR_NO_MEMORY;
if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc)
@@ -535,8 +535,8 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
// been successfully initialized.
uvm_parent_gpu_deinit_access_counters(parent_gpu, notif_buf_index);
if (parent_gpu->access_counter_buffer) {
uvm_access_counter_buffer_t *access_counter = &parent_gpu->access_counter_buffer[notif_buf_index];
if (parent_gpu->access_counters.buffer) {
uvm_access_counter_buffer_t *access_counter = &parent_gpu->access_counters.buffer[notif_buf_index];
block_context = access_counter->batch_service_context.block_service_context.block_context;
uvm_va_block_context_free(block_context);
}
@@ -546,7 +546,7 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
}
uvm_kvfree(parent_gpu->isr.access_counters);
uvm_kvfree(parent_gpu->access_counter_buffer);
uvm_kvfree(parent_gpu->access_counters.buffer);
}
if (parent_gpu->non_replayable_faults_supported) {


@@ -154,6 +154,7 @@ static uvm_hal_class_ops_t ce_table[] =
.id = HOPPER_DMA_COPY_A,
.parent_id = AMPERE_DMA_COPY_B,
.u.ce_ops = {
.phys_mode = uvm_hal_hopper_ce_phys_mode,
.semaphore_release = uvm_hal_hopper_ce_semaphore_release,
.semaphore_timestamp = uvm_hal_hopper_ce_semaphore_timestamp,
.semaphore_reduction_inc = uvm_hal_hopper_ce_semaphore_reduction_inc,
@@ -172,7 +173,9 @@ static uvm_hal_class_ops_t ce_table[] =
{
.id = BLACKWELL_DMA_COPY_A,
.parent_id = HOPPER_DMA_COPY_A,
.u.ce_ops = {},
.u.ce_ops = {
.memcopy_is_valid = uvm_hal_blackwell_ce_memcopy_is_valid,
},
},
{
.id = BLACKWELL_DMA_COPY_B,
@@ -1166,8 +1169,6 @@ void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src)
void uvm_hal_host_l2_invalidate_unsupported(uvm_push_t *push, uvm_aperture_t aperture)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
UVM_ERR_PRINT("L2 cache invalidation: Called on unsupported GPU %s (arch: 0x%x, impl: 0x%x)\n",
uvm_gpu_name(gpu), gpu->parent->rm_info.gpuArch, gpu->parent->rm_info.gpuImplementation);
UVM_ASSERT_MSG(false, "L2 invalidate is not supported on %s",
uvm_parent_gpu_name(gpu->parent));
}
}


@@ -348,6 +348,7 @@ void uvm_hal_hopper_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 of
typedef NvU32 (*uvm_hal_ce_phys_mode_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_maxwell_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_ampere_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_hopper_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
typedef NvU32 (*uvm_hal_ce_plc_mode_t)(void);
NvU32 uvm_hal_maxwell_ce_plc_mode(void);
@@ -368,6 +369,7 @@ typedef bool (*uvm_hal_ce_memcopy_is_valid)(uvm_push_t *push, uvm_gpu_address_t
bool uvm_hal_maxwell_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_blackwell_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
// Patching of the memcopy source; if not needed for a given architecture use
// the (empty) uvm_hal_ce_memcopy_patch_src_stub implementation


@@ -50,6 +50,10 @@ typedef enum
// It is directly encoded as SYS_COH in PTEs and CE/esched methods.
UVM_APERTURE_SYS,
// SYS_NON_COHERENT aperture is used when we must prevent PCIe atomics from
// being issued to BAR1 P2P addresses. It's only used to control the use of
// atomics with no other impact on the coherence model.
//
// On platforms that support the GPU coherently caching system memory,
// SYS_NON_COHERENT prevents other clients from snooping the GPU L2 cache.
// This allows noncoherent caching of system memory by GPUs on these
@@ -93,6 +97,11 @@ typedef enum
const char *uvm_aperture_string(uvm_aperture_t aperture);
static bool uvm_aperture_is_sys(uvm_aperture_t aperture)
{
return (aperture == UVM_APERTURE_SYS) || (aperture == UVM_APERTURE_SYS_NON_COHERENT);
}
static bool uvm_aperture_is_peer(uvm_aperture_t aperture)
{
return (aperture >= UVM_APERTURE_PEER_0) && (aperture < UVM_APERTURE_PEER_MAX);
@@ -514,9 +523,9 @@ static uvm_membar_t uvm_membar_max(uvm_membar_t membar_1, uvm_membar_t membar_2)
typedef enum
{
UVM_ACCESS_COUNTER_CLEAR_OP_NONE = 0,
UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED,
UVM_ACCESS_COUNTER_CLEAR_OP_ALL
UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED = 0,
UVM_ACCESS_COUNTER_CLEAR_OP_ALL,
UVM_ACCESS_COUNTER_CLEAR_OP_COUNT,
} uvm_access_counter_clear_op_t;
struct uvm_access_counter_buffer_entry_struct
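A side note on the renumbered enum above: with UVM_ACCESS_COUNTER_CLEAR_OP_NONE removed, UVM_ACCESS_COUNTER_CLEAR_OP_COUNT appears to take over the "not a real clear op" role (the Maxwell stub elsewhere in this compare now returns COUNT where it used to return NONE). A minimal sketch of that pattern follows; the per-op tracker array is an assumption for illustration, not something shown in this diff:

/* Sketch only: COUNT bounds the enum and can also size per-op state, e.g.
 * one tracker per clear type on GPUs that must serialize clear ops by type
 * (see access_counters_serialize_clear_ops_by_type in the arch hunks below).
 * This array is hypothetical, not taken from the source. */
static uvm_tracker_t clear_trackers[UVM_ACCESS_COUNTER_CLEAR_OP_COUNT];

static bool clear_op_is_valid(uvm_access_counter_clear_op_t op)
{
    return op < UVM_ACCESS_COUNTER_CLEAR_OP_COUNT;
}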


@@ -1758,7 +1758,6 @@ static void gpu_chunk_free(uvm_va_block_t *va_block,
return;
UVM_ASSERT(gpu_chunk->va_block == va_block);
UVM_ASSERT(gpu_chunk->va_block_page_index == page_index);
uvm_mmu_chunk_unmap(gpu_chunk, &va_block->tracker);
gpu_state->chunks[page_index] = NULL;
@@ -1867,7 +1866,6 @@ static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block,
if (gpu_state->chunks[page_index] == gpu_chunk) {
UVM_ASSERT(gpu_chunk->va_block == va_block);
UVM_ASSERT(gpu_chunk->va_block_page_index == page_index);
return NV_OK;
}
@@ -1887,11 +1885,7 @@ static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block,
uvm_processor_mask_set(&va_block->resident, gpu->id);
uvm_page_mask_set(&gpu_state->resident, page_index);
// It is safe to modify the page index field without holding any PMM locks
// because the chunk is allocated, which means that none of the other
// fields in the bitmap can change.
gpu_chunk->va_block = va_block;
gpu_chunk->va_block_page_index = page_index;
gpu_state->chunks[page_index] = gpu_chunk;


@@ -95,6 +95,10 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_serialize_clear_ops_by_type = false;
parent_gpu->access_bits_supported = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;
@@ -113,6 +117,8 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->ats.no_ats_range_required = true;
parent_gpu->ats.gmmu_pt_depth0_init_required = false;
// Hopper doesn't prefetch translations for physical requests, so the only
// concern would be if we enabled physical ATS with 4K pages. In that case
// we could see a mix of cached valid and invalid translations in the same


@@ -34,22 +34,68 @@ static NvU32 ce_aperture(uvm_aperture_t aperture)
HWCONST(C8B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB));
BUILD_BUG_ON(HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM) !=
HWCONST(C8B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM));
BUILD_BUG_ON(HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, NONCOHERENT_SYSMEM) !=
HWCONST(C8B5, SET_DST_PHYS_MODE, TARGET, NONCOHERENT_SYSMEM));
BUILD_BUG_ON(HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) !=
HWCONST(C8B5, SET_DST_PHYS_MODE, TARGET, PEERMEM));
if (aperture == UVM_APERTURE_SYS) {
return HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM);
}
else if (aperture == UVM_APERTURE_SYS_NON_COHERENT) {
// SYS_NON_COHERENT aperture is currently only used for certain
// BAR1 P2P addresses. The use of SYS vs. SYS_NON_COHERENT aperture
// controls the ability to use PCIe atomics to access the BAR1 region.
// The only way to potentially use atomic operations in UVM is a
// semaphore reduction operation.
// Since UVM doesn't use semaphore operations on peer (or physical)
// addresses, it'd be safe to encode SYS_NON_COHERENT aperture as
// COHERENT_SYSMEM for CE methods.
// NONCOHERENT_SYSMEM encoding is used for correctness and potential
// future use of SYS_NON_COHERENT aperture outside of atomics control
// in BAR1 P2P.
return HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, NONCOHERENT_SYSMEM);
}
else if (aperture == UVM_APERTURE_VID) {
return HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB);
}
else {
UVM_ASSERT(uvm_aperture_is_peer(aperture));
return HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) |
HWVALUE(C8B5, SET_SRC_PHYS_MODE, FLA, 0) |
HWVALUE(C8B5, SET_SRC_PHYS_MODE, PEER_ID, UVM_APERTURE_PEER_ID(aperture));
}
}
// Push SET_{SRC,DST}_PHYS mode if needed and return LAUNCH_DMA_{SRC,DST}_TYPE
// flags
NvU32 uvm_hal_hopper_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
NvU32 launch_dma_src_dst_type = 0;
if (src.is_virtual)
launch_dma_src_dst_type |= HWCONST(C8B5, LAUNCH_DMA, SRC_TYPE, VIRTUAL);
else
launch_dma_src_dst_type |= HWCONST(C8B5, LAUNCH_DMA, SRC_TYPE, PHYSICAL);
if (dst.is_virtual)
launch_dma_src_dst_type |= HWCONST(C8B5, LAUNCH_DMA, DST_TYPE, VIRTUAL);
else
launch_dma_src_dst_type |= HWCONST(C8B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);
if (!src.is_virtual && !dst.is_virtual) {
NV_PUSH_2U(C8B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture),
SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
}
else if (!src.is_virtual) {
NV_PUSH_1U(C8B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture));
}
else if (!dst.is_virtual) {
NV_PUSH_1U(C8B5, SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
}
return launch_dma_src_dst_type;
}
void uvm_hal_hopper_ce_offset_out(uvm_push_t *push, NvU64 offset_out)
{
NV_PUSH_2U(C8B5, OFFSET_OUT_UPPER, HWVALUE(C8B5, OFFSET_OUT_UPPER, UPPER, NvOffset_HI32(offset_out)),


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2020-2024 NVIDIA Corporation
Copyright (c) 2020-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -121,7 +121,10 @@ static NvLength allocation_size_hopper(NvU32 depth, NvU64 page_size)
if (depth == 5 && page_size == UVM_PAGE_SIZE_64K)
return 256;
// depth 0 requires only a 16-byte allocation, but it must be 4k aligned.
// depth 0 requires only a 16-byte allocation to house the 2 entries at the
// 0-depth level. Due to HW constraints, the effective minimum allocation
// size is 32 bytes. It must be 4k aligned.
// See comments in uvm_gpu.h:gmmu_pt_depth0_init_required for details.
return 4096;
}
@@ -250,7 +253,7 @@ static NvU64 unmapped_pte_hopper(NvU64 page_size)
HWCONST64(_MMU_VER3, PTE, PCF, NO_VALID_4KB_PAGE);
}
static NvU64 poisoned_pte_hopper(void)
static NvU64 poisoned_pte_hopper(uvm_page_tree_t *tree)
{
// An invalid PTE won't be fatal from faultable units like SM, which is the
// most likely source of bad PTE accesses.
@@ -264,7 +267,7 @@ static NvU64 poisoned_pte_hopper(void)
// be aligned to page_size.
NvU64 phys_addr = 0x2bad000000ULL;
NvU64 pte_bits = make_pte_hopper(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
NvU64 pte_bits = tree->hal->make_pte(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
return WRITE_HWCONST64(pte_bits, _MMU_VER3, PTE, PCF, PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD);
}
@@ -380,10 +383,10 @@ static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_page_
pde_bits |= HWCONST64(_MMU_VER3, PDE, APERTURE, VIDEO_MEMORY);
break;
case UVM_APERTURE_SYS_NON_COHERENT:
// SYS_NON_COHERENT aperture is currently only used for testing
// in kernel_driver_get_rm_ptes. Since UVM never places page
// tables in peer memory. SYS_NON_COHERENT should never be used
// in PDEs.
// SYS_NON_COHERENT aperture is currently only used for some
// BAR1 P2P addresses and testing in kernel_driver_get_rm_ptes.
// Since UVM never places page tables in peer memory,
// SYS_NON_COHERENT should never be used in PDEs.
// falls through
default:
UVM_ASSERT_MSG(0, "Invalid aperture: %d\n", phys_alloc->addr.aperture);
@@ -415,10 +418,10 @@ static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_pag
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, APERTURE_BIG, VIDEO_MEMORY);
break;
case UVM_APERTURE_SYS_NON_COHERENT:
// SYS_NON_COHERENT aperture is currently only used for testing
// in kernel_driver_get_rm_ptes. Since UVM never places page
// tables in peer memory. SYS_NON_COHERENT should never be used
// in PDEs.
// SYS_NON_COHERENT aperture is currently only used for some
// BAR1 P2P addresses and testing in kernel_driver_get_rm_ptes.
// Since UVM never places page tables in peer memory,
// SYS_NON_COHERENT should never be used in PDEs.
// falls through
default:
UVM_ASSERT_MSG(0, "Invalid big aperture %d\n", phys_alloc->addr.aperture);
@@ -453,10 +456,10 @@ static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_p
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, APERTURE_SMALL, VIDEO_MEMORY);
break;
case UVM_APERTURE_SYS_NON_COHERENT:
// SYS_NON_COHERENT aperture is currently only used for testing
// in kernel_driver_get_rm_ptes. Since UVM never places page
// tables in peer memory. SYS_NON_COHERENT should never be used
// in PDEs.
// SYS_NON_COHERENT aperture is currently only used for some
// BAR1 P2P addresses and testing in kernel_driver_get_rm_ptes.
// Since UVM never places page tables in peer memory,
// SYS_NON_COHERENT should never be used in PDEs.
// falls through
default:
UVM_ASSERT_MSG(0, "Invalid small aperture %d\n", phys_alloc->addr.aperture);


@@ -147,40 +147,6 @@ typedef struct
NV_STATUS rmStatus; // OUT
} UVM_SET_STREAM_STOPPED_PARAMS;
//
// UvmCallTestFunction
//
#define UVM_RUN_TEST UVM_IOCTL_BASE(9)
typedef struct
{
NvProcessorUuid gpuUuid; // IN
NvU32 test; // IN
struct
{
NvProcessorUuid peerGpuUuid; // IN
NvU32 peerId; // IN
} multiGpu;
NV_STATUS rmStatus; // OUT
} UVM_RUN_TEST_PARAMS;
//
// This is a magic offset for mmap. Any mapping of an offset above this
// threshold will be treated as a counters mapping, not as an allocation
// mapping. Since allocation offsets must be identical to the virtual address
// of the mapping, this threshold has to be an offset that cannot be
// a valid virtual address.
//
#if defined(__linux__)
#if defined(NV_64_BITS)
#define UVM_EVENTS_OFFSET_BASE (1UL << 63)
#define UVM_COUNTERS_OFFSET_BASE (1UL << 62)
#else
#define UVM_EVENTS_OFFSET_BASE (1UL << 31)
#define UVM_COUNTERS_OFFSET_BASE (1UL << 30)
#endif
#endif // defined(__linux___)
//
// UvmAddSession
//
@@ -189,27 +155,9 @@ typedef struct
typedef struct
{
NvU32 pidTarget; // IN
#ifdef __linux__
NvP64 countersBaseAddress NV_ALIGN_BYTES(8); // IN
NvS32 sessionIndex; // OUT (session index that got added)
#endif
NV_STATUS rmStatus; // OUT
} UVM_ADD_SESSION_PARAMS;
//
// UvmRemoveSession
//
#define UVM_REMOVE_SESSION UVM_IOCTL_BASE(11)
typedef struct
{
#ifdef __linux__
NvS32 sessionIndex; // IN (session index to be removed)
#endif
NV_STATUS rmStatus; // OUT
} UVM_REMOVE_SESSION_PARAMS;
#define UVM_MAX_COUNTERS_PER_IOCTL_CALL 32
//
@@ -219,9 +167,6 @@ typedef struct
typedef struct
{
#ifdef __linux__
NvS32 sessionIndex; // IN
#endif
UvmCounterConfig config[UVM_MAX_COUNTERS_PER_IOCTL_CALL]; // IN
NvU32 count; // IN
NV_STATUS rmStatus; // OUT
@@ -234,9 +179,6 @@ typedef struct
typedef struct
{
#ifdef __linux__
NvS32 sessionIndex; // IN
#endif
NvU32 scope; // IN (UvmCounterScope)
NvU32 counterName; // IN (UvmCounterName)
NvProcessorUuid gpuUuid; // IN
@@ -251,15 +193,10 @@ typedef struct
typedef struct
{
#ifdef __linux__
NvS32 sessionIndex; // IN
#endif
NvU32 eventQueueIndex; // OUT
NvU64 queueSize NV_ALIGN_BYTES(8); // IN
NvU64 notificationCount NV_ALIGN_BYTES(8); // IN
#if defined(WIN32) || defined(WIN64)
NvU64 notificationHandle NV_ALIGN_BYTES(8); // IN
#endif
NvU32 timeStampType; // IN (UvmEventTimeStampType)
NV_STATUS rmStatus; // OUT
} UVM_CREATE_EVENT_QUEUE_PARAMS;
@@ -271,9 +208,6 @@ typedef struct
typedef struct
{
#ifdef __linux__
NvS32 sessionIndex; // IN
#endif
NvU32 eventQueueIndex; // IN
NV_STATUS rmStatus; // OUT
} UVM_REMOVE_EVENT_QUEUE_PARAMS;
@@ -285,9 +219,6 @@ typedef struct
typedef struct
{
#ifdef __linux__
NvS32 sessionIndex; // IN
#endif
NvU32 eventQueueIndex; // IN
NvP64 userRODataAddr NV_ALIGN_BYTES(8); // IN
NvP64 userRWDataAddr NV_ALIGN_BYTES(8); // IN
@@ -304,39 +235,12 @@ typedef struct
typedef struct
{
#ifdef __linux__
NvS32 sessionIndex; // IN
#endif
NvU32 eventQueueIndex; // IN
NvS32 eventType; // IN
NvU32 enable; // IN
NV_STATUS rmStatus; // OUT
} UVM_EVENT_CTRL_PARAMS;
//
// UvmRegisterMpsServer
//
#define UVM_REGISTER_MPS_SERVER UVM_IOCTL_BASE(18)
typedef struct
{
NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS_V1]; // IN
NvU32 numGpus; // IN
NvU64 serverId NV_ALIGN_BYTES(8); // OUT
NV_STATUS rmStatus; // OUT
} UVM_REGISTER_MPS_SERVER_PARAMS;
//
// UvmRegisterMpsClient
//
#define UVM_REGISTER_MPS_CLIENT UVM_IOCTL_BASE(19)
typedef struct
{
NvU64 serverId NV_ALIGN_BYTES(8); // IN
NV_STATUS rmStatus; // OUT
} UVM_REGISTER_MPS_CLIENT_PARAMS;
//
// UvmEventGetGpuUuidTable
//
@@ -442,7 +346,6 @@ typedef struct
typedef struct
{
NvProcessorUuid gpuUuid; // IN
NvHandle hClient; // IN
NvHandle hChannel; // IN
NV_STATUS rmStatus; // OUT
@@ -510,7 +413,6 @@ typedef struct
typedef struct
{
NvU64 base NV_ALIGN_BYTES(8); // IN
NvU64 length NV_ALIGN_BYTES(8); // IN
NV_STATUS rmStatus; // OUT
} UVM_FREE_PARAMS;
@@ -1128,8 +1030,8 @@ typedef struct
typedef struct
{
NvU64 base NV_ALIGN_BYTES(8); // IN
NvU64 length; // IN
NvU64 flags; // IN
NvU64 length NV_ALIGN_BYTES(8); // IN
NvU64 flags NV_ALIGN_BYTES(8); // IN
NV_STATUS rmStatus; // OUT
} UVM_DISCARD_PARAMS;


@@ -40,10 +40,6 @@
#include "nv-linux.h"
#include <linux/log2.h>
#if defined(NV_PRIO_TREE_PRESENT)
#include <linux/prio_tree.h>
#endif
#include <linux/jhash.h>
#include <linux/rwsem.h>
#include <linux/rbtree.h>
@@ -117,21 +113,14 @@
#define NVIDIA_UVM_PRETTY_PRINTING_PREFIX "nvidia-uvm: "
#define pr_fmt(fmt) NVIDIA_UVM_PRETTY_PRINTING_PREFIX fmt
// Dummy printing function that maintains syntax and format specifier checking
// but doesn't print anything and doesn't evaluate the print parameters. This is
// roughly equivalent to the kernel's no_printk function. We use this instead
// because:
// 1) no_printk was not available until 2.6.36
// 2) Until 4.5 no_printk was implemented as a static function, meaning its
// parameters were always evaluated
#define UVM_NO_PRINT(fmt, ...) \
do { \
if (0) \
printk(fmt, ##__VA_ARGS__); \
} while (0)
#define NV_UVM_GFP_FLAGS (GFP_KERNEL | __GFP_NOMEMALLOC)
#if defined(NV_GET_DEV_PAGEMAP_HAS_PGMAP_ARG)
#define NV_GET_DEV_PAGEMAP(pfn) get_dev_pagemap(pfn, NULL)
#else
#define NV_GET_DEV_PAGEMAP get_dev_pagemap
#endif
/* Return a nanosecond-precise value */
static inline NvU64 NV_GETTIME(void)
{
@@ -141,25 +130,6 @@ static inline NvU64 NV_GETTIME(void)
return (NvU64) timespec64_to_ns(&tm);
}
// atomic_long_read_acquire and atomic_long_set_release were added in commit
// b5d47ef9ea5c5fe31d7eabeb79f697629bd9e2cb ("locking/atomics: Switch to
// generated atomic-long") in v5.1 (2019-05-05).
// TODO: Bug 3849079: We always use these definitions on newer kernels.
#define atomic_long_read_acquire uvm_atomic_long_read_acquire
static inline long uvm_atomic_long_read_acquire(atomic_long_t *p)
{
long val = atomic_long_read(p);
smp_mb();
return val;
}
#define atomic_long_set_release uvm_atomic_long_set_release
static inline void uvm_atomic_long_set_release(atomic_long_t *p, long v)
{
smp_mb();
atomic_long_set(p, v);
}
typedef struct
{
struct mem_cgroup *new_memcg;


@@ -407,6 +407,11 @@
// Operations allowed while holding this lock
// - Pushing work to SEC2 channels
//
// - Access counters clear operations
// Order: UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS
//
// It protects the parent_gpu's access counters clear tracker.
//
// - Concurrent push semaphore
// Order: UVM_LOCK_ORDER_PUSH
// Semaphore (uvm_semaphore_t)
@@ -432,11 +437,6 @@
// Order: UVM_LOCK_ORDER_PMM_ROOT_CHUNK
// Exclusive bitlock (mutex) per each root chunk internal to PMM.
//
// - Access counters clear operations
// Order: UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS
//
// It protects the parent_gpu's access counters clear tracker.
//
// - Channel lock
// Order: UVM_LOCK_ORDER_CHANNEL
// Spinlock (uvm_spinlock_t) or exclusive lock (mutex)
@@ -524,11 +524,11 @@ typedef enum
UVM_LOCK_ORDER_KEY_ROTATION_WLC,
UVM_LOCK_ORDER_CSL_WLC_PUSH,
UVM_LOCK_ORDER_CSL_SEC2_PUSH,
UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS,
UVM_LOCK_ORDER_PUSH,
UVM_LOCK_ORDER_PMM,
UVM_LOCK_ORDER_PMM_PMA,
UVM_LOCK_ORDER_PMM_ROOT_CHUNK,
UVM_LOCK_ACCESS_COUNTERS_CLEAR_OPS,
UVM_LOCK_ORDER_CHANNEL,
UVM_LOCK_ORDER_WLC_CHANNEL,
UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST,


@@ -61,6 +61,10 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = false;
parent_gpu->access_counters_serialize_clear_ops_by_type = false;
parent_gpu->access_bits_supported = false;
parent_gpu->fault_cancel_va_supported = false;
parent_gpu->scoped_atomics_supported = false;
@@ -75,5 +79,7 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->ats.no_ats_range_required = false;
parent_gpu->ats.gmmu_pt_depth0_init_required = false;
parent_gpu->conf_computing.per_channel_key_rotation = false;
}


@@ -268,7 +268,7 @@ void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu
// Check if the copy is over NVLINK and simulate dropped traffic if there's
// an NVLINK error.
// Src address cannot be peer as that wouldn't pass the valid check above.
if (uvm_gpu_address_is_peer(gpu, dst) && uvm_gpu_get_injected_nvlink_error(gpu) != NV_OK)
if (uvm_gpu_get_injected_nvlink_error(gpu) != NV_OK && uvm_gpu_address_is_peer(gpu, dst))
size = 0;
gpu->parent->ce_hal->memcopy_patch_src(push, &src);


@@ -352,7 +352,7 @@ uvm_hal_maxwell_access_counter_query_clear_op_unsupported(uvm_parent_gpu_t *pare
NvU32 num_entries)
{
UVM_ASSERT_MSG(false, "host access_counter_query_clear_op called on Maxwell GPU\n");
return UVM_ACCESS_COUNTER_CLEAR_OP_NONE;
return UVM_ACCESS_COUNTER_CLEAR_OP_COUNT;
}
NvU64 uvm_hal_maxwell_get_time(uvm_gpu_t *gpu)


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2023 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -253,7 +253,7 @@ static NvU64 make_pte_maxwell(uvm_aperture_t aperture, NvU64 address, uvm_prot_t
else
pte_bits |= HWCONST64(_MMU, PTE, VOL, TRUE);
// aperture 34:32
// aperture 34:33
if (aperture == UVM_APERTURE_SYS)
aperture_bits = NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY;
else if (aperture == UVM_APERTURE_VID)
@@ -296,7 +296,7 @@ static NvU64 make_sked_reflected_pte_maxwell(void)
return pte_bits;
}
static NvU64 poisoned_pte_maxwell(void)
static NvU64 poisoned_pte_maxwell(uvm_page_tree_t *tree)
{
// An invalid PTE is also fatal on Maxwell, but a PRIV violation will
// immediately identify bad PTE usage.
@@ -309,7 +309,7 @@ static NvU64 poisoned_pte_maxwell(void)
// This address has to fit within 37 bits (max address width of vidmem) and
// be aligned to page_size.
NvU64 phys_addr = 0x1bad000000ULL;
NvU64 pte_bits = make_pte_maxwell(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
NvU64 pte_bits = tree->hal->make_pte(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
return WRITE_HWCONST64(pte_bits, _MMU, PTE, PRIVILEGE, TRUE);
}
@@ -317,8 +317,13 @@ static NvU64 poisoned_pte_maxwell(void)
// Sparse mappings are not supported.
static NvU64 make_sparse_pte_maxwell_unsupported(void)
{
NvU64 pte_bits;
UVM_ASSERT_MSG(0, "Sparse mappings unsupported on pre-Pascal GPUs\n");
return poisoned_pte_maxwell();
pte_bits = HWCONST64(_MMU, PTE, VALID, FALSE);
return pte_bits;
}
static uvm_mmu_mode_hal_t maxwell_64_mmu_mode_hal =


@@ -588,7 +588,7 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
service_context->block_context->make_resident.dest_nid))
skipped_migrate = true;
}
else if (uvm_processor_mask_test(&managed_range->va_range.uvm_lite_gpus, dest_id) &&
else if (uvm_processor_mask_test(&managed_range->uvm_lite_gpus, dest_id) &&
!uvm_va_policy_preferred_location_equal(policy, dest_id, NUMA_NO_NODE)) {
// Don't migrate to a non-faultable GPU that is in UVM-Lite mode,
// unless it's the preferred location


@@ -43,8 +43,6 @@
#ifdef UVM_MIGRATE_VMA_SUPPORTED
static struct kmem_cache *g_uvm_migrate_vma_state_cache __read_mostly;
static const gfp_t g_migrate_vma_gfp_flags = NV_UVM_GFP_FLAGS | GFP_HIGHUSER_MOVABLE | __GFP_THISNODE;
static uvm_sgt_t *uvm_select_sgt(uvm_processor_id_t src_id, int src_nid, migrate_vma_state_t *state)
@@ -1497,7 +1495,7 @@ NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
uvm_migrate_args->dst_node_id = uvm_gpu_numa_node(gpu);
}
state = nv_kmem_cache_zalloc(g_uvm_migrate_vma_state_cache, NV_UVM_GFP_FLAGS);
state = uvm_kvmalloc_zero(sizeof(migrate_vma_state_t));
if (!state)
return NV_ERR_NO_MEMORY;
@@ -1519,22 +1517,17 @@ NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
out:
uvm_kvfree(state->dma.sgt_cpu);
uvm_kvfree(state->cpu_page_mask);
kmem_cache_free(g_uvm_migrate_vma_state_cache, state);
uvm_kvfree(state);
return status;
}
NV_STATUS uvm_migrate_pageable_init(void)
{
g_uvm_migrate_vma_state_cache = NV_KMEM_CACHE_CREATE("migrate_vma_state_t", migrate_vma_state_t);
if (!g_uvm_migrate_vma_state_cache)
return NV_ERR_NO_MEMORY;
return NV_OK;
}
void uvm_migrate_pageable_exit(void)
{
kmem_cache_destroy_safe(&g_uvm_migrate_vma_state_cache);
}
#endif


@@ -152,9 +152,10 @@ static NV_STATUS phys_mem_allocate_sysmem(uvm_page_tree_t *tree, NvLength size,
}
// The aperture may filter the biggest page size:
// - UVM_APERTURE_VID biggest page size on vidmem mappings
// - UVM_APERTURE_SYS biggest page size on sysmem mappings
// - UVM_APERTURE_PEER_0-7 biggest page size on peer mappings
// - UVM_APERTURE_VID biggest page size on vidmem mappings
// - UVM_APERTURE_SYS biggest page size on sysmem mappings
// - UVM_APERTURE_SYS_NON_COHERENT biggest page size on BAR1 mappings
// - UVM_APERTURE_PEER_0-7 biggest page size on peer mappings
static NvU64 mmu_biggest_page_size(uvm_page_tree_t *tree, uvm_aperture_t aperture)
{
UVM_ASSERT(aperture < UVM_APERTURE_DEFAULT);
@@ -306,7 +307,7 @@ static void *uvm_mmu_page_table_cpu_map(uvm_gpu_t *gpu, uvm_mmu_page_table_alloc
}
else {
NvU64 page_offset = offset_in_page(phys_alloc->addr.address);
return kmap(uvm_mmu_page_table_page(gpu, phys_alloc)) + page_offset;
return (char *)kmap(uvm_mmu_page_table_page(gpu, phys_alloc)) + page_offset;
}
}
@@ -392,7 +393,7 @@ static void pde_fill_gpu(uvm_page_tree_t *tree,
NvU64 pde_data[2], entry_size;
uvm_gpu_address_t pde_entry_addr = uvm_mmu_gpu_address(tree->gpu, directory->phys_alloc.addr);
NvU32 max_inline_entries;
uvm_push_flag_t push_membar_flag = UVM_PUSH_FLAG_COUNT;
uvm_membar_t push_membar;
uvm_gpu_address_t inline_data_addr;
uvm_push_inline_data_t inline_data;
NvU32 entry_count, i, j;
@@ -403,12 +404,7 @@ static void pde_fill_gpu(uvm_page_tree_t *tree,
UVM_ASSERT(sizeof(pde_data) >= entry_size);
max_inline_entries = UVM_PUSH_INLINE_DATA_MAX_SIZE / entry_size;
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
push_membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_NONE;
else if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
push_membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_GPU;
push_membar = uvm_push_get_and_reset_membar_flag(push);
pde_entry_addr.address += start_index * entry_size;
for (i = 0; i < pde_count;) {
@@ -420,11 +416,11 @@ static void pde_fill_gpu(uvm_page_tree_t *tree,
entry_count = min(pde_count - i, max_inline_entries);
// No membar is needed until the last memory operation. Otherwise,
// use caller's membar flag.
// use caller's membar.
if ((i + entry_count) < pde_count)
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
else if (push_membar_flag != UVM_PUSH_FLAG_COUNT)
uvm_push_set_flag(push, push_membar_flag);
uvm_push_set_membar(push, UVM_MEMBAR_NONE);
else
uvm_push_set_membar(push, push_membar);
uvm_push_inline_data_begin(push, &inline_data);
for (j = 0; j < entry_count; j++) {
@@ -458,6 +454,16 @@ static void pde_fill(uvm_page_tree_t *tree,
pde_fill_cpu(tree, directory, start_index, pde_count, phys_addr);
}
static void phys_mem_init_memset(uvm_gpu_t *gpu, uvm_push_t *push, uvm_page_directory_t *dir, NvU64 value)
{
NvU64 size = dir->phys_alloc.size;
if (push)
gpu->parent->ce_hal->memset_8(push, uvm_mmu_gpu_address(push->gpu, dir->phys_alloc.addr), value, size);
else
uvm_mmu_page_table_cpu_memset_8(gpu, &dir->phys_alloc, 0, value, size / sizeof(value));
}
static void phys_mem_init(uvm_page_tree_t *tree, NvU64 page_size, uvm_page_directory_t *dir, uvm_push_t *push)
{
NvU64 entries_count = uvm_mmu_page_tree_entries(tree, dir->depth, page_size);
@@ -490,24 +496,38 @@ static void phys_mem_init(uvm_page_tree_t *tree, NvU64 page_size, uvm_page_direc
}
// Initialize the memory to a reasonable value.
if (push) {
tree->gpu->parent->ce_hal->memset_8(push,
uvm_mmu_gpu_address(tree->gpu, dir->phys_alloc.addr),
*clear_bits,
dir->phys_alloc.size);
}
else {
uvm_mmu_page_table_cpu_memset_8(tree->gpu,
&dir->phys_alloc,
0,
*clear_bits,
dir->phys_alloc.size / sizeof(*clear_bits));
}
phys_mem_init_memset(tree->gpu, push, dir, *clear_bits);
}
else {
// Initialize the entire directory allocated page table area due to Bug
// 5282495. See comment in ats.gmmu_pt_depth0_init_required declaration.
if (dir->depth == 0 && tree->gpu->parent->ats.gmmu_pt_depth0_init_required) {
uvm_membar_t push_membar;
// Retrieve and store the caller's membar, since
// phys_mem_init_memset() will consume it.
if (push) {
push_membar = uvm_push_get_and_reset_membar_flag(push);
// No membar is required, pde_fill() will push the caller's
// membar.
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
}
// phys_mem_init_memset() consumes and resets the CE's push pipeline
// flag, which is required to avoid WaW issues since pde_fill()
// will write to the same range and its first operation is not
// pipelined.
phys_mem_init_memset(tree->gpu, push, dir, 0);
if (push) {
// Restore the caller's membar for pde_fill().
uvm_push_set_membar(push, push_membar);
}
}
pde_fill(tree, dir, 0, entries_count, phys_allocs, push);
}
}
static uvm_page_directory_t *allocate_directory(uvm_page_tree_t *tree,
@@ -1671,7 +1691,7 @@ static NV_STATUS poison_ptes(uvm_page_tree_t *tree,
tree->gpu->parent->ce_hal->memset_8(&push,
uvm_mmu_gpu_address(tree->gpu, pte_dir->phys_alloc.addr),
tree->hal->poisoned_pte(),
tree->hal->poisoned_pte(tree),
pte_dir->phys_alloc.size);
// If both the new PTEs and the parent PDE are in vidmem, then a GPU-
@@ -2388,23 +2408,21 @@ NV_STATUS uvm_mmu_create_peer_identity_mappings(uvm_gpu_t *gpu, uvm_gpu_t *peer)
uvm_aperture_t aperture;
NvU64 phys_offset;
uvm_gpu_identity_mapping_t *peer_mapping;
uvm_gpu_phys_address_t phys_address;
UVM_ASSERT(gpu->parent->peer_copy_mode < UVM_GPU_PEER_COPY_MODE_COUNT);
if (gpu->parent->peer_copy_mode != UVM_GPU_PEER_COPY_MODE_VIRTUAL || peer->mem_info.size == 0)
return NV_OK;
aperture = uvm_gpu_peer_aperture(gpu, peer);
// Use transformation of address 0 to get offset and aperture for all
// other addresses.
phys_address = uvm_gpu_peer_phys_address(peer, 0, gpu);
aperture = phys_address.aperture;
phys_offset = phys_address.address;
page_size = mmu_biggest_page_size(&gpu->address_space_tree, aperture);
size = UVM_ALIGN_UP(peer->mem_info.max_allocatable_address + 1, page_size);
peer_mapping = uvm_gpu_get_peer_mapping(gpu, peer->id);
phys_offset = 0ULL;
if (uvm_parent_gpus_are_nvswitch_connected(gpu->parent, peer->parent)) {
// Add the 47-bit physical address routing bits for this peer to the
// generated PTEs
phys_offset = peer->parent->nvswitch_info.fabric_memory_window_start;
}
UVM_ASSERT(page_size);
UVM_ASSERT(size);
@@ -2983,16 +3001,12 @@ NV_STATUS uvm_mmu_l2_invalidate(uvm_gpu_t *gpu, uvm_aperture_t aperture)
UVM_CHANNEL_TYPE_MEMOPS,
&push,
"L2 cache invalidate");
if (status != NV_OK) {
UVM_ERR_PRINT("L2 cache invalidation: Failed to begin push, status: %s\n", nvstatusToString(status));
if (status != NV_OK)
return status;
}
gpu->parent->host_hal->l2_invalidate(&push, aperture);
status = uvm_push_end_and_wait(&push);
if (status != NV_OK)
UVM_ERR_PRINT("ERROR: L2 cache invalidation: Failed to complete push, status: %s\n", nvstatusToString(status));
return status;
}


@@ -217,7 +217,7 @@ struct uvm_mmu_mode_hal_struct
// Bit pattern used for debug purposes to clobber PTEs which ought to be
// unused. In practice this will generate a PRIV violation or a physical
// memory out-of-range error so we can immediately identify bad PTE usage.
NvU64 (*poisoned_pte)(void);
NvU64 (*poisoned_pte)(uvm_page_tree_t *tree);
// Write a PDE bit-pattern to entry based on the data in allocs (which may
// point to two items for dual PDEs).
@@ -228,7 +228,7 @@ struct uvm_mmu_mode_hal_struct
// invalid/clean PDE entries.
void (*make_pde)(void *entry, uvm_mmu_page_table_alloc_t **allocs, uvm_page_directory_t *dir, NvU32 child_index);
// size of an entry in a directory/table. Generally either 8 or 16 bytes.
// size of an entry in a directory/table. Generally either 8 or 16 bytes.
// (in the case of Pascal dual PDEs)
NvLength (*entry_size)(NvU32 depth);


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -44,6 +44,10 @@
#include "clc36f.h"
#include "clc3b5.h"
// TURING_*
#include "clc46f.h"
#include "clc5b5.h"
// AMPERE_*
#include "clc56f.h"
#include "clc6b5.h"
@@ -1783,11 +1787,16 @@ static NV_STATUS entry_test_page_size_volta(uvm_gpu_t *gpu, size_t page_size)
return entry_test_page_size_pascal(gpu, page_size);
}
static NV_STATUS entry_test_page_size_ampere(uvm_gpu_t *gpu, size_t page_size)
static NV_STATUS entry_test_page_size_turing(uvm_gpu_t *gpu, size_t page_size)
{
return entry_test_page_size_volta(gpu, page_size);
}
static NV_STATUS entry_test_page_size_ampere(uvm_gpu_t *gpu, size_t page_size)
{
return entry_test_page_size_turing(gpu, page_size);
}
static NV_STATUS entry_test_page_size_hopper(uvm_gpu_t *gpu, size_t page_size)
{
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
@@ -1810,11 +1819,13 @@ typedef NV_STATUS (*entry_test_page_size_func)(uvm_gpu_t *gpu, size_t page_size)
static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
{
NV_STATUS status = NV_OK;
static const NvU64 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
NvU64 pde_bits;
uvm_mmu_page_table_alloc_t *phys_allocs[2];
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999000LL);
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
uvm_page_tree_t tree;
uvm_mmu_mode_hal_t *hal;
uvm_page_directory_t dir;
NvU64 big_page_size, page_size;
@@ -1895,19 +1906,30 @@ static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
0x1BBBBBB000LL,
UVM_PROT_READ_ONLY,
UVM_MMU_PTE_FLAGS_CACHED) == 0x80000002FBBBBBB5LL);
TEST_NV_CHECK_RET(test_page_tree_init(gpu, big_page_size, &tree));
TEST_CHECK_GOTO(tree.hal->poisoned_pte(&tree) == 0x800000011bad0007ull, cleanup_tree);
uvm_page_tree_deinit(&tree);
}
return NV_OK;
cleanup_tree:
uvm_page_tree_deinit(&tree);
return status;
}
static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
{
NV_STATUS status = NV_OK;
NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
NvU64 pde_bits[2];
size_t i, num_page_sizes;
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999000LL);
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
uvm_page_tree_t tree;
uvm_page_directory_t dir;
// big versions have [11:8] set as well to test the page table merging
@@ -1993,7 +2015,16 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent
for (i = 0; i < num_page_sizes; i++)
TEST_NV_CHECK_RET(entry_test_page_size(gpu, page_sizes[i]));
TEST_NV_CHECK_RET(test_page_tree_init(gpu, UVM_PAGE_SIZE_64K, &tree));
TEST_CHECK_GOTO(tree.hal->poisoned_pte(&tree) == 0x1bad000e9ull, cleanup_tree);
uvm_page_tree_deinit(&tree);
return NV_OK;
cleanup_tree:
uvm_page_tree_deinit(&tree);
return status;
}
static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
@@ -2070,6 +2101,30 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr
return NV_OK;
}
static NV_STATUS entry_test_turing(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
{
NV_STATUS status = NV_OK;
uvm_page_tree_t tree;
NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
NvU32 i, num_page_sizes;
num_page_sizes = get_page_sizes(gpu, page_sizes);
for (i = 0; i < num_page_sizes; i++)
TEST_NV_CHECK_RET(entry_test_page_size(gpu, page_sizes[i]));
TEST_NV_CHECK_RET(test_page_tree_init(gpu, UVM_PAGE_SIZE_64K, &tree));
TEST_CHECK_GOTO(tree.hal->poisoned_pte(&tree) == 0x6000001bad000e9ull, cleanup_tree);
uvm_page_tree_deinit(&tree);
return NV_OK;
cleanup_tree:
uvm_page_tree_deinit(&tree);
return status;
}
static NV_STATUS entry_test_ampere(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
{
NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
@@ -2093,6 +2148,7 @@ static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func ent
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999999000LL);
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0xBBBBBBB000LL);
uvm_page_tree_t tree;
// Big versions have [11:8] set as well to test the page table merging
uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999999900LL);
@@ -2101,6 +2157,7 @@ static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func ent
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
memset(dirs, 0, sizeof(dirs));
// Fake directory tree.
for (i = 0; i < ARRAY_SIZE(dirs); i++) {
dirs[i] = uvm_kvmalloc_zero(sizeof(uvm_page_directory_t) + sizeof(dirs[i]->entries[0]) * 512);
@@ -2272,10 +2329,19 @@ static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func ent
// sked reflected
TEST_CHECK_GOTO(hal->make_sked_reflected_pte() == 0xF0F, cleanup);
// poisoned - use a fake tree as it is required by poisoned_pte's MMU HAL.
// The tests above manually set the MMU HAL but used functions that don't
// have a uvm_page_tree_t argument.
TEST_NV_CHECK_GOTO(test_page_tree_init(gpu, UVM_PAGE_SIZE_64K, &tree), cleanup);
TEST_CHECK_GOTO(tree.hal->poisoned_pte(&tree) == 0x2bad0006f9ull, cleanup_tree);
num_page_sizes = get_page_sizes(gpu, page_sizes);
for (i = 0; i < num_page_sizes; i++)
TEST_NV_CHECK_GOTO(entry_test_page_size(gpu, page_sizes[i]), cleanup);
TEST_NV_CHECK_GOTO(entry_test_page_size(gpu, page_sizes[i]), cleanup_tree);
cleanup_tree:
uvm_page_tree_deinit(&tree);
cleanup:
for (i = 0; i < ARRAY_SIZE(dirs); i++)
@@ -2286,6 +2352,9 @@ cleanup:
static NV_STATUS entry_test_blackwell(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
{
// We use entry_test_ampere() because we only want to check for an
// additional page size, no MMU page table format changes between Hopper and
// Blackwell.
return entry_test_ampere(gpu, entry_test_page_size_blackwell);
}
@@ -2523,6 +2592,15 @@ static NV_STATUS fake_gpu_init_volta(uvm_gpu_t *fake_gpu)
fake_gpu);
}
static NV_STATUS fake_gpu_init_turing(uvm_gpu_t *fake_gpu)
{
return fake_gpu_init(TURING_CHANNEL_GPFIFO_A,
TURING_DMA_COPY_A,
NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
0,
fake_gpu);
}
static NV_STATUS fake_gpu_init_ampere(uvm_gpu_t *fake_gpu)
{
return fake_gpu_init(AMPERE_CHANNEL_GPFIFO_A,
@@ -2641,6 +2719,15 @@ static NV_STATUS volta_test_page_tree(uvm_gpu_t *volta)
return NV_OK;
}
static NV_STATUS turing_test_page_tree(uvm_gpu_t *turing)
{
TEST_CHECK_RET(fake_gpu_init_turing(turing) == NV_OK);
MEM_NV_CHECK_RET(entry_test_turing(turing, entry_test_page_size_turing), NV_OK);
return NV_OK;
}
static NV_STATUS ampere_test_page_tree(uvm_gpu_t *ampere)
{
NvU32 i, tlb_batch_saved_max_pages;
@@ -2803,6 +2890,7 @@ NV_STATUS uvm_test_page_tree(UVM_TEST_PAGE_TREE_PARAMS *params, struct file *fil
TEST_NV_CHECK_GOTO(maxwell_test_page_tree(gpu), done);
TEST_NV_CHECK_GOTO(pascal_test_page_tree(gpu), done);
TEST_NV_CHECK_GOTO(volta_test_page_tree(gpu), done);
TEST_NV_CHECK_GOTO(turing_test_page_tree(gpu), done);
TEST_NV_CHECK_GOTO(ampere_test_page_tree(gpu), done);
TEST_NV_CHECK_GOTO(hopper_test_page_tree(gpu), done);
TEST_NV_CHECK_GOTO(blackwell_test_page_tree(gpu), done);


@@ -90,6 +90,10 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = false;
parent_gpu->access_counters_serialize_clear_ops_by_type = false;
parent_gpu->access_bits_supported = false;
parent_gpu->fault_cancel_va_supported = false;
parent_gpu->scoped_atomics_supported = false;
@@ -104,5 +108,7 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->ats.no_ats_range_required = false;
parent_gpu->ats.gmmu_pt_depth0_init_required = false;
parent_gpu->conf_computing.per_channel_key_rotation = false;
}


@@ -297,7 +297,6 @@ static NvU64 make_pte_pascal(uvm_aperture_t aperture, NvU64 address, uvm_prot_t
// vid address 32:8
pte_bits |= HWVALUE64(_MMU_VER2, PTE, ADDRESS_VID, address);
// peer id 35:33
if (aperture != UVM_APERTURE_VID)
pte_bits |= HWVALUE64(_MMU_VER2, PTE, ADDRESS_VID_PEER, UVM_APERTURE_PEER_ID(aperture));
@@ -306,6 +305,7 @@ static NvU64 make_pte_pascal(uvm_aperture_t aperture, NvU64 address, uvm_prot_t
pte_bits |= HWVALUE64(_MMU_VER2, PTE, COMPTAGLINE, 0);
}
// kind 63:56
pte_bits |= HWVALUE64(_MMU_VER2, PTE, KIND, NV_MMU_PTE_KIND_PITCH);
return pte_bits;
@@ -327,7 +327,7 @@ static NvU64 make_sparse_pte_pascal(void)
HWCONST64(_MMU_VER2, PTE, VOL, TRUE);
}
static NvU64 poisoned_pte_pascal(void)
static NvU64 poisoned_pte_pascal(uvm_page_tree_t *tree)
{
// An invalid PTE won't be fatal from faultable units like SM, which is the
// most likely source of bad PTE accesses.
@@ -340,7 +340,7 @@ static NvU64 poisoned_pte_pascal(void)
// be aligned to page_size.
NvU64 phys_addr = 0x1bad000000ULL;
NvU64 pte_bits = make_pte_pascal(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
NvU64 pte_bits = tree->hal->make_pte(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
return WRITE_HWCONST64(pte_bits, _MMU_VER2, PTE, PRIVILEGE, TRUE);
}


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -318,6 +318,60 @@ static void compute_prefetch_mask(uvm_va_block_region_t faulted_region,
}
}
// Determine whether prefetching should be applied for the given migration.
//
// This function evaluates multiple conditions to decide if prefetching is
// beneficial:
//
// 1. No preferred location policy: Always allow prefetching when no preferred
// location is set, as there are no policy constraints.
//
// 2. Moving to preferred location: Always allow prefetching when migrating
// toward the preferred location, as this aligns with the policy.
//
// 3. Confidential Computing exceptions: In CC environments, remote memory
// mapping is not always available, forcing memory migration. Allow
// prefetching out of the preferred location to facilitate these flows
// and improve performance:
// - DtoH transfers (migrating to CPU/sysmem)
// - HtoD transfers when pages are already resident on CPU
//
// Returns true if prefetching logic should be applied, false otherwise.
static bool should_apply_prefetch_logic(const uvm_va_policy_t *policy,
uvm_processor_id_t new_residency,
uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
const uvm_page_mask_t *faulted_pages)
{
// No preferred location set - always allow prefetching
if (!UVM_ID_IS_VALID(policy->preferred_location))
return true;
// Moving to preferred location - always allow prefetching
if (uvm_id_equal(policy->preferred_location, new_residency))
return true;
// CC sysmem exception logic - allow prefetching out of preferred location
// for CC-related sysmem transfers when remote mapping is not available
if (!g_uvm_global.conf_computing_enabled)
return false;
// DtoH: migrating to CPU/sysmem
if (UVM_ID_IS_CPU(new_residency))
return true;
// HtoD: check if any faulted pages are currently resident on CPU
if (UVM_ID_IS_GPU(new_residency)) {
const uvm_page_mask_t *cpu_resident_mask = uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU, NUMA_NO_NODE);
if (cpu_resident_mask && uvm_page_mask_intersects(faulted_pages, cpu_resident_mask))
return true;
}
// PPCIE, MPT CC (P2P access) can do remote mappings - no prefetching
// needed
return false;
}
// Within a block we only allow prefetching to a single processor. Therefore,
// if two processors are accessing non-overlapping regions within the same
// block they won't benefit from prefetching.
@@ -358,13 +412,16 @@ static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_blo
if (UVM_ID_IS_CPU(new_residency) || va_block->gpus[uvm_id_gpu_index(new_residency)] != NULL)
resident_mask = uvm_va_block_resident_mask_get(va_block, new_residency, NUMA_NO_NODE);
// If this is a first-touch fault and the destination processor is the
// preferred location, populate the whole max_prefetch_region.
// - If this is a first-touch fault and the destination processor is the
// preferred location, populate the whole max_prefetch_region.
// - Do not prefetch pages out of the preferred location (policy location
// is valid and does not match the new residency), unless confidential
// computing is enabled.
if (uvm_processor_mask_empty(&va_block->resident) &&
uvm_id_equal(new_residency, policy->preferred_location)) {
uvm_page_mask_region_fill(prefetch_pages, max_prefetch_region);
}
else {
else if (should_apply_prefetch_logic(policy, new_residency, va_block, va_block_context, faulted_pages)) {
init_bitmap_tree_from_region(bitmap_tree, max_prefetch_region, resident_mask, faulted_pages);
update_bitmap_tree_from_va_block(bitmap_tree,


@@ -197,6 +197,8 @@ typedef struct
unsigned max_resets;
NvU64 pin_ns;
NvS8 lapse_stat;
} params;
uvm_va_space_t *va_space;
@@ -262,13 +264,22 @@ static unsigned uvm_perf_thrashing_pin_threshold = UVM_PERF_THRASHING_PIN_THRESH
// TODO: Bug 1768615: [uvm] Automatically tune default values for thrashing
// detection/prevention parameters
#define UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT 500
#define UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT 500ULL
#define UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT_EMULATION (UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT * 800)
#define UVM_PERF_THRASHING_LAPSE_USEC_MAX (UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT * 1000)
#define UVM_PERF_THRASHING_LAPSE_USEC_MIN (UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT / 100)
// Lapse of time in microseconds that determines if two consecutive events on
// the same page can be considered thrashing
static unsigned uvm_perf_thrashing_lapse_usec = UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT;
#define UVM_PERF_LAPSE_VOTE_THRESHOLD 32
// Number of lapse intervals greater than uvm_perf_thrashing_lapse_usec,
// big enough to consider readjusting.
static unsigned int uvm_perf_lapse_vote_threshold = UVM_PERF_LAPSE_VOTE_THRESHOLD;
#define UVM_PERF_THRASHING_NAP_DEFAULT 1
#define UVM_PERF_THRASHING_NAP_MAX 100
@@ -309,6 +320,7 @@ module_param(uvm_perf_thrashing_enable, uint, S_IRUGO);
module_param(uvm_perf_thrashing_threshold, uint, S_IRUGO);
module_param(uvm_perf_thrashing_pin_threshold, uint, S_IRUGO);
module_param(uvm_perf_thrashing_lapse_usec, uint, S_IRUGO);
module_param(uvm_perf_lapse_vote_threshold, uint, S_IRUGO);
module_param(uvm_perf_thrashing_nap, uint, S_IRUGO);
module_param(uvm_perf_thrashing_epoch, uint, S_IRUGO);
module_param(uvm_perf_thrashing_pin, uint, S_IRUGO);
@@ -324,6 +336,7 @@ static bool g_uvm_perf_thrashing_enable;
static unsigned g_uvm_perf_thrashing_threshold;
static unsigned g_uvm_perf_thrashing_pin_threshold;
static NvU64 g_uvm_perf_thrashing_lapse_usec;
static unsigned g_uvm_perf_lapse_vote_threshold;
static NvU64 g_uvm_perf_thrashing_nap;
static NvU64 g_uvm_perf_thrashing_epoch;
static NvU64 g_uvm_perf_thrashing_pin;
@@ -1607,6 +1620,29 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
return hint;
}
static void adjust_thrashing_lapse(va_space_thrashing_info_t *ti, NvU64 lapse)
{
// If lapse is non-default, i.e. provided by user explicitly, don't adjust it
if (g_uvm_perf_thrashing_lapse_usec != UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT)
return;
// Update statistics without if-else conditionals.
ti->params.lapse_stat += 2 * !(lapse < ti->params.lapse_ns) - 1;
// Voting capped exponential adjustment
if (ti->params.lapse_stat >= g_uvm_perf_lapse_vote_threshold &&
ti->params.lapse_ns < UVM_PERF_THRASHING_LAPSE_USEC_MAX * 1000)
ti->params.lapse_ns += min(ti->params.lapse_ns / 8, UVM_PERF_THRASHING_LAPSE_USEC_MAX / 10 * 1000);
else
if (-ti->params.lapse_stat <= -(int)g_uvm_perf_lapse_vote_threshold &&
ti->params.lapse_ns > UVM_PERF_THRASHING_LAPSE_USEC_MIN * 1000)
ti->params.lapse_ns -= max(ti->params.lapse_ns / 8, UVM_PERF_THRASHING_LAPSE_USEC_MIN * 1000);
else
return;
ti->params.lapse_stat /= 2;
}
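To make the capped exponential voting above easier to follow, here is a small standalone user-space model of the same arithmetic. It is an illustration only, not driver code: the constants mirror UVM_PERF_THRASHING_LAPSE_USEC_* and UVM_PERF_LAPSE_VOTE_THRESHOLD from this file, the driver's early-out for user-overridden module parameters is dropped, and the two vote comparisons are written in their plain form.

/*
 * Standalone sketch of the voting, capped exponential lapse adjustment:
 * +1 vote when an observed lapse is at least the current threshold, -1
 * otherwise; once 32 votes accumulate in either direction the threshold
 * moves by lapse/8 (capped on the way up, floored on the way down) and the
 * vote count decays by half.
 */
#include <stdio.h>
#include <stdint.h>

#define LAPSE_USEC_DEFAULT 500ULL
#define LAPSE_USEC_MAX     (LAPSE_USEC_DEFAULT * 1000)   /* 500000 us */
#define LAPSE_USEC_MIN     (LAPSE_USEC_DEFAULT / 100)    /* 5 us */
#define VOTE_THRESHOLD     32

static uint64_t lapse_ns = LAPSE_USEC_DEFAULT * 1000;    /* current threshold */
static int lapse_stat;                                   /* accumulated votes */

static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }
static uint64_t max_u64(uint64_t a, uint64_t b) { return a > b ? a : b; }

static void adjust(uint64_t observed_lapse_ns)
{
    /* Same branchless vote update as the driver: +1 if lapse >= threshold. */
    lapse_stat += 2 * !(observed_lapse_ns < lapse_ns) - 1;

    if (lapse_stat >= VOTE_THRESHOLD && lapse_ns < LAPSE_USEC_MAX * 1000)
        lapse_ns += min_u64(lapse_ns / 8, LAPSE_USEC_MAX / 10 * 1000);
    else if (lapse_stat <= -VOTE_THRESHOLD && lapse_ns > LAPSE_USEC_MIN * 1000)
        lapse_ns -= max_u64(lapse_ns / 8, LAPSE_USEC_MIN * 1000);
    else
        return;

    lapse_stat /= 2;   /* decay the vote count after an adjustment */
}

int main(void)
{
    int i;

    /* 32 consecutive "lapse was at least the threshold" votes grow the
     * threshold by lapse/8: 500000 ns -> 562500 ns. */
    for (i = 0; i < 32; i++)
        adjust(600 * 1000);
    printf("after long lapses:  %llu ns\n", (unsigned long long)lapse_ns);

    /* Enough short-lapse votes eventually shrink it again. */
    for (i = 0; i < 64; i++)
        adjust(10 * 1000);
    printf("after short lapses: %llu ns\n", (unsigned long long)lapse_ns);

    return 0;
}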
// Function called on fault that tells the fault handler if any operation
// should be performed to minimize thrashing. The logic is as follows:
//
@@ -1710,6 +1746,8 @@ uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
last_time_stamp = page_thrashing_get_time_stamp(page_thrashing);
adjust_thrashing_lapse(va_space_thrashing, time_stamp - last_time_stamp);
// If the lapse since the last thrashing event is longer than a thrashing
// lapse we are no longer thrashing
if (time_stamp - last_time_stamp > va_space_thrashing->params.lapse_ns &&
@@ -2012,6 +2050,8 @@ NV_STATUS uvm_perf_thrashing_init(void)
INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_lapse_usec, UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT);
INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_lapse_vote_threshold, UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT);
INIT_THRASHING_PARAMETER_NONZERO_MAX(uvm_perf_thrashing_nap,
UVM_PERF_THRASHING_NAP_DEFAULT,
UVM_PERF_THRASHING_NAP_MAX);


@@ -98,14 +98,16 @@
//
// When a memory allocation from PMA fails and eviction is requested, PMM will
// check whether it can evict any user memory chunks to satisfy the request.
// All allocated user memory root chunks are tracked in an LRU list
// (root_chunks.va_block_used). A root chunk is moved to the tail of that list
// whenever any of its subchunks is allocated (unpinned) by a VA block (see
// uvm_pmm_gpu_unpin_allocated()). When a root chunk is selected for eviction,
// it has the eviction flag set (see pick_root_chunk_to_evict()). This flag
// affects many of the PMM operations on all of the subchunks of the root chunk
// being evicted. See usage of (root_)chunk_is_in_eviction(), in particular in
// chunk_free_locked() and claim_free_chunk().
// All allocated user memory root chunks are tracked in one of several LRU lists
// (root_chunks.alloc_list[n]). The list used depends on the state of the chunk
// (see uvm_pmm_alloc_list_t). A root chunk is moved to the tail of the used
// list (UVM_PMM_ALLOC_LIST_USED) whenever any of its subchunks is allocated
// (unpinned) by a VA block (see uvm_pmm_gpu_unpin_allocated()). When a root
// chunk is selected for eviction, it has the eviction flag set
// (see pick_root_chunk_to_evict()). This flag affects many of the PMM
// operations on all of the subchunks of the root chunk being evicted. See usage
// of (root_)chunk_is_in_eviction(), in particular in chunk_free_locked() and
// claim_free_chunk().
//
// To evict a root chunk, all of its free subchunks are pinned, then all
// resident pages backed by it are moved to the CPU one VA block at a time.
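To make the new list organization above easier to follow: the identifiers used in the rest of this file (UVM_PMM_ALLOC_LIST_USED/UNUSED/DISCARDED/COUNT) imply an enum roughly like the sketch below. The exact declaration is not part of this compare; the ordering shown is only an assumption, chosen to be consistent with the eviction priority that pick_root_chunk_to_evict() used to encode explicitly (unused first, then discarded, then used) and that get_first_allocated_chunk() now gets implicitly by walking the lists in enum order.

/* Hypothetical reconstruction, not taken from this diff: the array index
 * type for root_chunks.alloc_list[]. Ordering assumed so that walking the
 * lists from 0 upward preserves the old eviction priority. */
typedef enum
{
    UVM_PMM_ALLOC_LIST_UNUSED = 0,   /* evicted first */
    UVM_PMM_ALLOC_LIST_DISCARDED,    /* expected to sometimes revert to used */
    UVM_PMM_ALLOC_LIST_USED,         /* LRU of chunks with allocated subchunks */
    UVM_PMM_ALLOC_LIST_COUNT,
} uvm_pmm_alloc_list_t;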
@@ -645,7 +647,7 @@ static void chunk_update_lists_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk
else if (root_chunk->chunk.state != UVM_PMM_GPU_CHUNK_STATE_FREE) {
UVM_ASSERT(root_chunk->chunk.state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT ||
root_chunk->chunk.state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
list_move_tail(&root_chunk->chunk.list, &pmm->root_chunks.va_block_used);
list_move_tail(&root_chunk->chunk.list, &pmm->root_chunks.alloc_list[UVM_PMM_ALLOC_LIST_USED]);
}
}
@@ -663,7 +665,6 @@ void uvm_pmm_gpu_unpin_allocated(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm
UVM_ASSERT(list_empty(&chunk->list));
UVM_ASSERT(va_block);
UVM_ASSERT(chunk->va_block == va_block);
UVM_ASSERT(chunk->va_block_page_index < uvm_va_block_num_cpu_pages(va_block));
uvm_spin_lock(&pmm->list_lock);
@@ -757,12 +758,8 @@ static bool assert_chunk_mergeable(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
UVM_ASSERT(child->state == first_child->state);
if ((first_child->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) && uvm_gpu_chunk_is_user(first_child)) {
uvm_gpu_chunk_t *prev_child = chunk->suballoc->subchunks[i-1];
UVM_ASSERT(child->va_block == child_va_block);
UVM_ASSERT(child->va_block_page_index ==
prev_child->va_block_page_index + uvm_gpu_chunk_get_size(prev_child) / PAGE_SIZE);
UVM_ASSERT(child->is_referenced == prev_child->is_referenced);
UVM_ASSERT(child->is_referenced == first_child->is_referenced);
}
}
@@ -798,7 +795,6 @@ static void merge_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
UVM_ASSERT(subchunk->va_block);
chunk->va_block = subchunk->va_block;
chunk->va_block_page_index = subchunk->va_block_page_index;
chunk->is_referenced = subchunk->is_referenced;
}
else if (child_state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
@@ -1198,7 +1194,6 @@ void uvm_pmm_gpu_mark_chunk_evicted(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
UVM_ASSERT(chunk->va_block);
chunk->va_block = NULL;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
chunk_pin(pmm, chunk);
uvm_spin_unlock(&pmm->list_lock);
@@ -1412,7 +1407,7 @@ static void chunk_start_eviction(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
uvm_gpu_chunk_set_in_eviction(chunk, true);
}
static void root_chunk_update_eviction_list(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, struct list_head *list)
static void root_chunk_update_eviction_list(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_pmm_alloc_list_t alloc_list)
{
uvm_spin_lock(&pmm->list_lock);
@@ -1426,7 +1421,7 @@ static void root_chunk_update_eviction_list(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t
// eviction lists.
UVM_ASSERT(!list_empty(&chunk->list));
list_move_tail(&chunk->list, list);
list_move_tail(&chunk->list, &pmm->root_chunks.alloc_list[alloc_list]);
}
uvm_spin_unlock(&pmm->list_lock);
@@ -1434,17 +1429,49 @@ static void root_chunk_update_eviction_list(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t
void uvm_pmm_gpu_mark_root_chunk_used(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
{
root_chunk_update_eviction_list(pmm, chunk, &pmm->root_chunks.va_block_used);
root_chunk_update_eviction_list(pmm, chunk, UVM_PMM_ALLOC_LIST_USED);
}
void uvm_pmm_gpu_mark_root_chunk_unused(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
{
root_chunk_update_eviction_list(pmm, chunk, &pmm->root_chunks.va_block_unused);
root_chunk_update_eviction_list(pmm, chunk, UVM_PMM_ALLOC_LIST_UNUSED);
}
void uvm_pmm_gpu_mark_root_chunk_discarded(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
{
root_chunk_update_eviction_list(pmm, chunk, &pmm->root_chunks.va_block_discarded);
root_chunk_update_eviction_list(pmm, chunk, UVM_PMM_ALLOC_LIST_DISCARDED);
}
static uvm_pmm_alloc_list_t get_alloc_list(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
{
uvm_pmm_alloc_list_t alloc_list;
uvm_assert_spinlock_locked(&pmm->list_lock);
for (alloc_list = 0; alloc_list < UVM_PMM_ALLOC_LIST_COUNT; alloc_list++) {
uvm_gpu_chunk_t *entry;
list_for_each_entry(entry, &pmm->root_chunks.alloc_list[alloc_list], list) {
if (entry == chunk)
return alloc_list;
}
}
return UVM_PMM_ALLOC_LIST_COUNT;
}
static uvm_gpu_chunk_t *get_first_allocated_chunk(uvm_pmm_gpu_t *pmm)
{
uvm_pmm_alloc_list_t alloc_list;
uvm_assert_spinlock_locked(&pmm->list_lock);
for (alloc_list = 0; alloc_list < UVM_PMM_ALLOC_LIST_COUNT; alloc_list++) {
uvm_gpu_chunk_t *chunk = list_first_chunk(&pmm->root_chunks.alloc_list[alloc_list]);
if (chunk)
return chunk;
}
return NULL;
}
static uvm_gpu_root_chunk_t *pick_root_chunk_to_evict(uvm_pmm_gpu_t *pmm)
@@ -1471,19 +1498,10 @@ static uvm_gpu_root_chunk_t *pick_root_chunk_to_evict(uvm_pmm_gpu_t *pmm)
UVM_ASSERT(chunk->is_zero);
}
if (!chunk)
chunk = list_first_chunk(&pmm->root_chunks.va_block_unused);
if (!chunk) {
// Discarded pages are chosen to be evicted after unused pages,
// as we expect some of them to get reverted to used pages.
chunk = list_first_chunk(&pmm->root_chunks.va_block_discarded);
}
// TODO: Bug 1765193: Move the chunks to the tail of the used list whenever
// they get mapped.
if (!chunk)
chunk = list_first_chunk(&pmm->root_chunks.va_block_used);
chunk = get_first_allocated_chunk(pmm);
if (chunk)
chunk_start_eviction(pmm, chunk);
@@ -1492,6 +1510,7 @@ static uvm_gpu_root_chunk_t *pick_root_chunk_to_evict(uvm_pmm_gpu_t *pmm)
if (chunk)
return root_chunk_from_chunk(pmm, chunk);
return NULL;
}
@@ -1867,7 +1886,6 @@ static void init_root_chunk(uvm_pmm_gpu_t *pmm,
UVM_ASSERT(!chunk->parent);
UVM_ASSERT(!chunk->suballoc);
UVM_ASSERT(!chunk->va_block);
UVM_ASSERT(chunk->va_block_page_index == PAGES_PER_UVM_VA_BLOCK);
UVM_ASSERT(list_empty(&chunk->list));
UVM_ASSERT(uvm_gpu_chunk_get_size(chunk) == UVM_CHUNK_SIZE_MAX);
UVM_ASSERT(!root_chunk_has_elevated_page(pmm, root_chunk));
@@ -2109,7 +2127,6 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
subchunk->type = chunk->type;
uvm_gpu_chunk_set_size(subchunk, subchunk_size);
subchunk->parent = chunk;
subchunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
subchunk->is_zero = chunk->is_zero;
INIT_LIST_HEAD(&subchunk->list);
@@ -2121,7 +2138,6 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
uvm_assert_mutex_locked(&chunk->va_block->lock);
subchunk->va_block = chunk->va_block;
subchunk->va_block_page_index = chunk->va_block_page_index + (i * subchunk_size) / PAGE_SIZE;
subchunk->is_referenced = chunk->is_referenced;
}
else if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
@@ -2140,7 +2156,6 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
chunk->va_block = NULL;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
chunk->is_referenced = false;
}
else if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
@@ -2154,7 +2169,6 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
root_chunk->chunk.suballoc->pinned_leaf_chunks += 1;
chunk->va_block = NULL;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
chunk->is_referenced = false;
}
@@ -2243,7 +2257,6 @@ static void chunk_free_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
UVM_ASSERT(chunk->va_block);
UVM_ASSERT(list_empty(&chunk->list));
chunk->va_block = NULL;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
chunk->is_zero = false;
chunk_pin(pmm, chunk);
}
@@ -2261,7 +2274,6 @@ static void chunk_free_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
}
chunk->va_block = NULL;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
chunk->is_zero = false;
if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED)
@@ -2913,134 +2925,6 @@ cleanup:
return status;
}
typedef struct
{
// Start/end of the physical region to be traversed (IN)
NvU64 phys_start;
NvU64 phys_end;
// Pointer to the array of mappings in which to store the results (OUT)
uvm_reverse_map_t *mappings;
// Number of entries written to mappings (OUT)
NvU32 num_mappings;
} get_chunk_mappings_data_t;
// Chunk traversal function used for phys-to-virt translation. These are the
// possible return values.
//
// - NV_ERR_OUT_OF_RANGE: no allocated physical chunks were found
// - NV_ERR_MORE_DATA_AVAILABLE: allocated physical chunks were found
// - NV_OK: allocated physical chunks may have been found. Check num_mappings
static NV_STATUS get_chunk_mappings_in_range(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, void *data)
{
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
get_chunk_mappings_data_t *get_chunk_mappings_data = (get_chunk_mappings_data_t *)data;
NvU64 chunk_end = chunk->address + uvm_gpu_chunk_get_size(chunk) - 1;
uvm_assert_mutex_locked(&pmm->lock);
// Kernel chunks do not have assigned VA blocks so we can just skip them
if (chunk->type == UVM_PMM_GPU_MEMORY_TYPE_KERNEL)
return NV_WARN_NOTHING_TO_DO;
// This chunk is located before the requested physical range. Skip its
// children and keep going
if (chunk_end < get_chunk_mappings_data->phys_start)
return NV_WARN_NOTHING_TO_DO;
// We are beyond the search phys range. Stop traversing.
if (chunk->address > get_chunk_mappings_data->phys_end) {
if (get_chunk_mappings_data->num_mappings > 0)
return NV_ERR_MORE_DATA_AVAILABLE;
else
return NV_ERR_OUT_OF_RANGE;
}
uvm_spin_lock(&pmm->list_lock);
// Return results for allocated leaf chunks only
if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
uvm_reverse_map_t *reverse_map;
UVM_ASSERT(chunk->va_block);
uvm_va_block_retain(chunk->va_block);
reverse_map = &get_chunk_mappings_data->mappings[get_chunk_mappings_data->num_mappings];
reverse_map->va_block = chunk->va_block;
reverse_map->region = uvm_va_block_region(chunk->va_block_page_index,
chunk->va_block_page_index + uvm_gpu_chunk_get_size(chunk) / PAGE_SIZE);
reverse_map->owner = gpu->id;
// If we land in the middle of a chunk, adjust the offset
if (get_chunk_mappings_data->phys_start > chunk->address) {
NvU64 offset = get_chunk_mappings_data->phys_start - chunk->address;
reverse_map->region.first += offset / PAGE_SIZE;
}
// If the physical range doesn't cover the whole chunk, adjust num_pages
if (get_chunk_mappings_data->phys_end < chunk_end)
reverse_map->region.outer -= (chunk_end - get_chunk_mappings_data->phys_end) / PAGE_SIZE;
++get_chunk_mappings_data->num_mappings;
}
uvm_spin_unlock(&pmm->list_lock);
return NV_OK;
}
NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region_size, uvm_reverse_map_t *out_mappings)
{
NvU64 chunk_base_addr = UVM_ALIGN_DOWN(phys_addr, UVM_CHUNK_SIZE_MAX);
NvU64 size_in_chunk = min(UVM_CHUNK_SIZE_MAX - (phys_addr - chunk_base_addr), region_size);
NvU32 num_mappings = 0;
UVM_ASSERT(PAGE_ALIGNED(phys_addr));
UVM_ASSERT(PAGE_ALIGNED(region_size));
uvm_mutex_lock(&pmm->lock);
// Traverse the whole requested region
do {
NV_STATUS status = NV_OK;
uvm_gpu_root_chunk_t *root_chunk = root_chunk_from_address(pmm, phys_addr);
uvm_gpu_chunk_t *chunk = &root_chunk->chunk;
get_chunk_mappings_data_t get_chunk_mappings_data;
get_chunk_mappings_data.phys_start = phys_addr;
get_chunk_mappings_data.phys_end = phys_addr + size_in_chunk - 1;
get_chunk_mappings_data.mappings = out_mappings + num_mappings;
get_chunk_mappings_data.num_mappings = 0;
// Walk the chunks for the current root chunk
status = chunk_walk_pre_order(pmm,
chunk,
get_chunk_mappings_in_range,
&get_chunk_mappings_data);
if (status == NV_ERR_OUT_OF_RANGE)
break;
if (get_chunk_mappings_data.num_mappings > 0) {
UVM_ASSERT(status == NV_OK || status == NV_ERR_MORE_DATA_AVAILABLE);
num_mappings += get_chunk_mappings_data.num_mappings;
}
else {
UVM_ASSERT(status == NV_OK);
}
region_size -= size_in_chunk;
phys_addr += size_in_chunk;
size_in_chunk = min((NvU64)UVM_CHUNK_SIZE_MAX, region_size);
} while (region_size > 0);
uvm_mutex_unlock(&pmm->lock);
return num_mappings;
}
#if UVM_IS_CONFIG_HMM()
uvm_gpu_chunk_t *uvm_pmm_devmem_page_to_chunk(struct page *page)
@@ -3139,7 +3023,6 @@ static void devmem_page_free(struct page *page)
UVM_ASSERT(chunk->is_referenced);
chunk->va_block = NULL;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
chunk->is_referenced = false;
if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
@@ -3477,12 +3360,10 @@ void uvm_pmm_gpu_device_p2p_init(uvm_parent_gpu_t *parent_gpu)
void uvm_pmm_gpu_device_p2p_deinit(uvm_parent_gpu_t *parent_gpu)
{
unsigned long pci_start_pfn = pci_resource_start(parent_gpu->pci_dev,
uvm_device_p2p_static_bar(parent_gpu)) >> PAGE_SHIFT;
struct page *p2p_page;
if (parent_gpu->device_p2p_initialised && !uvm_parent_gpu_is_coherent(parent_gpu)) {
p2p_page = pfn_to_page(pci_start_pfn);
struct page *p2p_page = pfn_to_page(pci_resource_start(parent_gpu->pci_dev,
uvm_device_p2p_static_bar(parent_gpu)) >> PAGE_SHIFT);
devm_memunmap_pages(&parent_gpu->pci_dev->dev, page_pgmap(p2p_page));
}
@@ -3546,6 +3427,7 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
{ 0, uvm_mem_kernel_chunk_sizes(gpu)},
};
NV_STATUS status = NV_OK;
uvm_pmm_alloc_list_t alloc_list;
size_t i, j, k;
// UVM_CHUNK_SIZE_INVALID is UVM_CHUNK_SIZE_MAX shifted left by 1. This
@@ -3560,10 +3442,11 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
INIT_LIST_HEAD(&pmm->free_list[i][j][k]);
}
}
INIT_LIST_HEAD(&pmm->root_chunks.va_block_used);
INIT_LIST_HEAD(&pmm->root_chunks.va_block_unused);
for (alloc_list = 0; alloc_list < UVM_PMM_ALLOC_LIST_COUNT; alloc_list++)
INIT_LIST_HEAD(&pmm->root_chunks.alloc_list[alloc_list]);
INIT_LIST_HEAD(&pmm->root_chunks.va_block_lazy_free);
INIT_LIST_HEAD(&pmm->root_chunks.va_block_discarded);
nv_kthread_q_item_init(&pmm->root_chunks.va_block_lazy_free_q_item, process_lazy_free_entry, pmm);
uvm_mutex_init(&pmm->lock, UVM_LOCK_ORDER_PMM);
@@ -3620,7 +3503,6 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
chunk->state = UVM_PMM_GPU_CHUNK_STATE_PMA_OWNED;
uvm_gpu_chunk_set_size(chunk, UVM_CHUNK_SIZE_MAX);
chunk->address = i * UVM_CHUNK_SIZE_MAX;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
}
status = uvm_bit_locks_init(&pmm->root_chunks.bitlocks, pmm->root_chunks.count, UVM_LOCK_ORDER_PMM_ROOT_CHUNK);
@@ -4073,3 +3955,55 @@ NV_STATUS uvm_test_pmm_query_pma_stats(UVM_TEST_PMM_QUERY_PMA_STATS_PARAMS *para
uvm_gpu_release(gpu);
return NV_OK;
}
NV_STATUS uvm_test_pmm_get_alloc_list(UVM_TEST_PMM_GET_ALLOC_LIST_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_gpu_t *gpu;
uvm_va_block_t *va_block;
uvm_gpu_chunk_t *chunk;
NV_STATUS status = NV_OK;
// -Wall implies -Wenum-compare, so cast through int to avoid warnings
BUILD_BUG_ON((int)UVM_TEST_PMM_ALLOC_LIST_UNUSED != (int)UVM_PMM_ALLOC_LIST_UNUSED);
BUILD_BUG_ON((int)UVM_TEST_PMM_ALLOC_LIST_DISCARDED != (int)UVM_PMM_ALLOC_LIST_DISCARDED);
BUILD_BUG_ON((int)UVM_TEST_PMM_ALLOC_LIST_USED != (int)UVM_PMM_ALLOC_LIST_USED);
BUILD_BUG_ON((int)UVM_TEST_PMM_ALLOC_LIST_COUNT != (int)UVM_PMM_ALLOC_LIST_COUNT);
uvm_va_space_down_read(va_space);
gpu = uvm_va_space_get_gpu_by_uuid(va_space, &params->gpu_uuid);
if (!gpu) {
status = NV_ERR_INVALID_DEVICE;
goto out;
}
status = uvm_va_block_find(va_space, params->address, &va_block);
if (status != NV_OK)
goto out;
// Default to failure: no chunk found, or the chunk is not on any alloc list
status = NV_ERR_INVALID_STATE;
uvm_mutex_lock(&va_block->lock);
chunk = uvm_va_block_lookup_gpu_chunk(va_block, gpu, params->address);
if (chunk) {
uvm_pmm_alloc_list_t alloc_list;
uvm_spin_lock(&gpu->pmm.list_lock);
alloc_list = get_alloc_list(&gpu->pmm, chunk);
uvm_spin_unlock(&gpu->pmm.list_lock);
if (alloc_list != UVM_PMM_ALLOC_LIST_COUNT) {
params->list_type = alloc_list;
status = NV_OK;
}
}
uvm_mutex_unlock(&va_block->lock);
out:
uvm_va_space_up_read(va_space);
return status;
}

View File

@@ -174,6 +174,34 @@ static void uvm_pmm_list_zero_checks(void)
BUILD_BUG_ON(UVM_PMM_LIST_ZERO_COUNT > 2);
}
// Lists for allocated root chunks. When picking a root chunk to evict, lists
// with lower enum values are checked first.
typedef enum
{
// Root chunks unused by VA blocks, i.e. allocated, but not holding any
// resident pages. These take priority when evicting as no data needs to be
// migrated for them to be evicted.
//
// For simplicity, the list is approximate, tracking unused chunks only from
// root chunk sized (2M) VA blocks.
//
// Updated by the VA block code with uvm_pmm_gpu_mark_root_chunk_(un)used().
UVM_PMM_ALLOC_LIST_UNUSED,
// Discarded root GPU chunks, which are still resident on the GPU. Chunks on
// this list are evicted with a lower priority than unused chunks because we
// expect some of them to get reverted to used pages.
//
// Updated by the VA block code with
// uvm_pmm_gpu_mark_root_chunk_discarded().
UVM_PMM_ALLOC_LIST_DISCARDED,
// Root chunks used by VA blocks, likely with resident pages.
UVM_PMM_ALLOC_LIST_USED,
UVM_PMM_ALLOC_LIST_COUNT
} uvm_pmm_alloc_list_t;
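A hedged sketch of how the VA block code might drive transitions between these lists follows. The resident_pages and discard_pending inputs are hypothetical stand-ins for the VA block's real residency state; only the three uvm_pmm_gpu_mark_root_chunk_*() calls are actual PMM entry points (defined in uvm_pmm_gpu.c), and the snippet assumes it sits in a .c file where uvm_pmm_gpu_t and uvm_gpu_chunk_t are in scope.

// Hedged sketch: the condition inputs are hypothetical; the real driver
// derives this state from the VA block's resident page masks.
static void demo_update_root_chunk_list(uvm_pmm_gpu_t *pmm,
                                        uvm_gpu_chunk_t *root_chunk,
                                        size_t resident_pages,
                                        bool discard_pending)
{
    if (resident_pages == 0) {
        // Nothing resident: cheapest to evict, so place on the UNUSED list.
        uvm_pmm_gpu_mark_root_chunk_unused(pmm, root_chunk);
    }
    else if (discard_pending) {
        // Still resident but discarded: evicted before used chunks.
        uvm_pmm_gpu_mark_root_chunk_discarded(pmm, root_chunk);
    }
    else {
        // Actively holding resident pages: evicted only as a last resort.
        uvm_pmm_gpu_mark_root_chunk_used(pmm, root_chunk);
    }
}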
// Maximum chunk sizes per type of allocation in single GPU.
// The worst case today is Maxwell with 4 allocations sizes for page tables and
// 2 page sizes used by uvm_mem_t. Notably one of the allocations for page
@@ -277,7 +305,7 @@ struct uvm_gpu_chunk_struct
size_t log2_size : order_base_2(UVM_CHUNK_SIZE_MASK_SIZE);
// Start page index within va_block
// Start page index within va_block.
uvm_page_index_t va_block_page_index : order_base_2(PAGES_PER_UVM_VA_BLOCK + 1);
// This allows determining what PMM owns the chunk. Users of this field
@@ -348,30 +376,12 @@ typedef struct uvm_pmm_gpu_struct
// Bit locks for the root chunks with 1 bit per each root chunk
uvm_bit_locks_t bitlocks;
// List of root chunks unused by VA blocks, i.e. allocated, but not
// holding any resident pages. These take priority when evicting as no
// data needs to be migrated for them to be evicted.
//
// For simplicity, the list is approximate, tracking unused chunks only
// from root chunk sized (2M) VA blocks.
//
// Updated by the VA block code with
// uvm_pmm_gpu_mark_root_chunk_(un)used().
struct list_head va_block_unused;
// List of discarded root GPU chunks, which are still mapped on the GPU.
// Chunks on this list are evicted with a lower priority than unused chunks.
//
// Updated by the VA block code with
// uvm_pmm_gpu_mark_root_chunk_discarded().
struct list_head va_block_discarded;
// List of root chunks used by VA blocks
struct list_head va_block_used;
// LRU lists for picking which root chunks to evict
struct list_head alloc_list[UVM_PMM_ALLOC_LIST_COUNT];
// List of chunks needing to be lazily freed and a queue for processing
// the list. TODO: Bug 3881835: revisit whether to use nv_kthread_q_t
// or workqueue.
// the list. TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or
// workqueue.
struct list_head va_block_lazy_free;
nv_kthread_q_item_t va_block_lazy_free_q_item;
} root_chunks;
@@ -612,21 +622,6 @@ static uvm_chunk_size_t uvm_chunk_find_prev_size(uvm_chunk_sizes_mask_t chunk_si
return (uvm_chunk_size_t)1 << __fls(chunk_sizes);
}
// Obtain the {va_block, virt_addr} information for the chunks in the given
// [phys_addr:phys_addr + region_size) range. One entry per chunk is returned.
// phys_addr and region_size must be page-aligned.
//
// Valid translations are written to out_mappings sequentially (there are no
// gaps). The caller is required to provide enough entries in out_mappings for the
// whole region. The function returns the number of entries written to
// out_mappings.
//
// The returned reverse map is a snapshot: it is stale as soon as it is
// returned, and the caller is responsible for locking the VA block(s) and
// checking that the chunks are still there. Also, the VA block(s) are
// retained, and it's up to the caller to release them.
NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region_size, uvm_reverse_map_t *out_mappings);
// Iterates over every size in the input mask from smallest to largest
#define for_each_chunk_size(__size, __chunk_sizes) \
for ((__size) = (__chunk_sizes) ? uvm_chunk_find_first_size(__chunk_sizes) : \
@@ -653,4 +648,6 @@ NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region
for (; (__size) != UVM_CHUNK_SIZE_INVALID; \
(__size) = uvm_chunk_find_prev_size((__chunk_sizes), (__size)))
NV_STATUS uvm_test_pmm_get_alloc_list(UVM_TEST_PMM_GET_ALLOC_LIST_PARAMS *params, struct file *filp);
#endif

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -1009,166 +1009,6 @@ NV_STATUS uvm_test_pmm_async_alloc(UVM_TEST_PMM_ASYNC_ALLOC_PARAMS *params, stru
return status;
}
static uvm_reverse_map_t g_reverse_map_entries[PAGES_PER_UVM_VA_BLOCK * 4];
static NV_STATUS test_pmm_reverse_map_single(uvm_gpu_t *gpu, uvm_va_space_t *va_space, NvU64 addr)
{
NV_STATUS status = NV_OK;
NvU32 num_translations;
uvm_va_block_t *va_block;
uvm_gpu_phys_address_t phys_addr;
bool is_resident;
status = uvm_va_block_find(va_space, addr, &va_block);
if (status != NV_OK)
return status;
TEST_CHECK_RET(uvm_va_block_size(va_block) == UVM_VA_BLOCK_SIZE);
// Verify that all pages are populated on the GPU
uvm_mutex_lock(&va_block->lock);
is_resident = uvm_processor_mask_test(&va_block->resident, gpu->id) &&
uvm_page_mask_full(uvm_va_block_resident_mask_get(va_block, gpu->id, NUMA_NO_NODE));
if (is_resident)
phys_addr = uvm_va_block_gpu_phys_page_address(va_block, 0, gpu);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(is_resident);
// In this test a single VA range covers the whole 2MB physical region. We
// expect a single translation to be returned for a 2MB chunk.
num_translations = uvm_pmm_gpu_phys_to_virt(&gpu->pmm, phys_addr.address, UVM_VA_BLOCK_SIZE, g_reverse_map_entries);
TEST_CHECK_RET(num_translations == 1);
TEST_CHECK_RET(g_reverse_map_entries[0].va_block == va_block);
TEST_CHECK_RET(g_reverse_map_entries[0].region.first == 0);
TEST_CHECK_RET(uvm_va_block_region_num_pages(g_reverse_map_entries[0].region) == uvm_va_block_num_cpu_pages(va_block));
uvm_va_block_release(va_block);
return NV_OK;
}
static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t *va_space, NvU64 addr, NvU64 size)
{
uvm_va_range_managed_t *managed_range;
uvm_va_block_t *va_block = NULL;
uvm_va_block_context_t *va_block_context = NULL;
NvU32 num_blocks;
NvU32 index = 0;
uvm_gpu_phys_address_t phys_addr = {0};
bool is_resident;
// In this test, the [addr:addr + size) VA region contains
// several managed ranges with different sizes.
// Find the first block to compute the base physical address of the root
// chunk
uvm_for_each_va_range_managed_in(managed_range, va_space, addr, addr + size - 1) {
va_block = uvm_va_range_block(managed_range, 0);
if (va_block)
break;
}
TEST_CHECK_RET(va_block);
va_block_context = uvm_va_block_context_alloc(NULL);
TEST_CHECK_RET(va_block_context);
uvm_mutex_lock(&va_block->lock);
is_resident = uvm_id_equal(uvm_va_block_page_get_closest_resident(va_block, va_block_context, 0, gpu->id), gpu->id);
if (is_resident) {
phys_addr = uvm_va_block_gpu_phys_page_address(va_block, 0, gpu);
phys_addr.address = UVM_ALIGN_DOWN(phys_addr.address, UVM_VA_BLOCK_SIZE);
}
uvm_mutex_unlock(&va_block->lock);
uvm_va_block_context_free(va_block_context);
TEST_CHECK_RET(is_resident);
// Perform the lookup for the whole root chunk
num_blocks = uvm_pmm_gpu_phys_to_virt(&gpu->pmm, phys_addr.address, size, g_reverse_map_entries);
TEST_CHECK_RET(num_blocks != 0);
// Iterate over all managed ranges and their VA blocks within the 2MB VA
// region. Some blocks are not populated. However, we assume that blocks
// have been populated in order so they have been assigned physical
// addresses incrementally. Therefore, the reverse translations will show
// them in order.
uvm_for_each_va_range_managed_in(managed_range, va_space, addr, addr + size - 1) {
for_each_va_block_in_va_range(managed_range, va_block) {
NvU32 num_va_block_pages = 0;
// Iterate over all the translations for the current VA block. One
// translation per chunk is returned. We compute the total number of
// pages covered in the translations to check that it matches
// the number of pages in the VA block.
while (g_reverse_map_entries[index].va_block == va_block) {
uvm_reverse_map_t *reverse_mapping;
reverse_mapping = &g_reverse_map_entries[index];
uvm_va_block_release(va_block);
num_va_block_pages += uvm_va_block_region_num_pages(reverse_mapping->region);
UVM_ASSERT(uvm_va_block_contains_address(va_block, uvm_reverse_map_start(reverse_mapping)));
UVM_ASSERT(uvm_va_block_contains_address(va_block, uvm_reverse_map_end(reverse_mapping)));
uvm_mutex_lock(&va_block->lock);
// Verify that all pages are populated on the GPU
is_resident = uvm_page_mask_region_full(uvm_va_block_resident_mask_get(va_block, gpu->id, NUMA_NO_NODE),
reverse_mapping->region);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(is_resident);
++index;
}
if (num_va_block_pages)
TEST_CHECK_RET(num_va_block_pages == uvm_va_block_num_cpu_pages(va_block));
}
}
TEST_CHECK_RET(index == num_blocks);
return NV_OK;
}
NV_STATUS uvm_test_pmm_reverse_map(UVM_TEST_PMM_REVERSE_MAP_PARAMS *params, struct file *filp)
{
NV_STATUS status;
uvm_gpu_t *gpu;
uvm_va_space_t *va_space;
va_space = uvm_va_space_get(filp);
// Take the global lock to avoid interference from different instances of
// the test, since we use global variables
uvm_mutex_lock(&g_uvm_global.global_lock);
uvm_va_space_down_write(va_space);
gpu = uvm_va_space_get_gpu_by_uuid(va_space, &params->gpu_uuid);
if (!gpu || !uvm_processor_mask_test(&va_space->registered_gpus, gpu->id)) {
status = NV_ERR_INVALID_DEVICE;
goto exit_unlock;
}
status = test_pmm_reverse_map_single(gpu, va_space, params->range_address1);
if (status == NV_OK)
status = test_pmm_reverse_map_many_blocks(gpu, va_space, params->range_address2, params->range_size2);
exit_unlock:
uvm_va_space_up_write(va_space);
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
static NV_STATUS test_chunk_with_elevated_page(uvm_gpu_t *gpu)
{
uvm_pmm_gpu_t *pmm = &gpu->pmm;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2018 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -35,17 +35,22 @@
#if defined(CONFIG_PROC_FS)
// This parameter enables additional debug procfs entries. It's enabled by
// default for debug and develop builds and disabled for release builds.
int uvm_enable_debug_procfs = UVM_IS_DEBUG() || UVM_IS_DEVELOP();
static int uvm_enable_debug_procfs = UVM_IS_DEBUG() || UVM_IS_DEVELOP();
module_param(uvm_enable_debug_procfs, int, S_IRUGO);
MODULE_PARM_DESC(uvm_enable_debug_procfs, "Enable debug procfs entries in /proc/" UVM_PROC_DIR_NAME);
#else
int uvm_enable_debug_procfs = 0;
static int uvm_enable_debug_procfs = 0;
#endif
static struct proc_dir_entry *uvm_proc_dir;
static struct proc_dir_entry *uvm_proc_gpus;
static struct proc_dir_entry *uvm_proc_cpu;
bool uvm_procfs_is_debug_enabled(void)
{
return uvm_enable_debug_procfs != 0;
}
NV_STATUS uvm_procfs_init(void)
{
if (!uvm_procfs_is_enabled())
@@ -80,4 +85,3 @@ struct proc_dir_entry *uvm_procfs_get_cpu_base_dir(void)
{
return uvm_proc_cpu;
}

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2019 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -45,10 +45,7 @@ static bool uvm_procfs_is_enabled(void)
// Is debug procfs enabled? This indicates that debug procfs files should be
// created.
static bool uvm_procfs_is_debug_enabled(void)
{
return uvm_enable_debug_procfs != 0;
}
bool uvm_procfs_is_debug_enabled(void);
struct proc_dir_entry *uvm_procfs_get_gpu_base_dir(void);
struct proc_dir_entry *uvm_procfs_get_cpu_base_dir(void);
@@ -71,8 +68,8 @@ void uvm_procfs_close_callback(void);
// Defer PM lock acquisition until the respective read() callback
// is invoked, to ensure the lock is acquired and released by
// the same thread. Else the lock tracking validation code must
// be disabled for this lock, which is undesirable. As a result,
// lockless macro is used below. See bug 2594854 for additional
// information.
#define UVM_DEFINE_SINGLE_PROCFS_FILE(name) \

Some files were not shown because too many files have changed in this diff.