Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git, synced 2026-01-27 11:39:46 +00:00

Compare commits: 570.124.04 ... 575.51.03 (5 commits)
| Author | SHA1 | Date |
|---|---|---|
| | e00332b05f | |
| | 4159579888 | |
| | e8113f665d | |
| | c5e439fea4 | |
| | 25bef4626e | |
@@ -86,7 +86,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
 EXTRA_CFLAGS += -I$(src)
 EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
 EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
-EXTRA_CFLAGS += -DNV_VERSION_STRING=\"570.124.04\"
+EXTRA_CFLAGS += -DNV_VERSION_STRING=\"575.51.03\"
 
 ifneq ($(SYSSRCHOST1X),)
 EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
@@ -71,6 +71,31 @@ else
   CC ?= cc
   LD ?= ld
   OBJDUMP ?= objdump
+  AWK ?= awk
+  # Bake the following awk program in a string. The program is needed to add C++
+  # to the languages excluded from BTF generation.
+  #
+  # Also, unconditionally return success (0) from the awk program, rather than
+  # propagating pahole's return status (with 'exit system(pahole_cmd)'), to
+  # workaround an DW_TAG_rvalue_reference_type error in
+  # kernel/nvidia-modeset.ko.
+  #
+  # BEGIN {
+  #     pahole_cmd = "pahole"
+  #     for (i = 1; i < ARGC; i++) {
+  #         if (ARGV[i] ~ /--lang_exclude=/) {
+  #             pahole_cmd = pahole_cmd sprintf(" %s,c++", ARGV[i])
+  #         } else {
+  #             pahole_cmd = pahole_cmd sprintf(" %s", ARGV[i])
+  #         }
+  #     }
+  #     system(pahole_cmd)
+  # }
+  PAHOLE_AWK_PROGRAM = BEGIN { pahole_cmd = \"pahole\"; for (i = 1; i < ARGC; i++) { if (ARGV[i] ~ /--lang_exclude=/) { pahole_cmd = pahole_cmd sprintf(\" %s,c++\", ARGV[i]); } else { pahole_cmd = pahole_cmd sprintf(\" %s\", ARGV[i]); } } system(pahole_cmd); }
+
+  # If scripts/pahole-flags.sh is not present in the kernel tree, add PAHOLE and
+  # PAHOLE_AWK_PROGRAM assignments to PAHOLE_VARIABLES; otherwise assign the
+  # empty string to PAHOLE_VARIABLES.
+  PAHOLE_VARIABLES=$(if $(wildcard $(KERNEL_SOURCES)/scripts/pahole-flags.sh),,"PAHOLE=$(AWK) '$(PAHOLE_AWK_PROGRAM)'")
 
   ifndef ARCH
     ARCH := $(shell uname -m | sed -e 's/i.86/i386/' \
@@ -86,7 +111,7 @@ else
   ifneq ($(filter $(ARCH),i386 x86_64),)
     KERNEL_ARCH = x86
   else
-    ifeq ($(filter $(ARCH),arm64 powerpc),)
+    ifeq ($(filter $(ARCH),arm64 powerpc riscv),)
       $(error Unsupported architecture $(ARCH))
     endif
   endif
@@ -112,7 +137,8 @@ else
 
 .PHONY: modules module clean clean_conftest modules_install
 modules clean modules_install:
-	@$(MAKE) "LD=$(LD)" "CC=$(CC)" "OBJDUMP=$(OBJDUMP)" $(KBUILD_PARAMS) $@
+	@$(MAKE) "LD=$(LD)" "CC=$(CC)" "OBJDUMP=$(OBJDUMP)" \
+	    $(PAHOLE_VARIABLES) $(KBUILD_PARAMS) $@
 	@if [ "$@" = "modules" ]; then \
 	  for module in $(NV_KERNEL_MODULES); do \
 	    if [ -x split-object-file.sh ]; then \
kernel-open/common/inc/dce_rm_client_ipc.h (new file, 35 lines)

@@ -0,0 +1,35 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _OS_DCE_CLIENT_IPC_H_
+#define _OS_DCE_CLIENT_IPC_H_
+
+// RM IPC Client Types
+
+#define DCE_CLIENT_RM_IPC_TYPE_SYNC   0x0
+#define DCE_CLIENT_RM_IPC_TYPE_EVENT  0x1
+#define DCE_CLIENT_RM_IPC_TYPE_MAX    0x2
+
+void dceclientHandleAsyncRpcCallback(NvU32 handle, NvU32 interfaceType,
+                                     NvU32 msgLength, void *data,
+                                     void *usrCtx);
+#endif
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2001-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2001-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -36,8 +36,7 @@
 #include "nv-timer.h"
 #include "nv-time.h"
 #include "nv-chardev-numbers.h"
 
-#define NV_KERNEL_NAME "Linux"
+#include "nv-platform.h"
 
 #ifndef AUTOCONF_INCLUDED
 #if defined(NV_GENERATED_AUTOCONF_H_PRESENT)
@@ -239,7 +238,7 @@ NV_STATUS nvos_forward_error_to_cray(struct pci_dev *, NvU32,
 #undef NV_SET_PAGES_UC_PRESENT
 #endif
 
-#if !defined(NVCPU_AARCH64) && !defined(NVCPU_PPC64LE) && !defined(NVCPU_RISCV64)
+#if !defined(NVCPU_AARCH64) && !defined(NVCPU_RISCV64)
 #if !defined(NV_SET_MEMORY_UC_PRESENT) && !defined(NV_SET_PAGES_UC_PRESENT)
 #error "This driver requires the ability to change memory types!"
 #endif
@@ -345,8 +344,6 @@ extern int nv_pat_mode;
 
 #define NV_PAGE_COUNT(page) \
     ((unsigned int)page_count(page))
-#define NV_GET_PAGE_COUNT(page_ptr) \
-    (NV_PAGE_COUNT(NV_GET_PAGE_STRUCT(page_ptr->phys_addr)))
 #define NV_GET_PAGE_FLAGS(page_ptr) \
     (NV_GET_PAGE_STRUCT(page_ptr->phys_addr)->flags)
 
@@ -405,7 +402,7 @@ typedef enum
     NV_MEMORY_TYPE_DEVICE_MMIO, /* All kinds of MMIO referred by NVRM e.g. BARs and MCFG of device */
 } nv_memory_type_t;
 
-#if defined(NVCPU_AARCH64) || defined(NVCPU_PPC64LE) || defined(NVCPU_RISCV64)
+#if defined(NVCPU_AARCH64) || defined(NVCPU_RISCV64)
 #define NV_ALLOW_WRITE_COMBINING(mt)    1
 #elif defined(NVCPU_X86_64)
 #if defined(NV_ENABLE_PAT_SUPPORT)
@@ -463,10 +460,7 @@ static inline void *nv_vmalloc(unsigned long size)
 #else
     void *ptr = __vmalloc(size, GFP_KERNEL);
 #endif
-    if (ptr)
-    {
-        NV_MEMDBG_ADD(ptr, size);
-    }
+    NV_MEMDBG_ADD(ptr, size);
     return ptr;
 }
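This hunk (and the ones that follow) drops the `if (ptr)` guard in front of every `NV_MEMDBG_ADD()` call site, which is only safe if the bookkeeping path tolerates a NULL pointer itself. A minimal sketch of that idea, with hypothetical names (`nv_memdbg_add`, `nv_memdbg_stats` are stand-ins, not the driver's actual implementation):

```c
#include <stddef.h>

/* Hypothetical NULL-tolerant allocation tracker: because it is safe to
 * call with ptr == NULL, per-call-site "if (ptr)" guards become redundant,
 * which is the property this change relies on. */
struct nv_memdbg_stats {
    unsigned long failed_allocs;   /* calls where ptr was NULL */
    unsigned long tracked_bytes;
};

static struct nv_memdbg_stats g_stats;

static void nv_memdbg_add(void *ptr, size_t size)
{
    if (ptr == NULL) {
        g_stats.failed_allocs++;   /* record nothing else, but stay safe */
        return;
    }
    g_stats.tracked_bytes += size;
}
```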
@@ -483,10 +477,7 @@ static inline void *nv_ioremap(NvU64 phys, NvU64 size)
 #else
     void *ptr = ioremap(phys, size);
 #endif
-    if (ptr)
-    {
-        NV_MEMDBG_ADD(ptr, size);
-    }
+    NV_MEMDBG_ADD(ptr, size);
     return ptr;
 }
@@ -502,29 +493,12 @@ static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
     ptr = ioremap_cache_shared(phys, size);
 #elif defined(NV_IOREMAP_CACHE_PRESENT)
     ptr = ioremap_cache(phys, size);
-#elif defined(NVCPU_PPC64LE)
-    //
-    // ioremap_cache() has been only implemented correctly for ppc64le with
-    // commit f855b2f544d6 in April 2017 (kernel 4.12+). Internally, the kernel
-    // does provide a default implementation of ioremap_cache() that would be
-    // incorrect for our use (creating an uncached mapping) before the
-    // referenced commit, but that implementation is not exported and the
-    // NV_IOREMAP_CACHE_PRESENT conftest doesn't pick it up, and we end up in
-    // this #elif branch.
-    //
-    // At the same time, ppc64le have supported ioremap_prot() since May 2011
-    // (commit 40f1ce7fb7e8, kernel 3.0+) and that covers all kernels we
-    // support on power.
-    //
-    ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
 #else
     return nv_ioremap(phys, size);
 #endif
 
-    if (ptr)
-    {
-        NV_MEMDBG_ADD(ptr, size);
-    }
+    NV_MEMDBG_ADD(ptr, size);
 
     return ptr;
 }
@@ -539,10 +513,8 @@ static inline void *nv_ioremap_wc(NvU64 phys, NvU64 size)
     return nv_ioremap_nocache(phys, size);
 #endif
 
-    if (ptr)
-    {
-        NV_MEMDBG_ADD(ptr, size);
-    }
+    NV_MEMDBG_ADD(ptr, size);
 
     return ptr;
 }
@@ -562,22 +534,19 @@ static NvBool nv_numa_node_has_memory(int node_id)
 #define NV_KMALLOC(ptr, size) \
     { \
         (ptr) = kmalloc(size, NV_GFP_KERNEL); \
-        if (ptr) \
-            NV_MEMDBG_ADD(ptr, size); \
+        NV_MEMDBG_ADD(ptr, size); \
     }
 
 #define NV_KZALLOC(ptr, size) \
     { \
         (ptr) = kzalloc(size, NV_GFP_KERNEL); \
-        if (ptr) \
-            NV_MEMDBG_ADD(ptr, size); \
+        NV_MEMDBG_ADD(ptr, size); \
     }
 
 #define NV_KMALLOC_ATOMIC(ptr, size) \
     { \
         (ptr) = kmalloc(size, NV_GFP_ATOMIC); \
-        if (ptr) \
-            NV_MEMDBG_ADD(ptr, size); \
+        NV_MEMDBG_ADD(ptr, size); \
     }
 
 #if defined(__GFP_RETRY_MAYFAIL)
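Note that the allocation macros still yield NULL on failure; only the debug-tracking guard moved inside the macro. A hedged usage sketch (the wrapper function is hypothetical, not driver code):

```c
/* Hypothetical caller showing the NV_KMALLOC/NV_KFREE pattern: the macro
 * assigns through its first argument rather than returning a value, so
 * failure is still detected by testing the pointer afterwards. */
static NV_STATUS nv_alloc_work_buffer(NvU32 count, NvU8 **out)
{
    NvU8 *buf;

    NV_KMALLOC(buf, count * sizeof(*buf));
    if (buf == NULL)
        return NV_ERR_NO_MEMORY;

    *out = buf;
    return NV_OK;
    /* ... teardown elsewhere: NV_KFREE(buf, count * sizeof(*buf)); */
}
```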
@@ -591,8 +560,7 @@ static NvBool nv_numa_node_has_memory(int node_id)
 #define NV_KMALLOC_NO_OOM(ptr, size) \
     { \
         (ptr) = kmalloc(size, NV_GFP_NO_OOM); \
-        if (ptr) \
-            NV_MEMDBG_ADD(ptr, size); \
+        NV_MEMDBG_ADD(ptr, size); \
     }
 
 #define NV_KFREE(ptr, size) \
@@ -625,9 +593,9 @@ static inline pgprot_t nv_sme_clr(pgprot_t prot)
 #endif // __sme_clr
 }
 
-static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
+static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot)
 {
-    pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
+    pgprot_t prot = __pgprot(pgprot_val(vm_prot));
 
 #if defined(pgprot_decrypted)
     return pgprot_decrypted(prot);
@@ -648,41 +616,6 @@ static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
 #endif
 #endif
 
-static inline NvUPtr nv_vmap(struct page **pages, NvU32 page_count,
-                             NvBool cached, NvBool unencrypted)
-{
-    void *ptr;
-    pgprot_t prot = PAGE_KERNEL;
-#if defined(NVCPU_X86_64)
-#if defined(PAGE_KERNEL_NOENC)
-    if (unencrypted)
-    {
-        prot = cached ? nv_adjust_pgprot(PAGE_KERNEL_NOENC, 0) :
-                        nv_adjust_pgprot(NV_PAGE_KERNEL_NOCACHE_NOENC, 0);
-    }
-    else
-#endif
-    {
-        prot = cached ? PAGE_KERNEL : PAGE_KERNEL_NOCACHE;
-    }
-#elif defined(NVCPU_AARCH64)
-    prot = cached ? PAGE_KERNEL : NV_PGPROT_UNCACHED(PAGE_KERNEL);
-#endif
-    /* All memory cached in PPC64LE; can't honor 'cached' input. */
-    ptr = vmap(pages, page_count, VM_MAP, prot);
-    if (ptr)
-    {
-        NV_MEMDBG_ADD(ptr, page_count * PAGE_SIZE);
-    }
-    return (NvUPtr)ptr;
-}
-
-static inline void nv_vunmap(NvUPtr vaddr, NvU32 page_count)
-{
-    vunmap((void *)vaddr);
-    NV_MEMDBG_REMOVE((void *)vaddr, page_count * PAGE_SIZE);
-}
-
 #if defined(NV_GET_NUM_PHYSPAGES_PRESENT)
 #define NV_NUM_PHYSPAGES get_num_physpages()
 #else
@@ -707,6 +640,47 @@ static inline void nv_vunmap(NvUPtr vaddr, NvU32 page_count)
 
 #define NV_NUM_CPUS() num_possible_cpus()
 
+#define NV_HAVE_MEMORY_ENCRYPT_DECRYPT 0
+
+#if defined(NVCPU_X86_64) && \
+    NV_IS_EXPORT_SYMBOL_GPL_set_memory_encrypted && \
+    NV_IS_EXPORT_SYMBOL_GPL_set_memory_decrypted
+#undef NV_HAVE_MEMORY_ENCRYPT_DECRYPT
+#define NV_HAVE_MEMORY_ENCRYPT_DECRYPT 1
+#endif
+
+static inline void nv_set_memory_decrypted_zeroed(NvBool unencrypted,
+                                                  unsigned long virt_addr,
+                                                  int num_native_pages,
+                                                  size_t size)
+{
+    if (virt_addr == 0)
+        return;
+
+#if NV_HAVE_MEMORY_ENCRYPT_DECRYPT
+    if (unencrypted)
+    {
+        set_memory_decrypted(virt_addr, num_native_pages);
+        memset((void *)virt_addr, 0, size);
+    }
+#endif
+}
+
+static inline void nv_set_memory_encrypted(NvBool unencrypted,
+                                           unsigned long virt_addr,
+                                           int num_native_pages)
+{
+    if (virt_addr == 0)
+        return;
+
+#if NV_HAVE_MEMORY_ENCRYPT_DECRYPT
+    if (unencrypted)
+    {
+        set_memory_encrypted(virt_addr, num_native_pages);
+    }
+#endif
+}
+
 static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
 {
 #if defined(NV_PHYS_TO_DMA_PRESENT)
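A sketch of how helpers like these are typically paired in a memory-encryption (e.g. AMD SEV) flow; the wrapper below is illustrative only and not taken from the driver:

```c
/* Hypothetical flow: a buffer shared with the device is marked decrypted
 * (and zeroed, since its prior encrypted contents read back as garbage),
 * then re-encrypted before returning the pages to the kernel allocator. */
static int share_pages_with_device(unsigned long virt, int pages, size_t bytes)
{
    NvBool unencrypted = NV_TRUE;  /* caller decided this buffer is shared */

    nv_set_memory_decrypted_zeroed(unencrypted, virt, pages, bytes);

    /* ... hand the buffer to the device and do the work ... */

    nv_set_memory_encrypted(unencrypted, virt, pages);
    return 0;
}
```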
@@ -887,94 +861,42 @@ typedef void irqreturn_t;
      (((addr) >> NV_RM_PAGE_SHIFT) == \
          (((addr) + (size) - 1) >> NV_RM_PAGE_SHIFT)))
 
-/*
- * The kernel may have a workaround for this, by providing a method to isolate
- * a single 4K page in a given mapping.
- */
-#if (PAGE_SIZE > NV_RM_PAGE_SIZE) && defined(NVCPU_PPC64LE) && defined(NV_PAGE_4K_PFN)
-#define NV_4K_PAGE_ISOLATION_PRESENT
-#define NV_4K_PAGE_ISOLATION_MMAP_ADDR(addr)     \
-    ((NvP64)((void*)(((addr) >> NV_RM_PAGE_SHIFT) << PAGE_SHIFT)))
-#define NV_4K_PAGE_ISOLATION_MMAP_LEN(size)      PAGE_SIZE
-#define NV_4K_PAGE_ISOLATION_ACCESS_START(addr)  \
-    ((NvP64)((void*)((addr) & ~NV_RM_PAGE_MASK)))
-#define NV_4K_PAGE_ISOLATION_ACCESS_LEN(addr, size) \
-    ((((addr) & NV_RM_PAGE_MASK) + size + NV_RM_PAGE_MASK) & \
-     ~NV_RM_PAGE_MASK)
-#define NV_PROT_4K_PAGE_ISOLATION NV_PAGE_4K_PFN
-#endif
-
 static inline int nv_remap_page_range(struct vm_area_struct *vma,
     unsigned long virt_addr, NvU64 phys_addr, NvU64 size, pgprot_t prot)
 {
-    int ret = -1;
-
-#if defined(NV_4K_PAGE_ISOLATION_PRESENT) && defined(NV_PROT_4K_PAGE_ISOLATION)
-    if ((size == PAGE_SIZE) &&
-        ((pgprot_val(prot) & NV_PROT_4K_PAGE_ISOLATION) != 0))
-    {
-        /*
-         * remap_4k_pfn() hardcodes the length to a single OS page, and checks
-         * whether applying the page isolation workaround will cause PTE
-         * corruption (in which case it will fail, and this is an unsupported
-         * configuration).
-         */
-#if defined(NV_HASH__REMAP_4K_PFN_PRESENT)
-        ret = hash__remap_4k_pfn(vma, virt_addr, (phys_addr >> PAGE_SHIFT), prot);
-#else
-        ret = remap_4k_pfn(vma, virt_addr, (phys_addr >> PAGE_SHIFT), prot);
-#endif
-    }
-    else
-#endif
-    {
-        ret = remap_pfn_range(vma, virt_addr, (phys_addr >> PAGE_SHIFT), size,
-                              prot);
-    }
-
-    return ret;
+    return remap_pfn_range(vma, virt_addr, (phys_addr >> PAGE_SHIFT), size,
+                           prot);
 }
 
 static inline int nv_io_remap_page_range(struct vm_area_struct *vma,
-    NvU64 phys_addr, NvU64 size, NvU32 extra_prot, NvU64 start)
+    NvU64 phys_addr, NvU64 size, NvU64 start)
 {
     int ret = -1;
 #if !defined(NV_XEN_SUPPORT_FULLY_VIRTUALIZED_KERNEL)
     ret = nv_remap_page_range(vma, start, phys_addr, size,
-        nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
+        nv_adjust_pgprot(vma->vm_page_prot));
 #else
     ret = io_remap_pfn_range(vma, start, (phys_addr >> PAGE_SHIFT),
-        size, nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
+        size, nv_adjust_pgprot(vma->vm_page_prot));
 #endif
     return ret;
 }
 
 static inline vm_fault_t nv_insert_pfn(struct vm_area_struct *vma,
-    NvU64 virt_addr, NvU64 pfn, NvU32 extra_prot)
+    NvU64 virt_addr, NvU64 pfn)
 {
     /*
      * vm_insert_pfn{,_prot} replaced with vmf_insert_pfn{,_prot} in Linux 4.20
      */
 #if defined(NV_VMF_INSERT_PFN_PROT_PRESENT)
     return vmf_insert_pfn_prot(vma, virt_addr, pfn,
-        __pgprot(pgprot_val(vma->vm_page_prot) | extra_prot));
+        __pgprot(pgprot_val(vma->vm_page_prot)));
 #else
     int ret = -EINVAL;
-    /*
-     * Only PPC64LE (NV_4K_PAGE_ISOLATION_PRESENT) requires extra_prot to be
-     * used when remapping.
-     *
-     * vm_insert_pfn_prot() was added in Linux 4.4, whereas POWER9 support
-     * was added in Linux 4.8.
-     *
-     * Rather than tampering with the vma to make use of extra_prot with
-     * vm_insert_pfn() on older kernels, for now, just fail in this case, as
-     * it's not expected to be used currently.
-     */
 #if defined(NV_VM_INSERT_PFN_PROT_PRESENT)
     ret = vm_insert_pfn_prot(vma, virt_addr, pfn,
-        __pgprot(pgprot_val(vma->vm_page_prot) | extra_prot));
-#elif !defined(NV_4K_PAGE_ISOLATION_PRESENT)
+        __pgprot(pgprot_val(vma->vm_page_prot)));
+#else
     ret = vm_insert_pfn(vma, virt_addr, pfn);
 #endif
     switch (ret)
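With the PPC64LE 4K-page-isolation path gone, protection bits come solely from vma->vm_page_prot. A hedged sketch of a caller using the new four-argument signature (the handler and BAR offset are invented for illustration):

```c
/* Hypothetical mmap-handler fragment: the extra_prot argument no longer
 * exists, so any caching policy is folded into vm_page_prot up front. */
static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
    NvU64 phys = 0xF0000000;                    /* example BAR offset */
    NvU64 size = vma->vm_end - vma->vm_start;

    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

    if (nv_io_remap_page_range(vma, phys, size, vma->vm_start) != 0)
        return -EAGAIN;

    return 0;
}
```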
@@ -1160,11 +1082,6 @@ static inline void nv_kmem_cache_free_stack(nvidia_stack_t *stack)
 typedef struct nvidia_pte_s {
     NvU64           phys_addr;
     unsigned long   virt_addr;
-    NvU64           dma_addr;
 #ifdef CONFIG_XEN
     unsigned int    guest_pfn;
 #endif
     unsigned int    page_count;
 } nvidia_pte_t;
 
 #if defined(CONFIG_DMA_SHARED_BUFFER)
@@ -1205,6 +1122,7 @@ typedef struct nv_alloc_s {
     NvS32      node_id;             /* Node id for memory allocation when node is set in flags */
     void       *import_priv;
     struct sg_table *import_sgt;
+    dma_addr_t dma_handle;          /* dma handle used by dma_alloc_coherent(), dma_free_coherent() */
 } nv_alloc_t;
 
 /**
@@ -1430,6 +1348,23 @@ struct os_wait_queue {
     struct completion q;
 };
 
+/*!
+ * @brief Mapping between clock names and clock handles.
+ *
+ * TEGRA_DISP_WHICH_CLK_MAX: maximum number of clocks
+ * defined in below enum.
+ *
+ * arch/nvalloc/unix/include/nv.h
+ * enum TEGRASOC_WHICH_CLK_MAX;
+ *
+ */
+typedef struct nvsoc_clks_s {
+    struct {
+        struct clk *handles;
+        const char *clkName;
+    } clk[TEGRASOC_WHICH_CLK_MAX];
+} nvsoc_clks_t;
+
 /*
  * To report error in msi/msix when unhandled count reaches a threshold
  */
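A small sketch of walking the name/handle table added above; the lookup helper is illustrative, not part of the driver (in kernel code `strcmp` comes from `<linux/string.h>`):

```c
/* Hypothetical lookup over nvsoc_clks_t: each slot pairs a clock handle
 * with its name, indexed by the TEGRASOC_WHICH_CLK enum. */
static struct clk *find_clk_by_name(nvsoc_clks_t *clks, const char *name)
{
    int i;

    for (i = 0; i < TEGRASOC_WHICH_CLK_MAX; i++)
    {
        if (clks->clk[i].clkName != NULL &&
            strcmp(clks->clk[i].clkName, name) == 0)
        {
            return clks->clk[i].handles;
        }
    }
    return NULL;
}
```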
@@ -1589,6 +1524,8 @@ typedef struct nv_linux_state_s {
     nv_acpi_t* nv_acpi_object;
 #endif
 
+    nvsoc_clks_t soc_clk_handles;
+
     /* Lock serializing ISRs for different SOC vectors */
     nv_spinlock_t soc_isr_lock;
     void *soc_bh_mutex;
@@ -1788,12 +1725,10 @@ static inline struct kmem_cache *nv_kmem_cache_create(const char *name, unsigned
  */
 static inline NV_STATUS nv_check_gpu_state(nv_state_t *nv)
 {
-#if !defined(NVCPU_PPC64LE)
     if (NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv))
     {
         return NV_ERR_GPU_IS_LOST;
     }
-#endif
 
     return NV_OK;
 }
@@ -26,8 +26,7 @@
 
 #include "nv-linux.h"
 
-#if (defined(CONFIG_X86_LOCAL_APIC) || defined(NVCPU_AARCH64) || \
-     defined(NVCPU_PPC64LE)) && \
+#if (defined(CONFIG_X86_LOCAL_APIC) || defined(NVCPU_AARCH64)) && \
     (defined(CONFIG_PCI_MSI) || defined(CONFIG_PCI_USE_VECTOR))
 #define NV_LINUX_PCIE_MSI_SUPPORTED
 #endif
kernel-open/common/inc/nv-platform.h (new file, 36 lines)

@@ -0,0 +1,36 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NV_PLATFORM_H
+#define NV_PLATFORM_H
+
+#include "nv-linux.h"
+
+irqreturn_t nvidia_isr (int, void *);
+irqreturn_t nvidia_isr_kthread_bh (int, void *);
+
+#define NV_SUPPORTS_PLATFORM_DEVICE 0
+
+#define NV_SUPPORTS_PLATFORM_DISPLAY_DEVICE 0
+
+#endif
@@ -41,7 +41,7 @@ void nv_procfs_remove_gpu (nv_linux_state_t *);
 
 int nvidia_mmap (struct file *, struct vm_area_struct *);
 int nvidia_mmap_helper (nv_state_t *, nv_linux_file_private_t *, nvidia_stack_t *, struct vm_area_struct *, void *);
-int nv_encode_caching (pgprot_t *, NvU32, NvU32);
+int nv_encode_caching (pgprot_t *, NvU32, nv_memory_type_t);
 void nv_revoke_gpu_mappings_locked(nv_state_t *);
 
 NvUPtr nv_vm_map_pages (struct page **, NvU32, NvBool, NvBool);
@@ -168,6 +168,15 @@ typedef enum _TEGRASOC_WHICH_CLK
     TEGRASOC_WHICH_CLK_PLLA_DISP,
     TEGRASOC_WHICH_CLK_PLLA_DISPHUB,
     TEGRASOC_WHICH_CLK_PLLA,
+    TEGRASOC_WHICH_CLK_EMC,
+    TEGRASOC_WHICH_CLK_GPU_FIRST,
+    TEGRASOC_WHICH_CLK_GPU_SYS = TEGRASOC_WHICH_CLK_GPU_FIRST,
+    TEGRASOC_WHICH_CLK_GPU_NVD,
+    TEGRASOC_WHICH_CLK_GPU_UPROC,
+    TEGRASOC_WHICH_CLK_GPU_GPC0,
+    TEGRASOC_WHICH_CLK_GPU_GPC1,
+    TEGRASOC_WHICH_CLK_GPU_GPC2,
+    TEGRASOC_WHICH_CLK_GPU_LAST = TEGRASOC_WHICH_CLK_GPU_GPC2,
     TEGRASOC_WHICH_CLK_MAX, // TEGRASOC_WHICH_CLK_MAX is defined for boundary checks only.
 } TEGRASOC_WHICH_CLK;
@@ -283,7 +292,6 @@ typedef struct nv_usermap_access_params_s
     MemoryArea memArea;
     NvU64    access_start;
     NvU64    access_size;
-    NvU64    remap_prot_extra;
     NvBool   contig;
     NvU32    caching;
 } nv_usermap_access_params_t;
@@ -299,7 +307,6 @@ typedef struct nv_alloc_mapping_context_s {
     MemoryArea memArea;
     NvU64  access_start;
     NvU64  access_size;
-    NvU64  remap_prot_extra;
     NvU32  prot;
     NvBool valid;
     NvU32  caching;
@@ -498,6 +505,9 @@ typedef struct nv_state_t
         NvU32 dispIsoStreamId;
         NvU32 dispNisoStreamId;
     } iommus;
+
+    /* Console is managed by drm drivers or NVKMS */
+    NvBool client_managed_console;
 } nv_state_t;
 
 #define NVFP_TYPE_NONE       0x0
@@ -542,9 +552,9 @@ typedef struct UvmGpuNvlinkInfo_tag *nvgpuNvlinkInfo_t;
 typedef struct UvmGpuEccInfo_tag *nvgpuEccInfo_t;
 typedef struct UvmGpuFaultInfo_tag *nvgpuFaultInfo_t;
 typedef struct UvmGpuAccessCntrInfo_tag *nvgpuAccessCntrInfo_t;
-typedef struct UvmGpuAccessCntrConfig_tag *nvgpuAccessCntrConfig_t;
-typedef struct UvmGpuInfo_tag nvgpuInfo_t;
-typedef struct UvmGpuClientInfo_tag nvgpuClientInfo_t;
+typedef struct UvmGpuAccessCntrConfig_tag nvgpuAccessCntrConfig_t;
+typedef struct UvmGpuInfo_tag nvgpuInfo_t;
+typedef struct UvmGpuClientInfo_tag nvgpuClientInfo_t;
 typedef struct UvmPmaAllocationOptions_tag *nvgpuPmaAllocationOptions_t;
 typedef struct UvmPmaStatistics_tag *nvgpuPmaStatistics_t;
 typedef struct UvmGpuMemoryInfo_tag *nvgpuMemoryInfo_t;
|
||||
* flags
|
||||
*/
|
||||
|
||||
#define NV_FLAG_OPEN 0x0001
|
||||
#define NV_FLAG_EXCLUDE 0x0002
|
||||
#define NV_FLAG_CONTROL 0x0004
|
||||
// Unused 0x0008
|
||||
#define NV_FLAG_SOC_DISPLAY 0x0010
|
||||
#define NV_FLAG_USES_MSI 0x0020
|
||||
#define NV_FLAG_USES_MSIX 0x0040
|
||||
#define NV_FLAG_PASSTHRU 0x0080
|
||||
#define NV_FLAG_SUSPENDED 0x0100
|
||||
#define NV_FLAG_SOC_IGPU 0x0200
|
||||
#define NV_FLAG_OPEN 0x0001
|
||||
#define NV_FLAG_EXCLUDE 0x0002
|
||||
#define NV_FLAG_CONTROL 0x0004
|
||||
// Unused 0x0008
|
||||
#define NV_FLAG_SOC_DISPLAY 0x0010
|
||||
#define NV_FLAG_USES_MSI 0x0020
|
||||
#define NV_FLAG_USES_MSIX 0x0040
|
||||
#define NV_FLAG_PASSTHRU 0x0080
|
||||
#define NV_FLAG_SUSPENDED 0x0100
|
||||
#define NV_FLAG_SOC_IGPU 0x0200
|
||||
/* To be set when an FLR needs to be triggered after device shut down. */
|
||||
#define NV_FLAG_TRIGGER_FLR 0x0400
|
||||
#define NV_FLAG_PERSISTENT_SW_STATE 0x0800
|
||||
#define NV_FLAG_IN_RECOVERY 0x1000
|
||||
// Unused 0x2000
|
||||
#define NV_FLAG_UNBIND_LOCK 0x4000
|
||||
#define NV_FLAG_TRIGGER_FLR 0x0400
|
||||
#define NV_FLAG_PERSISTENT_SW_STATE 0x0800
|
||||
#define NV_FLAG_IN_RECOVERY 0x1000
|
||||
#define NV_FLAG_PCI_REMOVE_IN_PROGRESS 0x2000
|
||||
#define NV_FLAG_UNBIND_LOCK 0x4000
|
||||
/* To be set when GPU is not present on the bus, to help device teardown */
|
||||
#define NV_FLAG_IN_SURPRISE_REMOVAL 0x8000
|
||||
#define NV_FLAG_IN_SURPRISE_REMOVAL 0x8000
|
||||
|
||||
typedef enum
|
||||
{
|
||||
@@ -795,7 +805,7 @@ NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU64, Nv
 NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvU64, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
 NV_STATUS NV_API_CALL nv_free_pages (nv_state_t *, NvU32, NvBool, NvU32, void *);
 
-NV_STATUS NV_API_CALL nv_register_user_pages (nv_state_t *, NvU64, NvU64 *, void *, void **);
+NV_STATUS NV_API_CALL nv_register_user_pages (nv_state_t *, NvU64, NvU64 *, void *, void **, NvBool);
 void NV_API_CALL nv_unregister_user_pages (nv_state_t *, NvU64, void **, void **);
 
 NV_STATUS NV_API_CALL nv_register_peer_io_mem (nv_state_t *, NvU64 *, NvU64, void **);
@@ -915,6 +925,15 @@ NV_STATUS NV_API_CALL nv_get_phys_pages (void *, void *, NvU32 *);
 
 void NV_API_CALL nv_get_disp_smmu_stream_ids (nv_state_t *, NvU32 *, NvU32 *);
 
+NV_STATUS NV_API_CALL nv_clk_get_handles (nv_state_t *);
+void NV_API_CALL nv_clk_clear_handles (nv_state_t *);
+NV_STATUS NV_API_CALL nv_enable_clk (nv_state_t *, TEGRASOC_WHICH_CLK);
+void NV_API_CALL nv_disable_clk (nv_state_t *, TEGRASOC_WHICH_CLK);
+NV_STATUS NV_API_CALL nv_get_curr_freq (nv_state_t *, TEGRASOC_WHICH_CLK, NvU32 *);
+NV_STATUS NV_API_CALL nv_get_max_freq (nv_state_t *, TEGRASOC_WHICH_CLK, NvU32 *);
+NV_STATUS NV_API_CALL nv_get_min_freq (nv_state_t *, TEGRASOC_WHICH_CLK, NvU32 *);
+NV_STATUS NV_API_CALL nv_set_freq (nv_state_t *, TEGRASOC_WHICH_CLK, NvU32);
+
 /*
  * ---------------------------------------------------------------------------
  *
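A hedged sketch of how these new Tegra clock entry points could be sequenced; the wrapper, clock choice, and the assumption that frequencies are in kHz are all illustrative, not taken from the driver:

```c
/* Hypothetical: acquire clock handles, enable one GPU clock, and pin it to
 * its maximum reported frequency (frequency units assumed here to be kHz). */
static NV_STATUS bump_gpu_sys_clk(nv_state_t *nv)
{
    NvU32 max_freq = 0;
    NV_STATUS status;

    status = nv_clk_get_handles(nv);
    if (status != NV_OK)
        return status;

    status = nv_enable_clk(nv, TEGRASOC_WHICH_CLK_GPU_SYS);
    if (status != NV_OK)
        return status;

    if (nv_get_max_freq(nv, TEGRASOC_WHICH_CLK_GPU_SYS, &max_freq) == NV_OK)
        status = nv_set_freq(nv, TEGRASOC_WHICH_CLK_GPU_SYS, max_freq);

    /* teardown elsewhere: nv_disable_clk(); nv_clk_clear_handles(); */
    return status;
}
```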
@@ -942,6 +961,7 @@ NvBool NV_API_CALL rm_isr (nvidia_stack_t *, nv_state_t *
 void   NV_API_CALL rm_isr_bh (nvidia_stack_t *, nv_state_t *);
 void   NV_API_CALL rm_isr_bh_unlocked (nvidia_stack_t *, nv_state_t *);
 NvBool NV_API_CALL rm_is_msix_allowed (nvidia_stack_t *, nv_state_t *);
+NvBool NV_API_CALL rm_wait_for_bar_firewall (nvidia_stack_t *, NvU32 domain, NvU8 bus, NvU8 device, NvU8 function, NvU16 devId);
 NV_STATUS NV_API_CALL rm_power_management (nvidia_stack_t *, nv_state_t *, nv_pm_action_t);
 NV_STATUS NV_API_CALL rm_stop_user_channels (nvidia_stack_t *, nv_state_t *);
 NV_STATUS NV_API_CALL rm_restart_user_channels (nvidia_stack_t *, nv_state_t *);
@@ -1040,6 +1060,9 @@ void NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);
 
 NvBool NV_API_CALL rm_is_altstack_in_use(void);
 
+void NV_API_CALL rm_notify_gpu_addition(nvidia_stack_t *, nv_state_t *);
+void NV_API_CALL rm_notify_gpu_removal(nvidia_stack_t *, nv_state_t *);
+
 /* vGPU VFIO specific functions */
 NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *,
                                              NvU32 *, NvU32 *, NvU32);
@@ -1054,7 +1077,7 @@ NV_STATUS NV_API_CALL nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *,
 NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *, NvU32, NvBool *);
 NV_STATUS NV_API_CALL nv_gpu_unbind_event(nvidia_stack_t *, NvU32, NvBool *);
 
-NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_access_params_t*);
+NV_STATUS NV_API_CALL nv_check_usermap_access_params(nv_state_t*, const nv_usermap_access_params_t*);
 nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*);
 void NV_API_CALL nv_flush_coherent_cpu_cache_range(nv_state_t *nv, NvU64 cpu_virtual, NvU64 size);
kernel-open/common/inc/nv_common_utils.h (new file, 120 lines)

@@ -0,0 +1,120 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __NV_COMMON_UTILS_H__
+#define __NV_COMMON_UTILS_H__
+
+#include "nvtypes.h"
+#include "nvmisc.h"
+
+#if !defined(TRUE)
+#define TRUE NV_TRUE
+#endif
+
+#if !defined(FALSE)
+#define FALSE NV_FALSE
+#endif
+
+#define NV_IS_UNSIGNED(x) ((__typeof__(x))-1 > 0)
+
+/* Get the length of a statically-sized array. */
+#define ARRAY_LEN(_arr) (sizeof(_arr) / sizeof(_arr[0]))
+
+#define NV_INVALID_HEAD 0xFFFFFFFF
+
+#define NV_INVALID_CONNECTOR_PHYSICAL_INFORMATION (~0)
+
+#if !defined(NV_MIN)
+# define NV_MIN(a,b) (((a)<(b))?(a):(b))
+#endif
+
+#define NV_MIN3(a,b,c) NV_MIN(NV_MIN(a, b), c)
+#define NV_MIN4(a,b,c,d) NV_MIN3(NV_MIN(a,b),c,d)
+
+#if !defined(NV_MAX)
+# define NV_MAX(a,b) (((a)>(b))?(a):(b))
+#endif
+
+#define NV_MAX3(a,b,c) NV_MAX(NV_MAX(a, b), c)
+#define NV_MAX4(a,b,c,d) NV_MAX3(NV_MAX(a,b),c,d)
+
+static inline int NV_LIMIT_VAL_TO_MIN_MAX(int val, int min, int max)
+{
+    if (val < min) {
+        return min;
+    }
+    if (val > max) {
+        return max;
+    }
+    return val;
+}
+
+#define NV_ROUNDUP_DIV(x,y) ((x) / (y) + (((x) % (y)) ? 1 : 0))
+
+/*
+ * Macros used for computing palette entries:
+ *
+ * NV_UNDER_REPLICATE(val, source_size, result_size) expands a value
+ * of source_size bits into a value of target_size bits by shifting
+ * the source value into the high bits and replicating the high bits
+ * of the value into the low bits of the result.
+ *
+ * PALETTE_DEPTH_SHIFT(val, w) maps a colormap entry for a component
+ * that has w bits to an appropriate entry in a LUT of 256 entries.
+ */
+static inline unsigned int NV_UNDER_REPLICATE(unsigned short val,
+                                              int source_size,
+                                              int result_size)
+{
+    return (val << (result_size - source_size)) |
+           (val >> ((source_size << 1) - result_size));
+}
+
+static inline unsigned short PALETTE_DEPTH_SHIFT(unsigned short val, int depth)
+{
+    return NV_UNDER_REPLICATE(val, depth, 8);
+}
+
+/*
+ * Use __builtin_ffs where it is supported, or provide an equivalent
+ * implementation for platforms like riscv where it is not.
+ */
+#if defined(__GNUC__) && !NVCPU_IS_RISCV64
+static inline int nv_ffs(int x)
+{
+    return __builtin_ffs(x);
+}
+#else
+static inline int nv_ffs(int x)
+{
+    if (x == 0)
+        return 0;
+
+    LOWESTBITIDX_32(x);
+
+    return 1 + x;
+}
+#endif
+
+#endif /* __NV_COMMON_UTILS_H__ */
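A worked example for NV_UNDER_REPLICATE, assuming the header can be pulled into a userspace test harness: expanding the 5-bit component 0b10110 (22) to 8 bits replicates its top bits into the low bits, so 0b00000 maps to 0x00 and 0b11111 maps to 0xFF with no gap at either end of the range.

```c
#include <assert.h>

int main(void)
{
    /* (22 << (8 - 5)) | (22 >> ((5 << 1) - 8))
     *   = 0b10110000 | 0b101 = 0b10110101 = 181 */
    assert(NV_UNDER_REPLICATE(22, 5, 8) == 181);
    assert(NV_UNDER_REPLICATE(31, 5, 8) == 255); /* full scale stays full */
    assert(PALETTE_DEPTH_SHIFT(22, 5) == 181);   /* same mapping, 8-bit LUT */
    return 0;
}
```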
kernel-open/common/inc/nv_dpy_id.h (new file, 370 lines)

@@ -0,0 +1,370 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2010-2014 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * This header file defines the types NVDpyId and NVDpyIdList, as well
+ * as inline functions to manipulate these types. NVDpyId and
+ * NVDpyIdList should be treated as opaque by includers of this header
+ * file.
+ */
+
+#ifndef __NV_DPY_ID_H__
+#define __NV_DPY_ID_H__
+
+#include "nvtypes.h"
+#include "nvmisc.h"
+#include "nv_common_utils.h"
+#include <nvlimits.h> /* NV_MAX_SUBDEVICES */
+
+typedef struct {
+    NvU32 opaqueDpyId;
+} NVDpyId;
+
+typedef struct {
+    NvU32 opaqueDpyIdList;
+} NVDpyIdList;
+
+#define NV_DPY_ID_MAX_SUBDEVICES NV_MAX_SUBDEVICES
+#define NV_DPY_ID_MAX_DPYS_IN_LIST 32
+
+/*
+ * For use in combination with nvDpyIdToPrintFormat(); e.g.,
+ *
+ *   printf("dpy id: " NV_DPY_ID_PRINT_FORMAT "\n",
+ *          nvDpyIdToPrintFormat(dpyId));
+ *
+ * The includer should not make assumptions about the return type of
+ * nvDpyIdToPrintFormat().
+ */
+#define NV_DPY_ID_PRINT_FORMAT "0x%08x"
+
+/* functions to return an invalid DpyId and empty DpyIdList */
+
+static inline NVDpyId nvInvalidDpyId(void)
+{
+    NVDpyId dpyId = { 0 };
+    return dpyId;
+}
+
+static inline NVDpyIdList nvEmptyDpyIdList(void)
+{
+    NVDpyIdList dpyIdList = { 0 };
+    return dpyIdList;
+}
+
+static inline NVDpyIdList nvAllDpyIdList(void)
+{
+    NVDpyIdList dpyIdList = { ~0U };
+    return dpyIdList;
+}
+
+static inline void
+nvEmptyDpyIdListSubDeviceArray(NVDpyIdList dpyIdList[NV_DPY_ID_MAX_SUBDEVICES])
+{
+    int dispIndex;
+    for (dispIndex = 0; dispIndex < NV_DPY_ID_MAX_SUBDEVICES; dispIndex++) {
+        dpyIdList[dispIndex] = nvEmptyDpyIdList();
+    }
+}
+
+/* set operations on DpyIds and DpyIdLists: Add, Subtract, Intersect, Xor */
+
+static inline __attribute__ ((warn_unused_result))
+NVDpyIdList nvAddDpyIdToDpyIdList(NVDpyId dpyId, NVDpyIdList dpyIdList)
+{
+    NVDpyIdList tmpDpyIdList;
+    tmpDpyIdList.opaqueDpyIdList = dpyIdList.opaqueDpyIdList |
+                                   dpyId.opaqueDpyId;
+    return tmpDpyIdList;
+}
+
+/* Passing an invalid display ID makes this function return an empty list. */
+static inline __attribute__ ((warn_unused_result))
+NVDpyIdList nvAddDpyIdToEmptyDpyIdList(NVDpyId dpyId)
+{
+    NVDpyIdList tmpDpyIdList;
+    tmpDpyIdList.opaqueDpyIdList = dpyId.opaqueDpyId;
+    return tmpDpyIdList;
+}
+
+static inline __attribute__ ((warn_unused_result))
+NVDpyIdList nvAddDpyIdListToDpyIdList(NVDpyIdList dpyIdListA,
+                                      NVDpyIdList dpyIdListB)
+{
+    NVDpyIdList tmpDpyIdList;
+    tmpDpyIdList.opaqueDpyIdList = dpyIdListB.opaqueDpyIdList |
+                                   dpyIdListA.opaqueDpyIdList;
+    return tmpDpyIdList;
+}
+
+/* Returns: dpyIdList - dpyId */
+static inline __attribute__ ((warn_unused_result))
+NVDpyIdList nvDpyIdListMinusDpyId(NVDpyIdList dpyIdList, NVDpyId dpyId)
+{
+    NVDpyIdList tmpDpyIdList;
+    tmpDpyIdList.opaqueDpyIdList = dpyIdList.opaqueDpyIdList &
+                                   (~dpyId.opaqueDpyId);
+    return tmpDpyIdList;
+}
+
+/* Returns: dpyIdListA - dpyIdListB */
+static inline __attribute__ ((warn_unused_result))
+NVDpyIdList nvDpyIdListMinusDpyIdList(NVDpyIdList dpyIdListA,
+                                      NVDpyIdList dpyIdListB)
+{
+    NVDpyIdList tmpDpyIdList;
+    tmpDpyIdList.opaqueDpyIdList = dpyIdListA.opaqueDpyIdList &
+                                   (~dpyIdListB.opaqueDpyIdList);
+    return tmpDpyIdList;
+}
+
+static inline __attribute__ ((warn_unused_result))
+NVDpyIdList nvIntersectDpyIdAndDpyIdList(NVDpyId dpyId, NVDpyIdList dpyIdList)
+{
+    NVDpyIdList tmpDpyIdList;
+    tmpDpyIdList.opaqueDpyIdList = dpyIdList.opaqueDpyIdList &
+                                   dpyId.opaqueDpyId;
+    return tmpDpyIdList;
+}
+
+static inline __attribute__ ((warn_unused_result))
+NVDpyIdList nvIntersectDpyIdListAndDpyIdList(NVDpyIdList dpyIdListA,
+                                             NVDpyIdList dpyIdListB)
+{
+    NVDpyIdList tmpDpyIdList;
+    tmpDpyIdList.opaqueDpyIdList = dpyIdListA.opaqueDpyIdList &
+                                   dpyIdListB.opaqueDpyIdList;
+    return tmpDpyIdList;
+}
+
+static inline __attribute__ ((warn_unused_result))
+NVDpyIdList nvXorDpyIdAndDpyIdList(NVDpyId dpyId, NVDpyIdList dpyIdList)
+{
+    NVDpyIdList tmpDpyIdList;
+    tmpDpyIdList.opaqueDpyIdList = dpyIdList.opaqueDpyIdList ^
+                                   dpyId.opaqueDpyId;
+    return tmpDpyIdList;
+}
+
+static inline __attribute__ ((warn_unused_result))
+NVDpyIdList nvXorDpyIdListAndDpyIdList(NVDpyIdList dpyIdListA,
+                                       NVDpyIdList dpyIdListB)
+{
+    NVDpyIdList tmpDpyIdList;
+    tmpDpyIdList.opaqueDpyIdList = dpyIdListA.opaqueDpyIdList ^
+                                   dpyIdListB.opaqueDpyIdList;
+    return tmpDpyIdList;
+}
+
+/* boolean checks */
+
+static inline NvBool nvDpyIdIsInDpyIdList(NVDpyId dpyId,
+                                          NVDpyIdList dpyIdList)
+{
+    return !!(dpyIdList.opaqueDpyIdList & dpyId.opaqueDpyId);
+}
+
+static inline NvBool nvDpyIdIsInvalid(NVDpyId dpyId)
+{
+    return (dpyId.opaqueDpyId == 0);
+}
+
+static inline NvBool nvDpyIdListIsEmpty(NVDpyIdList dpyIdList)
+{
+    return (dpyIdList.opaqueDpyIdList == 0);
+}
+
+static inline NvBool
+nvDpyIdListSubDeviceArrayIsEmpty(NVDpyIdList
+                                 dpyIdList[NV_DPY_ID_MAX_SUBDEVICES])
+{
+    int dispIndex;
+    for (dispIndex = 0; dispIndex < NV_DPY_ID_MAX_SUBDEVICES; dispIndex++) {
+        if (!nvDpyIdListIsEmpty(dpyIdList[dispIndex])) {
+            return NV_FALSE;
+        }
+    }
+    return NV_TRUE;
+}
+
+static inline NvBool nvDpyIdsAreEqual(NVDpyId dpyIdA, NVDpyId dpyIdB)
+{
+    return (dpyIdA.opaqueDpyId == dpyIdB.opaqueDpyId);
+}
+
+static inline NvBool nvDpyIdListsAreEqual(NVDpyIdList dpyIdListA,
+                                          NVDpyIdList dpyIdListB)
+{
+    return (dpyIdListA.opaqueDpyIdList == dpyIdListB.opaqueDpyIdList);
+}
+
+static inline NvBool nvDpyIdListIsASubSetofDpyIdList(NVDpyIdList dpyIdListA,
+                                                     NVDpyIdList dpyIdListB)
+{
+    NVDpyIdList intersectedDpyIdList =
+        nvIntersectDpyIdListAndDpyIdList(dpyIdListA, dpyIdListB);
+
+    return nvDpyIdListsAreEqual(intersectedDpyIdList, dpyIdListA);
+}
+
+/*
+ * retrieve the individual dpyIds from dpyIdList; if dpyId is invalid,
+ * start at the beginning of the list; otherwise, start at the dpyId
+ * after the specified dpyId
+ */
+
+static inline __attribute__ ((warn_unused_result))
+NVDpyId nvNextDpyIdInDpyIdListUnsorted(NVDpyId dpyId, NVDpyIdList dpyIdList)
+{
+    if (nvDpyIdIsInvalid(dpyId)) {
+        dpyId.opaqueDpyId = 1;
+    } else {
+        dpyId.opaqueDpyId <<= 1;
+    }
+
+    while (dpyId.opaqueDpyId) {
+
+        if (nvDpyIdIsInDpyIdList(dpyId, dpyIdList)) {
+            return dpyId;
+        }
+
+        dpyId.opaqueDpyId <<= 1;
+    }
+
+    /* no dpyIds left in dpyIdlist; return the invalid dpyId */
+
+    return nvInvalidDpyId();
+}
+
+#define FOR_ALL_DPY_IDS(_dpyId, _dpyIdList)                         \
+    for ((_dpyId) = nvNextDpyIdInDpyIdListUnsorted(nvInvalidDpyId(), \
+                                                   (_dpyIdList));    \
+         !nvDpyIdIsInvalid(_dpyId);                                  \
+         (_dpyId) = nvNextDpyIdInDpyIdListUnsorted((_dpyId),         \
+                                                   (_dpyIdList)))
+
+/* report how many dpyIds are in the dpyIdList */
+
+static inline int nvCountDpyIdsInDpyIdList(NVDpyIdList dpyIdList)
+{
+    return nvPopCount32(dpyIdList.opaqueDpyIdList);
+}
+
+static inline int
+nvCountDpyIdsInDpyIdListSubDeviceArray(NVDpyIdList
+                                       dpyIdList[NV_DPY_ID_MAX_SUBDEVICES])
+{
+    int dispIndex, n = 0;
+
+    for (dispIndex = 0; dispIndex < NV_DPY_ID_MAX_SUBDEVICES; dispIndex++) {
+        n += nvCountDpyIdsInDpyIdList(dpyIdList[dispIndex]);
+    }
+
+    return n;
+}
+
+/* convert between dpyId/dpyIdList and NV-CONTROL values */
+
+static inline int nvDpyIdToNvControlVal(NVDpyId dpyId)
+{
+    return (int) dpyId.opaqueDpyId;
+}
+
+static inline int nvDpyIdListToNvControlVal(NVDpyIdList dpyIdList)
+{
+    return (int) dpyIdList.opaqueDpyIdList;
+}
+
+static inline NVDpyId nvNvControlValToDpyId(int val)
+{
+    NVDpyId dpyId;
+    dpyId.opaqueDpyId = (val == 0) ? 0 : 1 << (nv_ffs(val)-1);
+    return dpyId;
+}
+
+static inline NVDpyIdList nvNvControlValToDpyIdList(int val)
+{
+    NVDpyIdList dpyIdList;
+    dpyIdList.opaqueDpyIdList = val;
+    return dpyIdList;
+}
+
+/* convert between dpyId and NvU32 */
+
+static inline NVDpyId nvNvU32ToDpyId(NvU32 val)
+{
+    NVDpyId dpyId;
+    dpyId.opaqueDpyId = (val == 0) ? 0 : 1 << (nv_ffs(val)-1);
+    return dpyId;
+}
+
+static inline NVDpyIdList nvNvU32ToDpyIdList(NvU32 val)
+{
+    NVDpyIdList dpyIdList;
+    dpyIdList.opaqueDpyIdList = val;
+    return dpyIdList;
+}
+
+static inline NvU32 nvDpyIdToNvU32(NVDpyId dpyId)
+{
+    return dpyId.opaqueDpyId;
+}
+
+static inline NvU32 nvDpyIdListToNvU32(NVDpyIdList dpyIdList)
+{
+    return dpyIdList.opaqueDpyIdList;
+}
+
+/* Return the bit position of dpyId: a number in the range [0..31]. */
+static inline NvU32 nvDpyIdToIndex(NVDpyId dpyId)
+{
+    return nv_ffs(dpyId.opaqueDpyId) - 1;
+}
+
+/* Return a display ID that is not in the list passed in. */
+
+static inline NVDpyId nvNewDpyId(NVDpyIdList excludeList)
+{
+    NVDpyId dpyId;
+    if (~excludeList.opaqueDpyIdList == 0) {
+        return nvInvalidDpyId();
+    }
+    dpyId.opaqueDpyId =
+        1U << (nv_ffs(~excludeList.opaqueDpyIdList) - 1);
+    return dpyId;
+}
+
+/* See comment for NV_DPY_ID_PRINT_FORMAT. */
+static inline NvU32 nvDpyIdToPrintFormat(NVDpyId dpyId)
+{
+    return nvDpyIdToNvU32(dpyId);
+}
+
+/* Prevent usage of opaque values. */
+#define opaqueDpyId     __ERROR_ACCESS_ME_VIA_NV_DPY_ID_H
+#define opaqueDpyIdList __ERROR_ACCESS_ME_VIA_NV_DPY_ID_H
+
+#endif /* __NV_DPY_ID_H__ */
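An illustrative use of the dpy-id set API (values arbitrary, function invented for the example): build a list from two IDs, iterate it, and verify the population count.

```c
static int dpy_id_list_example(void)
{
    NVDpyId a = nvNvU32ToDpyId(0x4);   /* bit 2 */
    NVDpyId b = nvNvU32ToDpyId(0x10);  /* bit 4 */
    NVDpyIdList list = nvAddDpyIdToEmptyDpyIdList(a);
    NVDpyId iter;
    int seen = 0;

    list = nvAddDpyIdToDpyIdList(b, list);

    FOR_ALL_DPY_IDS(iter, list) {
        seen++;   /* visits each set bit exactly once */
    }

    return (seen == 2) && (nvCountDpyIdsInDpyIdList(list) == 2);
}
```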
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef __NV_SMG_H__
|
||||
#define __NV_SMG_H__
|
||||
#ifndef __NV_MIG_TYPES_H__
|
||||
#define __NV_MIG_TYPES_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -29,25 +29,12 @@ extern "C" {
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
/*
|
||||
* The simplest required abstraction for accessing RM independent of the
|
||||
* calling component which may be a kernel module or userspace driver.
|
||||
*/
|
||||
typedef NvU32 (*NVSubdevSMGRMControl) (void *ctx, NvU32 object, NvU32 cmd, void *params, NvU32 paramsSize);
|
||||
typedef NvU32 (*NVSubdevSMGRMAlloc) (void *ctx, NvU32 parent, NvU32 object, NvU32 cls, void *allocParams);
|
||||
typedef NvU32 (*NVSubdevSMGRMFree) (void *ctx, NvU32 parent, NvU32 object);
|
||||
typedef NvU32 MIGDeviceId;
|
||||
|
||||
NvBool NVSubdevSMGSetPartition(void *ctx,
|
||||
NvU32 subdevHandle,
|
||||
const char *computeInstUuid,
|
||||
NvU32 gpuInstSubscriptionHdl,
|
||||
NvU32 computeInstSubscriptionHdl,
|
||||
NVSubdevSMGRMControl rmControl,
|
||||
NVSubdevSMGRMAlloc rmAlloc,
|
||||
NVSubdevSMGRMFree rmFree);
|
||||
#define NO_MIG_DEVICE 0L
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __NV_SMG_H__ */
|
||||
#endif /* __NV_MIG_TYPES_H__ */
|
||||
@@ -660,14 +660,20 @@ NV_STATUS nvUvmInterfaceServiceDeviceInterruptsRM(uvmGpuDeviceHandle device);
     RM will propagate the update to all channels using the provided VA space.
     All channels must be idle when this call is made.
 
+    If the pageDirectory is in system memory then a CPU physical address must be
+    provided. RM will establish and manage the DMA mapping for the
+    pageDirectory.
+
     Arguments:
         vaSpace[IN}          - VASpace Object
-        physAddress[IN]      - Physical address of new page directory
+        physAddress[IN]      - Physical address of new page directory. If
+                               !bVidMemAperture this is a CPU physical address.
         numEntries[IN]       - Number of entries including previous PDE which will be copied
         bVidMemAperture[IN]  - If set pageDirectory will reside in VidMem aperture else sysmem
         pasid[IN]            - PASID (Process Address Space IDentifier) of the process
                                corresponding to the VA space. Ignored unless the VA space
                                object has ATS enabled.
+        dmaAddress[OUT]      - DMA mapping created for physAddress.
 
     Error codes:
         NV_ERR_GENERIC
@@ -675,7 +681,8 @@ NV_STATUS nvUvmInterfaceServiceDeviceInterruptsRM(uvmGpuDeviceHandle device);
 */
 NV_STATUS nvUvmInterfaceSetPageDirectory(uvmGpuAddressSpaceHandle vaSpace,
                                          NvU64 physAddress, unsigned numEntries,
-                                         NvBool bVidMemAperture, NvU32 pasid);
+                                         NvBool bVidMemAperture, NvU32 pasid,
+                                         NvU64 *dmaAddress);
 
 /*******************************************************************************
     nvUvmInterfaceUnsetPageDirectory
@@ -1056,7 +1063,7 @@ NV_STATUS nvUvmInterfaceDestroyAccessCntrInfo(uvmGpuDeviceHandle device,
 */
 NV_STATUS nvUvmInterfaceEnableAccessCntr(uvmGpuDeviceHandle device,
                                          UvmGpuAccessCntrInfo *pAccessCntrInfo,
-                                         UvmGpuAccessCntrConfig *pAccessCntrConfig);
+                                         const UvmGpuAccessCntrConfig *pAccessCntrConfig);
 
 /*******************************************************************************
     nvUvmInterfaceDisableAccessCntr
@@ -1862,5 +1869,4 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
 NV_STATUS nvUvmInterfaceCslLogEncryption(UvmCslContext *uvmCslContext,
                                          UvmCslOperation operation,
                                          NvU32 bufferSize);
-
 #endif // _NV_UVM_INTERFACE_H_
@@ -268,6 +268,7 @@ typedef struct UvmGpuChannelInfo_tag
 
     // The errorNotifier is filled out when the channel hits an RC error.
     NvNotification *errorNotifier;
+    NvNotification *keyRotationNotifier;
 
     NvU32 hwRunlistId;
@@ -297,6 +298,7 @@ typedef struct UvmGpuChannelInfo_tag
     NvU64 gpFifoGpuVa;
     NvU64 gpPutGpuVa;
     NvU64 gpGetGpuVa;
 
     // GPU VA of work submission offset is needed in Confidential Computing
     // so CE channels can ring doorbell of other channels as required for
     // WLC/LCIC work submission
@@ -374,6 +376,9 @@ typedef struct
     // True if the CE can be used for P2P transactions
     NvBool p2p:1;
 
+    // True if the CE supports encryption
+    NvBool secure:1;
+
     // Mask of physical CEs assigned to this LCE
     //
     // The value returned by RM for this field may change when a GPU is
@@ -1007,17 +1012,17 @@ typedef struct UvmGpuFaultInfo_tag
         NvU32  replayableFaultMask;
 
-        // Fault buffer CPU mapping
-        void*  bufferAddress;
+        //
+        // When Confidential Computing is disabled, the mapping points to the
+        // actual HW fault buffer.
+        //
+        // When Confidential Computing is enabled, the mapping points to a
+        // copy of the HW fault buffer. This "shadow buffer" is maintained
+        // by GSP-RM.
+        void*  bufferAddress;
 
         // Size, in bytes, of the fault buffer pointed by bufferAddress.
         NvU32  bufferSize;
 
         // Mapping pointing to the start of the fault buffer metadata containing
         // a 16Byte authentication tag and a valid byte. Always NULL when
         // Confidential Computing is disabled.
@@ -1103,24 +1108,9 @@ typedef enum
     UVM_ACCESS_COUNTER_GRANULARITY_16G = 4,
 } UVM_ACCESS_COUNTER_GRANULARITY;
 
-typedef enum
-{
-    UVM_ACCESS_COUNTER_USE_LIMIT_NONE = 1,
-    UVM_ACCESS_COUNTER_USE_LIMIT_QTR  = 2,
-    UVM_ACCESS_COUNTER_USE_LIMIT_HALF = 3,
-    UVM_ACCESS_COUNTER_USE_LIMIT_FULL = 4,
-} UVM_ACCESS_COUNTER_USE_LIMIT;
-
 typedef struct UvmGpuAccessCntrConfig_tag
 {
-    NvU32 mimcGranularity;
-
-    NvU32 momcGranularity;
-
-    NvU32 mimcUseLimit;
-
-    NvU32 momcUseLimit;
-
+    NvU32 granularity;
     NvU32 threshold;
 } UvmGpuAccessCntrConfig;
kernel-open/common/inc/nvi2c.h (new file, 37 lines)

@@ -0,0 +1,37 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _NV_I2C_H_
+#define _NV_I2C_H_
+
+#define NV_I2C_MSG_WR 0x0000
+#define NV_I2C_MSG_RD 0x0001
+
+typedef struct nv_i2c_msg_s
+{
+    NvU16 addr;
+    NvU16 flags;
+    NvU16 len;
+    NvU8* buf;
+} nv_i2c_msg_t;
+
+#endif
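An illustrative (non-driver) use of the message type: a register read on an I2C device is commonly expressed as a write of the register index followed by a read, using the flags defined above.

```c
/* Hypothetical helper: fill a write-then-read message pair for a register
 * read transaction on the device at dev_addr. */
static void build_reg_read(nv_i2c_msg_t msgs[2], NvU16 dev_addr,
                           NvU8 *reg, NvU8 *result, NvU16 result_len)
{
    msgs[0].addr  = dev_addr;
    msgs[0].flags = NV_I2C_MSG_WR;   /* write the register index */
    msgs[0].len   = 1;
    msgs[0].buf   = reg;

    msgs[1].addr  = dev_addr;
    msgs[1].flags = NV_I2C_MSG_RD;   /* then read back the value(s) */
    msgs[1].len   = result_len;
    msgs[1].buf   = result;
}
```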
96
kernel-open/common/inc/nvimpshared.h
Normal file
96
kernel-open/common/inc/nvimpshared.h
Normal file
@@ -0,0 +1,96 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/******************************************************************************\
*                                                                              *
* Description:                                                                 *
*   Accommodates sharing of IMP-related structures between kernel interface    *
*   files and core RM.                                                         *
*                                                                              *
\******************************************************************************/

#pragma once

#include <nvtypes.h>
#if defined(_MSC_VER)
#pragma warning(disable:4324)
#endif

//
// This file was generated with FINN, an NVIDIA coding tool.
// Source file: nvimpshared.finn
//

//
// There are only a small number of discrete dramclk frequencies available on
// the system. This structure contains IMP-relevant information associated
// with a specific dramclk frequency.
//
typedef struct DRAM_CLK_INSTANCE {
    NvU32 dram_clk_freq_khz;

    NvU32 mchub_clk_khz;

    NvU32 mc_clk_khz;

    NvU32 max_iso_bw_kbps;

    //
    // switch_latency_ns is the maximum time required to switch the dramclk
    // frequency to the frequency specified in dram_clk_freq_khz.
    //
    NvU32 switch_latency_ns;
} DRAM_CLK_INSTANCE;

//
// This table is used to collect information from other modules that is needed
// for RM IMP calculations. (Used on Tegra only.)
//
typedef struct TEGRA_IMP_IMPORT_DATA {
    //
    // max_iso_bw_kbps stores the maximum possible ISO bandwidth available to
    // display, assuming display is the only active ISO client. (Note that ISO
    // bandwidth will typically be allocated to multiple clients, so display
    // will generally not have access to the maximum possible bandwidth.)
    //
    NvU32 max_iso_bw_kbps;

    // On Orin, each dram channel is 16 bits wide.
    NvU32 num_dram_channels;

    //
    // dram_clk_instance stores entries for all possible dramclk frequencies,
    // sorted by dramclk frequency in increasing order.
    //
    // "24" is expected to be larger than the actual number of required entries
    // (which is provided by a BPMP API), but it can be increased if necessary.
    //
    // num_dram_clk_entries is filled in with the actual number of distinct
    // dramclk entries.
    //
    NvU32 num_dram_clk_entries;
    DRAM_CLK_INSTANCE dram_clk_instance[24];
} TEGRA_IMP_IMPORT_DATA;
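
Because the entries are sorted by rising dramclk frequency, a consumer can find the lowest frequency that still satisfies a bandwidth requirement with a single linear scan over num_dram_clk_entries. An illustrative helper, not code from this header:

/* Return the lowest dramclk (in kHz) whose ISO bandwidth meets req_bw_kbps,
 * or 0 if no entry qualifies. Illustrative only. */
static NvU32 pick_min_dram_clk_khz(const TEGRA_IMP_IMPORT_DATA *imp,
                                   NvU32 req_bw_kbps)
{
    NvU32 i;

    for (i = 0; i < imp->num_dram_clk_entries; i++) {
        /* Entries are sorted by increasing dram_clk_freq_khz. */
        if (imp->dram_clk_instance[i].max_iso_bw_kbps >= req_bw_kbps) {
            return imp->dram_clk_instance[i].dram_clk_freq_khz;
        }
    }
    return 0;
}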
@@ -640,22 +640,28 @@ enum NvKmsInputColorRange {
     * If DEFAULT is provided, driver will assume full range for RGB formats
     * and limited range for YUV formats.
     */
    NVKMS_INPUT_COLORRANGE_DEFAULT = 0,
    NVKMS_INPUT_COLOR_RANGE_DEFAULT = 0,

    NVKMS_INPUT_COLORRANGE_LIMITED = 1,
    NVKMS_INPUT_COLOR_RANGE_LIMITED = 1,

    NVKMS_INPUT_COLORRANGE_FULL = 2,
    NVKMS_INPUT_COLOR_RANGE_FULL = 2,
};

enum NvKmsInputColorSpace {
    /* Unknown colorspace; no de-gamma will be applied */
    NVKMS_INPUT_COLORSPACE_NONE = 0,
    /* Unknown colorspace */
    NVKMS_INPUT_COLOR_SPACE_NONE = 0,

    /* Linear, Rec.709 [-0.5, 7.5) */
    NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR = 1,
    NVKMS_INPUT_COLOR_SPACE_BT601 = 1,
    NVKMS_INPUT_COLOR_SPACE_BT709 = 2,
    NVKMS_INPUT_COLOR_SPACE_BT2020 = 3,
    NVKMS_INPUT_COLOR_SPACE_BT2100 = NVKMS_INPUT_COLOR_SPACE_BT2020,

    /* PQ, Rec.2020 unity */
    NVKMS_INPUT_COLORSPACE_BT2100_PQ = 2,
    NVKMS_INPUT_COLOR_SPACE_SCRGB = 4
};

enum NvKmsInputTf {
    NVKMS_INPUT_TF_LINEAR = 0,
    NVKMS_INPUT_TF_PQ = 1
};

enum NvKmsOutputColorimetry {

@@ -24,8 +24,10 @@
#if !defined(__NVKMS_KAPI_H__)

#include "nvtypes.h"
#include "nv_mig_types.h"

#include "nv-gpu-info.h"
#include "nv_dpy_id.h"
#include "nvkms-api-types.h"
#include "nvkms-format.h"

@@ -173,12 +175,18 @@ struct NvKmsKapiDeviceResourcesInfo {
        NvBool supportsSyncpts;

        NvBool requiresVrrSemaphores;

        NvBool supportsInputColorRange;
        NvBool supportsInputColorSpace;
    } caps;

    NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
    NvBool supportsICtCp[NVKMS_KAPI_LAYER_MAX];

    struct NvKmsKapiLutCaps lutCaps;

    NvU64 vtFbBaseAddress;
    NvU64 vtFbSize;
};
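
The two new caps bits let a KAPI client decide at runtime whether the per-layer color controls added further down may be programmed. A hedged caller-side sketch, assuming resInfo was populated by the device-resources query:

    /* Sketch: gate color-property programming on the new caps bits. */
    if (resInfo.caps.supportsInputColorSpace &&
        resInfo.caps.supportsInputColorRange) {
        /* Safe to program layer inputColorSpace / inputColorRange. */
    }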

#define NVKMS_KAPI_LAYER_MASK(layerType) (1 << (layerType))
@@ -204,6 +212,7 @@ struct NvKmsKapiConnectorInfo {
    NvU32 numIncompatibleConnectors;
    NvKmsKapiConnector incompatibleConnectorHandles[NVKMS_KAPI_MAX_CONNECTORS];

    NVDpyIdList dynamicDpyIdList;
};

struct NvKmsKapiStaticDisplayInfo {
@@ -222,6 +231,8 @@ struct NvKmsKapiStaticDisplayInfo {
    NvKmsKapiDisplay possibleCloneHandles[NVKMS_KAPI_MAX_CLONE_DISPLAYS];

    NvU32 headMask;

    NvBool isDpMST;
};

struct NvKmsKapiSyncParams {
@@ -260,7 +271,8 @@ struct NvKmsKapiLayerConfig {
        NvBool enabled;
    } hdrMetadata;

    enum NvKmsOutputTf tf;
    enum NvKmsInputTf inputTf;
    enum NvKmsOutputTf outputTf;

    NvU8 minPresentInterval;
    NvBool tearing;
@@ -272,6 +284,7 @@ struct NvKmsKapiLayerConfig {
    NvU16 dstWidth, dstHeight;

    enum NvKmsInputColorSpace inputColorSpace;
    enum NvKmsInputColorRange inputColorRange;

    struct {
        NvBool enabled;
@@ -315,7 +328,10 @@ struct NvKmsKapiLayerRequestedConfig {
        NvBool dstXYChanged : 1;
        NvBool dstWHChanged : 1;
        NvBool cscChanged : 1;
        NvBool tfChanged : 1;
        NvBool inputTfChanged : 1;
        NvBool outputTfChanged : 1;
        NvBool inputColorSpaceChanged : 1;
        NvBool inputColorRangeChanged : 1;
        NvBool hdrMetadataChanged : 1;
        NvBool matrixOverridesChanged : 1;
        NvBool ilutChanged : 1;
@@ -481,6 +497,8 @@ struct NvKmsKapiEvent {
struct NvKmsKapiAllocateDeviceParams {
    /* [IN] GPU ID obtained from enumerateGpus() */
    NvU32 gpuId;
    /* [IN] MIG device if requested */
    MIGDeviceId migDevice;

    /* [IN] Private data of device allocator */
    void *privateData;
@@ -563,6 +581,11 @@ typedef enum NvKmsKapiRegisterWaiterResultRec {

typedef void NvKmsKapiSuspendResumeCallbackFunc(NvBool suspend);

struct NvKmsKapiGpuInfo {
    nv_gpu_info_t gpuInfo;
    MIGDeviceId migDevice;
};

struct NvKmsKapiFunctionsTable {

    /*!
@@ -586,7 +609,7 @@ struct NvKmsKapiFunctionsTable {
     *
     * \return Count of enumerated gpus.
     */
    NvU32 (*enumerateGpus)(nv_gpu_info_t *gpuInfo);
    NvU32 (*enumerateGpus)(struct NvKmsKapiGpuInfo *kapiGpuInfo);

    /*!
     * Allocate an NVK device using which you can query/allocate resources on
@@ -1559,6 +1582,26 @@ struct NvKmsKapiFunctionsTable {
        NvS32 index
    );

    /*!
     * Check or wait on a head's LUT notifier.
     *
     * \param [in] device            A device allocated using allocateDevice().
     *
     * \param [in] head              The head to check for LUT completion.
     *
     * \param [in] waitForCompletion If true, wait for the notifier in NvKms
     *                               before returning.
     *
     * \param [out] complete         Returns whether the notifier has completed.
     */
    NvBool
    (*checkLutNotifier)
    (
        struct NvKmsKapiDevice *device,
        NvU32 head,
        NvBool waitForCompletion
    );

    /*
     * Notify NVKMS that the system's framebuffer console has been disabled and
     * the reserved allocation for the old framebuffer console can be unmapped.

@@ -701,11 +701,6 @@ nvPrevPow2_U64(const NvU64 x )
    } \
}

//
// Bug 4851259: Newly added functions must be hidden from certain HS-signed
// ucode compilers to avoid signature mismatch.
//
#ifndef NVDEC_1_0
/*!
 * Returns the position of nth set bit in the given mask.
 *
@@ -735,8 +730,6 @@ nvGetNthSetBitIndex32(NvU32 mask, NvU32 n)
    return -1;
}

#endif // NVDEC_1_0

//
// Size to use when declaring variable-sized arrays
//
@@ -780,12 +773,15 @@ nvGetNthSetBitIndex32(NvU32 mask, NvU32 n)
// Returns the offset (in bytes) of 'member' in struct 'type'.
#ifndef NV_OFFSETOF
#if defined(__GNUC__) && (__GNUC__ > 3)
#define NV_OFFSETOF(type, member)   ((NvU32)__builtin_offsetof(type, member))
#define NV_OFFSETOF(type, member)   ((NvUPtr) __builtin_offsetof(type, member))
#else
#define NV_OFFSETOF(type, member)   ((NvU32)(NvU64)&(((type *)0)->member)) // shouldn't we use PtrToUlong? But will need to include windows header.
#define NV_OFFSETOF(type, member)   ((NvUPtr) &(((type *)0)->member))
#endif
#endif

// Given a pointer and the member it is of the parent struct, return a pointer to the parent struct
#define NV_CONTAINEROF(ptr, type, member) ((type *) (((NvUPtr) ptr) - NV_OFFSETOF(type, member)))
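
NV_CONTAINEROF inverts NV_OFFSETOF: subtracting the member's byte offset from a member pointer recovers the enclosing object, the same pattern as the kernel's container_of(). A minimal illustrative use, with types invented for the example:

typedef struct
{
    NvU32 id;
    NvU32 payload;  /* suppose callers only hold a pointer to this member */
} example_item_t;

static example_item_t *item_from_payload(NvU32 *payload_ptr)
{
    /* Recover the enclosing example_item_t from a pointer to its member. */
    return NV_CONTAINEROF(payload_ptr, example_item_t, payload);
}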

//
// Performs a rounded division of b into a (unsigned). For SIGNED version of
// NV_ROUNDED_DIV() macro check the comments in bug 769777.

@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2014-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -159,6 +159,11 @@ NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_NOT_READY, 0x00000081, "Nvlink Fabri
NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_FAILURE,           0x00000082, "Nvlink Fabric Probe failed")
NV_STATUS_CODE(NV_ERR_GPU_MEMORY_ONLINING_FAILURE,     0x00000083, "GPU Memory Onlining failed")
NV_STATUS_CODE(NV_ERR_REDUCTION_MANAGER_NOT_AVAILABLE, 0x00000084, "Reduction Manager is not available")
NV_STATUS_CODE(NV_ERR_THRESHOLD_CROSSED,               0x00000085, "A fatal threshold has been crossed")
NV_STATUS_CODE(NV_ERR_RESOURCE_RETIREMENT_ERROR,       0x00000086, "An error occurred while trying to retire a resource")
NV_STATUS_CODE(NV_ERR_FABRIC_STATE_OUT_OF_SYNC,        0x00000087, "NVLink fabric state cached by the driver is out of sync")
NV_STATUS_CODE(NV_ERR_BUFFER_FULL,                     0x00000088, "Buffer is full")
NV_STATUS_CODE(NV_ERR_BUFFER_EMPTY,                    0x00000089, "Buffer is empty")

// Warnings:
NV_STATUS_CODE(NV_WARN_HOT_SWITCH,                     0x00010001, "WARNING Hot switch")
@@ -169,5 +174,6 @@ NV_STATUS_CODE(NV_WARN_MORE_PROCESSING_REQUIRED, 0x00010005, "WARNING More
NV_STATUS_CODE(NV_WARN_NOTHING_TO_DO,                  0x00010006, "WARNING Nothing to do")
NV_STATUS_CODE(NV_WARN_NULL_OBJECT,                    0x00010007, "WARNING NULL object found")
NV_STATUS_CODE(NV_WARN_OUT_OF_RANGE,                   0x00010008, "WARNING value out of range")
NV_STATUS_CODE(NV_WARN_THRESHOLD_CROSSED,              0x00010009, "WARNING Threshold has been crossed")

#endif /* SDK_NVSTATUSCODES_H */

@@ -229,6 +229,7 @@ extern NvU64 os_page_mask;
extern NvU8 os_page_shift;
extern NvBool os_cc_enabled;
extern NvBool os_cc_sev_snp_enabled;
extern NvBool os_cc_sme_enabled;
extern NvBool os_cc_snp_vtom_enabled;
extern NvBool os_cc_tdx_enabled;
extern NvBool os_dma_buf_enabled;

387
kernel-open/common/inc/os_dsi_panel_props.h
Normal file
@@ -0,0 +1,387 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#ifndef _OS_DSI_PANEL_PARAMS_H_
#define _OS_DSI_PANEL_PARAMS_H_

#define DSI_GENERIC_LONG_WRITE            0x29
#define DSI_DCS_LONG_WRITE                0x39
#define DSI_GENERIC_SHORT_WRITE_1_PARAMS  0x13
#define DSI_GENERIC_SHORT_WRITE_2_PARAMS  0x23
#define DSI_DCS_WRITE_0_PARAM             0x05
#define DSI_DCS_WRITE_1_PARAM             0x15
#define DSI_DCS_READ_PARAM                0x06
#define DSI_DCS_COMPRESSION_MODE          0x07
#define DSI_DCS_PPS_LONG_WRITE            0x0A

#define DSI_DCS_SET_ADDR_MODE             0x36
#define DSI_DCS_EXIT_SLEEP_MODE           0x11
#define DSI_DCS_ENTER_SLEEP_MODE          0x10
#define DSI_DCS_SET_DISPLAY_ON            0x29
#define DSI_DCS_SET_DISPLAY_OFF           0x28
#define DSI_DCS_SET_TEARING_EFFECT_OFF    0x34
#define DSI_DCS_SET_TEARING_EFFECT_ON     0x35
#define DSI_DCS_NO_OP                     0x0
#define DSI_NULL_PKT_NO_DATA              0x9
#define DSI_BLANKING_PKT_NO_DATA          0x19
#define DSI_DCS_SET_COMPRESSION_METHOD    0xC0

/* DCS commands for command mode */
#define DSI_ENTER_PARTIAL_MODE            0x12
#define DSI_SET_PIXEL_FORMAT              0x3A
#define DSI_AREA_COLOR_MODE               0x4C
#define DSI_SET_PARTIAL_AREA              0x30
#define DSI_SET_PAGE_ADDRESS              0x2B
#define DSI_SET_ADDRESS_MODE              0x36
#define DSI_SET_COLUMN_ADDRESS            0x2A
#define DSI_WRITE_MEMORY_START            0x2C
#define DSI_WRITE_MEMORY_CONTINUE         0x3C

#define PKT_ID0(id)   ((((id) & 0x3f) << 3) | \
                       (((DSI_ENABLE) & 0x1) << 9))
#define PKT_LEN0(len) (((len) & 0x7) << 0)
#define PKT_ID1(id)   ((((id) & 0x3f) << 13) | \
                       (((DSI_ENABLE) & 0x1) << 19))
#define PKT_LEN1(len) (((len) & 0x7) << 10)
#define PKT_ID2(id)   ((((id) & 0x3f) << 23) | \
                       (((DSI_ENABLE) & 0x1) << 29))
#define PKT_LEN2(len) (((len) & 0x7) << 20)
#define PKT_ID3(id)   ((((id) & 0x3f) << 3) | \
                       (((DSI_ENABLE) & 0x1) << 9))
#define PKT_LEN3(len) (((len) & 0x7) << 0)
#define PKT_ID4(id)   ((((id) & 0x3f) << 13) | \
                       (((DSI_ENABLE) & 0x1) << 19))
#define PKT_LEN4(len) (((len) & 0x7) << 10)
#define PKT_ID5(id)   ((((id) & 0x3f) << 23) | \
                       (((DSI_ENABLE) & 0x1) << 29))
#define PKT_LEN5(len) (((len) & 0x7) << 20)
#define PKT_LP        (((DSI_ENABLE) & 0x1) << 30)
#define NUMOF_PKT_SEQ 12
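
Each PKT_* macro packs a packet id, an enable bit, and a payload-length field into slots of one 32-bit word of the 12-word packet sequence; the PKT_LP bit marks the word for low-power transmission. In code that includes this header (where DSI_ENABLE, defined further down, is visible), a sequence word is composed by OR-ing slot macros; illustrative values only:

/* Illustrative: one pkt_seq word carrying a null packet in slot 0 and a
 * blanking packet in slot 1, transmitted in low-power mode. */
NvU32 example_pkt_seq_word =
    PKT_ID0(DSI_NULL_PKT_NO_DATA)     | PKT_LEN0(0) |
    PKT_ID1(DSI_BLANKING_PKT_NO_DATA) | PKT_LEN1(2) |
    PKT_LP;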

/* DSI pixel data format, enum values should match with dt-bindings in tegra-panel.h */
typedef enum
{
    DSI_PIXEL_FORMAT_16BIT_P,
    DSI_PIXEL_FORMAT_18BIT_P,
    DSI_PIXEL_FORMAT_18BIT_NP,
    DSI_PIXEL_FORMAT_24BIT_P,
    DSI_PIXEL_FORMAT_8BIT_DSC,
    DSI_PIXEL_FORMAT_12BIT_DSC,
    DSI_PIXEL_FORMAT_16BIT_DSC,
    DSI_PIXEL_FORMAT_10BIT_DSC,
    DSI_PIXEL_FORMAT_30BIT_P,
    DSI_PIXEL_FORMAT_36BIT_P,
} DSIPIXELFORMAT;

/* DSI virtual channel number */
typedef enum
{
    DSI_VIRTUAL_CHANNEL_0,
    DSI_VIRTUAL_CHANNEL_1,
    DSI_VIRTUAL_CHANNEL_2,
    DSI_VIRTUAL_CHANNEL_3,
} DSIVIRTUALCHANNEL;

/* DSI transmit method for video data */
typedef enum
{
    DSI_VIDEO_TYPE_VIDEO_MODE,
    DSI_VIDEO_TYPE_COMMAND_MODE,
} DSIVIDEODATAMODE;

/* DSI HS clock mode */
typedef enum
{
    DSI_VIDEO_CLOCK_CONTINUOUS,
    DSI_VIDEO_CLOCK_TX_ONLY,
} DSICLOCKMODE;

/* DSI burst mode setting in video mode. Each mode is assigned with a
 * fixed value. The rationale behind this is to avoid change of these
 * values, since the calculation of dsi clock depends on them. */
typedef enum
{
    DSI_VIDEO_NON_BURST_MODE               = 0,
    DSI_VIDEO_NON_BURST_MODE_WITH_SYNC_END = 1,
    DSI_VIDEO_BURST_MODE_LOWEST_SPEED      = 2,
    DSI_VIDEO_BURST_MODE_LOW_SPEED         = 3,
    DSI_VIDEO_BURST_MODE_MEDIUM_SPEED      = 4,
    DSI_VIDEO_BURST_MODE_FAST_SPEED        = 5,
    DSI_VIDEO_BURST_MODE_FASTEST_SPEED     = 6,
} DSIVIDEOBURSTMODE;

/* DSI Ganged Mode */
typedef enum
{
    DSI_GANGED_SYMMETRIC_LEFT_RIGHT         = 1,
    DSI_GANGED_SYMMETRIC_EVEN_ODD           = 2,
    DSI_GANGED_SYMMETRIC_LEFT_RIGHT_OVERLAP = 3,
} DSIGANGEDTYPE;

typedef enum
{
    DSI_LINK0,
    DSI_LINK1,
} DSILINKNUM;

/* DSI Command Packet type */
typedef enum
{
    DSI_PACKET_CMD,
    DSI_DELAY_MS,
    DSI_GPIO_SET,
    DSI_SEND_FRAME,
    DSI_PACKET_VIDEO_VBLANK_CMD,
    DSI_DELAY_US,
} DSICMDPKTTYPE;

/* DSI Phy type */
typedef enum
{
    DSI_DPHY,
    DSI_CPHY,
} DSIPHYTYPE;

enum {
    DSI_GPIO_LCD_RESET,
    DSI_GPIO_PANEL_EN,
    DSI_GPIO_PANEL_EN_1,
    DSI_GPIO_BL_ENABLE,
    DSI_GPIO_BL_PWM,
    DSI_GPIO_AVDD_AVEE_EN,
    DSI_GPIO_VDD_1V8_LCD_EN,
    DSI_GPIO_TE,
    DSI_GPIO_BRIDGE_EN_0,
    DSI_GPIO_BRIDGE_EN_1,
    DSI_GPIO_BRIDGE_REFCLK_EN,
    DSI_N_GPIO_PANEL, /* add new gpio above this entry */
};

enum
{
    DSI_DISABLE,
    DSI_ENABLE,
};

typedef struct
{
    NvU8 cmd_type;
    NvU8 data_id;
    union
    {
        NvU16 data_len;
        NvU16 delay_ms;
        NvU16 delay_us;
        NvU32 gpio;
        NvU16 frame_cnt;
        struct
        {
            NvU8 data0;
            NvU8 data1;
        } sp;
    } sp_len_dly;
    NvU32 *pdata;
    NvU8 link_id;
    NvBool club_cmd;
} DSI_CMD, *PDSICMD;
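
A panel init sequence is typically expressed as an array of these descriptors, interleaving packets and delays. A hedged sketch of that pattern in code that includes this header; the initializer-macro helpers common in Tegra panel code are not defined here, so plain designated initializers are used:

/* Illustrative init sequence: exit sleep, wait 120 ms, display on. */
static DSI_CMD example_init_cmds[] = {
    { .cmd_type = DSI_PACKET_CMD, .data_id = DSI_DCS_WRITE_0_PARAM,
      .sp_len_dly = { .sp = { .data0 = DSI_DCS_EXIT_SLEEP_MODE, .data1 = 0 } } },
    { .cmd_type = DSI_DELAY_MS,
      .sp_len_dly = { .delay_ms = 120 } },
    { .cmd_type = DSI_PACKET_CMD, .data_id = DSI_DCS_WRITE_0_PARAM,
      .sp_len_dly = { .sp = { .data0 = DSI_DCS_SET_DISPLAY_ON, .data1 = 0 } } },
};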

typedef struct
{
    NvU16 t_hsdexit_ns;
    NvU16 t_hstrail_ns;
    NvU16 t_datzero_ns;
    NvU16 t_hsprepare_ns;
    NvU16 t_hsprebegin_ns;
    NvU16 t_hspost_ns;

    NvU16 t_clktrail_ns;
    NvU16 t_clkpost_ns;
    NvU16 t_clkzero_ns;
    NvU16 t_tlpx_ns;

    NvU16 t_clkprepare_ns;
    NvU16 t_clkpre_ns;
    NvU16 t_wakeup_ns;

    NvU16 t_taget_ns;
    NvU16 t_tasure_ns;
    NvU16 t_tago_ns;
} DSI_PHY_TIMING_IN_NS;

typedef struct
{
    NvU32 hActive;
    NvU32 vActive;
    NvU32 hFrontPorch;
    NvU32 vFrontPorch;
    NvU32 hBackPorch;
    NvU32 vBackPorch;
    NvU32 hSyncWidth;
    NvU32 vSyncWidth;
    NvU32 hPulsePolarity;
    NvU32 vPulsePolarity;
    NvU32 pixelClkRate;
} DSITIMINGS, *PDSITIMINGS;

typedef struct
{
    NvU8 n_data_lanes;     /* required */
    NvU8 pixel_format;     /* required */
    NvU8 refresh_rate;     /* required */
    NvU8 rated_refresh_rate;
    NvU8 panel_reset;      /* required */
    NvU8 virtual_channel;  /* required */
    NvU8 dsi_instance;
    NvU16 dsi_panel_rst_gpio;
    NvU16 dsi_panel_bl_en_gpio;
    NvU16 dsi_panel_bl_pwm_gpio;
    NvU16 even_odd_split_width;
    NvU8 controller_vs;

    NvBool panel_has_frame_buffer; /* required */

    /* Deprecated. Use DSI_SEND_FRAME panel command instead. */
    NvBool panel_send_dc_frames;

    DSI_CMD *dsi_init_cmd;         /* required */
    NvU16 n_init_cmd;              /* required */
    NvU32 *dsi_init_cmd_array;
    NvU32 init_cmd_array_size;
    NvBool sendInitCmdsEarly;

    DSI_CMD *dsi_early_suspend_cmd;
    NvU16 n_early_suspend_cmd;
    NvU32 *dsi_early_suspend_cmd_array;
    NvU32 early_suspend_cmd_array_size;

    DSI_CMD *dsi_late_resume_cmd;
    NvU16 n_late_resume_cmd;
    NvU32 *dsi_late_resume_cmd_array;
    NvU32 late_resume_cmd_array_size;

    DSI_CMD *dsi_postvideo_cmd;
    NvU16 n_postvideo_cmd;
    NvU32 *dsi_postvideo_cmd_array;
    NvU32 postvideo_cmd_array_size;

    DSI_CMD *dsi_suspend_cmd;      /* required */
    NvU16 n_suspend_cmd;           /* required */
    NvU32 *dsi_suspend_cmd_array;
    NvU32 suspend_cmd_array_size;

    NvU8 video_data_type;          /* required */
    NvU8 video_clock_mode;
    NvU8 video_burst_mode;
    NvU8 ganged_type;
    NvU16 ganged_overlap;
    NvBool ganged_swap_links;
    NvBool ganged_write_to_all_links;
    NvU8 split_link_type;

    NvU8 suspend_aggr;

    NvU16 panel_buffer_size_byte;
    NvU16 panel_reset_timeout_msec;

    NvBool hs_cmd_mode_supported;
    NvBool hs_cmd_mode_on_blank_supported;
    NvBool enable_hs_clock_on_lp_cmd_mode;
    NvBool no_pkt_seq_eot;         /* 1st generation panel may not
                                    * support eot. Don't set it for
                                    * most panels. */
    const NvU32 *pktSeq;
    NvU32 *pktSeq_array;
    NvU32 pktSeq_array_size;
    NvBool skip_dsi_pkt_header;
    NvBool power_saving_suspend;
    NvBool suspend_stop_stream_late;
    NvBool dsi2lvds_bridge_enable;
    NvBool dsi2edp_bridge_enable;

    NvU32 max_panel_freq_khz;
    NvU32 lp_cmd_mode_freq_khz;
    NvU32 lp_read_cmd_mode_freq_khz;
    NvU32 hs_clk_in_lp_cmd_mode_freq_khz;
    NvU32 burst_mode_freq_khz;
    NvU32 fpga_freq_khz;

    NvU32 te_gpio;
    NvBool te_polarity_low;
    NvBool dsiEnVRR;
    NvBool dsiVrrPanelSupportsTe;
    NvBool dsiForceSetTePin;

    int panel_gpio[DSI_N_GPIO_PANEL];
    NvBool panel_gpio_populated;

    NvU32 dpd_dsi_pads;

    DSI_PHY_TIMING_IN_NS phyTimingNs;

    NvU8 *bl_name;

    NvBool lp00_pre_panel_wakeup;
    NvBool ulpm_not_supported;
    NvBool use_video_host_fifo_for_cmd;
    NvBool dsi_csi_loopback;
    NvBool set_max_timeout;
    NvBool use_legacy_dphy_core;
    // Swap P/N pins polarity of all data lanes
    NvBool swap_data_lane_polarity;
    // Swap P/N pins polarity of clock lane
    NvBool swap_clock_lane_polarity;
    // Reverse clock polarity for partition A/B. 1st SOT bit goes on negedge of Clock lane
    NvBool reverse_clock_polarity;
    // DSI Lane Crossbar. Allocating xbar array for max number of lanes
    NvBool lane_xbar_exists;
    NvU32 lane_xbar_ctrl[8];
    NvU32 refresh_rate_adj;

    NvU8 dsiPhyType;
    NvBool en_data_scrambling;

    NvU32 dsipll_vco_rate_hz;
    NvU32 dsipll_clkoutpn_rate_hz;
    NvU32 dsipll_clkouta_rate_hz;
    NvU32 vpll0_rate_hz;

    DSITIMINGS dsiTimings;

    // DSC Parameters
    NvBool dsiDscEnable;
    NvU32 dsiDscBpp;
    NvU32 dsiDscNumSlices;
    NvU32 dsiDscSliceWidth;
    NvU32 dsiDscSliceHeight;
    NvBool dsiDscEnBlockPrediction;
    NvBool dsiDscEnDualDsc;
    NvU32 dsiDscDecoderMajorVersion;
    NvU32 dsiDscDecoderMinorVersion;
    NvBool dsiDscUseCustomPPS;
    NvU32 dsiDscCustomPPSData[32];

    // Driver allocates memory for PPS cmd to be sent to Panel
    NvBool ppsCmdMemAllocated;
} DSI_PANEL_INFO;

#endif
32
kernel-open/common/inc/os_gpio.h
Normal file
@@ -0,0 +1,32 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2020 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#ifndef _OS_GPIO_H_
#define _OS_GPIO_H_

typedef enum
{
    NV_OS_GPIO_FUNC_HOTPLUG_A,
    NV_OS_GPIO_FUNC_HOTPLUG_B,
} NV_OS_GPIO_FUNC_NAMES;

#endif
@@ -81,9 +81,9 @@ NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_own_access_cntr_intr(nvidia_stack_t *, nvgpuSessionHandle_t, nvgpuAccessCntrInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, nvgpuAccessCntrConfig_t);
NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, const nvgpuAccessCntrConfig_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_disable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_set_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, unsigned, NvBool, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_set_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, unsigned, NvBool, NvU32, NvU64 *);
NV_STATUS NV_API_CALL rm_gpu_ops_unset_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_get_nvlink_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuNvlinkInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_p2p_object_create(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuDeviceHandle_t, NvHandle *);

@@ -662,27 +662,6 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_PCI_GET_DOMAIN_BUS_AND_SLOT_PRESENT" "" "functions"
        ;;

        hash__remap_4k_pfn)
            #
            # Determine if the hash__remap_4k_pfn() function is
            # present.
            #
            # Added by commit 6cc1a0ee4ce2 ("powerpc/mm/radix: Add radix
            # callback for pmd accessors") in v4.7 (committed 2016-04-29).
            # Present only in arch/powerpc
            #
            CODE="
            #if defined(NV_ASM_BOOK3S_64_HASH_64K_H_PRESENT)
            #include <linux/mm.h>
            #include <asm/book3s/64/hash-64k.h>
            #endif
            void conftest_hash__remap_4k_pfn(void) {
                hash__remap_4k_pfn();
            }"

            compile_check_conftest "$CODE" "NV_HASH__REMAP_4K_PFN_PRESENT" "" "functions"
        ;;

        register_cpu_notifier)
            #
            # Determine if register_cpu_notifier() is present
@@ -1633,7 +1612,6 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_PHYS_TO_DMA_PRESENT" "" "functions"
        ;;


        dma_attr_macros)
            #
            # Determine if the NV_DMA_ATTR_SKIP_CPU_SYNC_PRESENT macro is present.
@@ -2441,6 +2419,45 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_DRM_ATOMIC_HELPER_LEGACY_GAMMA_SET_PRESENT" "" "functions"
        ;;

        drm_plane_create_color_properties)
            #
            # Determine if the function drm_plane_create_color_properties() is
            # present.
            #
            # Added by commit 80f690e9e3a6 ("drm: Add optional COLOR_ENCODING
            # and COLOR_RANGE properties to drm_plane") in v4.17 (2018-02-19).
            #
            CODE="
            #include <linux/types.h>
            #if defined(NV_DRM_DRM_COLOR_MGMT_H_PRESENT)
            #include <drm/drm_color_mgmt.h>
            #endif
            void conftest_drm_plane_create_color_properties(void) {
                drm_plane_create_color_properties();
            }"

            compile_check_conftest "$CODE" "NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT" "" "functions"
        ;;

        drm_format_info_has_is_yuv)
            #
            # Determine if struct drm_format_info has .is_yuv member.
            #
            # Added by commit ce2d54619a10 ("drm/fourcc: Add is_yuv field to
            # drm_format_info to denote if format is yuv") in v4.19
            # (2018-07-17).
            #
            CODE="
            #if defined(NV_DRM_DRM_FOURCC_H_PRESENT)
            #include <drm/drm_fourcc.h>
            #endif
            int conftest_drm_format_info_has_is_yuv(void) {
                return offsetof(struct drm_format_info, is_yuv);
            }"

            compile_check_conftest "$CODE" "NV_DRM_FORMAT_INFO_HAS_IS_YUV" "" "types"
        ;;
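
Driver code can then branch on the generated define. A minimal sketch of that consumer side; the helper name is invented and the fallback list is deliberately incomplete:

/* Assumes <drm/drm_fourcc.h> is included. */
static bool example_format_is_yuv(u32 drm_fourcc)
{
#if defined(NV_DRM_FORMAT_INFO_HAS_IS_YUV)
    const struct drm_format_info *info = drm_format_info(drm_fourcc);

    return info != NULL && info->is_yuv;
#else
    /* Older kernels: fall back to an explicit (incomplete) list. */
    return drm_fourcc == DRM_FORMAT_NV12 || drm_fourcc == DRM_FORMAT_YUYV;
#endif
}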

        pci_stop_and_remove_bus_device)
            #
            # Determine if the pci_stop_and_remove_bus_device() function is present.
@@ -3519,60 +3536,6 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_VM_OPS_FAULT_REMOVED_VMA_ARG" "" "types"
        ;;

        pnv_npu2_init_context)
            #
            # Determine if the pnv_npu2_init_context() function is
            # present and the signature of its callback.
            #
            # Added by commit 1ab66d1fbada ("powerpc/powernv: Introduce
            # address translation services for Nvlink2") in v4.12
            # (2017-04-03).
            #
            echo "$CONFTEST_PREAMBLE
            #if defined(NV_ASM_POWERNV_H_PRESENT)
            #include <linux/pci.h>
            #include <asm/powernv.h>
            #endif
            void conftest_pnv_npu2_init_context(void) {
                pnv_npu2_init_context();
            }" > conftest$$.c

            $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
            rm -f conftest$$.c
            if [ -f conftest$$.o ]; then
                echo "#undef NV_PNV_NPU2_INIT_CONTEXT_PRESENT" | append_conftest "functions"
                echo "#undef NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID" | append_conftest "functions"
                rm -f conftest$$.o
                return
            fi

            echo "#define NV_PNV_NPU2_INIT_CONTEXT_PRESENT" | append_conftest "functions"

            # Check the callback signature
            echo "$CONFTEST_PREAMBLE
            #if defined(NV_ASM_POWERNV_H_PRESENT)
            #include <linux/pci.h>
            #include <asm/powernv.h>
            #endif

            struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
                unsigned long flags,
                void (*cb)(struct npu_context *, void *),
                void *priv) {
                return NULL;
            }" > conftest$$.c

            $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
            rm -f conftest$$.c
            if [ -f conftest$$.o ]; then
                echo "#define NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID" | append_conftest "functions"
                rm -f conftest$$.o
                return
            fi

            echo "#undef NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID" | append_conftest "functions"
        ;;

        of_get_ibm_chip_id)
            #
            # Determine if the of_get_ibm_chip_id() function is present.
@@ -5289,6 +5252,45 @@ compile_test() {

            compile_check_conftest "$CODE" "NV_FOLLOW_PFN_PRESENT" "" "functions"
        ;;

        follow_pte_arg_vma)
            #
            # Determine if the first argument of follow_pte is
            # mm_struct or vm_area_struct.
            #
            # The first argument was changed from mm_struct to vm_area_struct by
            # commit 29ae7d96d166 ("mm: pass VMA instead of MM to follow_pte()")
            #
            CODE="
            #include <linux/mm.h>

            typeof(follow_pte) conftest_follow_pte_has_vma_arg;
            int conftest_follow_pte_has_vma_arg(struct vm_area_struct *vma,
                                                unsigned long address,
                                                pte_t **ptep,
                                                spinlock_t **ptl) {
                return 0;
            }"

            compile_check_conftest "$CODE" "NV_FOLLOW_PTE_ARG1_VMA" "" "types"
        ;;

        ptep_get)
            #
            # Determine if ptep_get() is present.
            #
            # ptep_get() was added by commit 481e980a7c19
            # ("mm: Allow arches to provide ptep_get()")
            #
            CODE="
            #include <linux/mm.h>
            void conftest_ptep_get(void) {
                ptep_get();
            }"

            compile_check_conftest "$CODE" "NV_PTEP_GET_PRESENT" "" "functions"
        ;;

        drm_plane_atomic_check_has_atomic_state_arg)
            #
            # Determine if drm_plane_helper_funcs::atomic_check takes 'state'
@@ -5478,6 +5480,31 @@ compile_test() {
            fi
        ;;

        of_property_for_each_u32_has_internal_args)
            #
            # Determine if the internal arguments for the macro
            # of_property_for_each_u32() are present.
            #
            # Commit 9722c3b66e21 ("of: remove internal arguments from
            # of_property_for_each_u32()") removes two arguments from
            # of_property_for_each_u32() which are used internally within
            # the macro and so do not need to be passed. This change was
            # made for Linux v6.11.
            #
            CODE="
            #include <linux/of.h>
            void conftest_of_property_for_each_u32(struct device_node *np,
                                                   char *propname) {
                struct property *iparam1;
                const __be32 *iparam2;
                u32 val;

                of_property_for_each_u32(np, propname, iparam1, iparam2, val);
            }"

            compile_check_conftest "$CODE" "NV_OF_PROPERTY_FOR_EACH_U32_HAS_INTERNAL_ARGS" "" "types"
        ;;
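
A driver iterating a u32 property can then select the matching call form at build time. A hedged sketch of that dual-form call site; the device node and property name are placeholders:

/* Assumes <linux/of.h> is included. */
static u32 example_sum_u32_prop(struct device_node *np)
{
    u32 val, sum = 0;
#if defined(NV_OF_PROPERTY_FOR_EACH_U32_HAS_INTERNAL_ARGS)
    struct property *prop;
    const __be32 *cur;

    of_property_for_each_u32(np, "example-prop", prop, cur, val)
        sum += val;
#else
    of_property_for_each_u32(np, "example-prop", val)
        sum += val;
#endif
    return sum;
}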

        of_property_read_variable_u8_array)
            #
            # Determine if of_property_read_variable_u8_array is present
@@ -5574,8 +5601,8 @@ compile_test() {

        of_dma_configure)
            #
            # Determine if of_dma_configure() function is present, and how
            # many arguments it takes.
            # Determine if of_dma_configure() function is present, if it
            # returns int, and how many arguments it takes.
            #
            # Added by commit 591c1ee465ce ("of: configure the platform
            # device dma parameters") in v3.16. However, it was a static,
@@ -5585,6 +5612,10 @@ compile_test() {
            # commit 1f5c69aa51f9 ("of: Move of_dma_configure() to device.c
            # to help re-use") in v4.1.
            #
            # Its return type was changed from void to int by commit
            # 7b07cbefb68d ("iommu: of: Handle IOMMU lookup failure with
            # deferred probing or error") in v4.12.
            #
            # It subsequently began taking a third parameter with commit
            # 3d6ce86ee794 ("drivers: remove force dma flag from buses")
            # in v4.18.
@@ -5609,6 +5640,7 @@ compile_test() {

                echo "#undef NV_OF_DMA_CONFIGURE_PRESENT" | append_conftest "functions"
                echo "#undef NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT" | append_conftest "functions"
                echo "#undef NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
            else
                echo "#define NV_OF_DMA_CONFIGURE_PRESENT" | append_conftest "functions"

@@ -5627,6 +5659,26 @@ compile_test() {
                if [ -f conftest$$.o ]; then
                    rm -f conftest$$.o
                    echo "#define NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT 3" | append_conftest "functions"

                    echo "$CONFTEST_PREAMBLE
                    #if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
                    #include <linux/of_device.h>
                    #endif

                    int conftest_of_dma_configure_has_int_return_type(void) {
                        return of_dma_configure(NULL, NULL, false);
                    }" > conftest$$.c

                    $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
                    rm -f conftest$$.c

                    if [ -f conftest$$.o ]; then
                        rm -f conftest$$.o
                        echo "#define NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
                    else
                        echo "#undef NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
                    fi

                    return
                fi

@@ -5645,6 +5697,26 @@ compile_test() {
                if [ -f conftest$$.o ]; then
                    rm -f conftest$$.o
                    echo "#define NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT 2" | append_conftest "functions"

                    echo "$CONFTEST_PREAMBLE
                    #if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
                    #include <linux/of_device.h>
                    #endif

                    int conftest_of_dma_configure_has_int_return_type(void) {
                        return of_dma_configure(NULL, NULL);
                    }" > conftest$$.c

                    $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
                    rm -f conftest$$.c

                    if [ -f conftest$$.o ]; then
                        rm -f conftest$$.o
                        echo "#define NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
                    else
                        echo "#undef NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE" | append_conftest "functions"
                    fi

                    return
                fi
            fi
fi
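
On the driver side, the three generated symbols combine into one call-site wrapper. A hedged sketch under those assumptions; the wrapper name is invented for illustration:

static int nv_of_dma_configure_sketch(struct device *dev,
                                      struct device_node *np)
{
    int ret = 0;
#if defined(NV_OF_DMA_CONFIGURE_PRESENT)
#if NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT == 3
#if defined(NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE)
    ret = of_dma_configure(dev, np, true);
#else
    of_dma_configure(dev, np, true);
#endif
#elif NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT == 2
#if defined(NV_OF_DMA_CONFIGURE_HAS_INT_RETURN_TYPE)
    ret = of_dma_configure(dev, np);
#else
    of_dma_configure(dev, np);
#endif
#endif
#endif
    return ret;
}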

@@ -7523,6 +7595,34 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_FOLIO_TEST_SWAPCACHE_PRESENT" "" "functions"
        ;;

        platform_driver_struct_remove_returns_void)
            #
            # Determine if the 'platform_driver' structure 'remove' function
            # pointer returns void.
            #
            # Commit 0edb555a65d1 ("platform: Make platform_driver::remove()
            # return void") updated the platform_driver structure 'remove'
            # callback to return void instead of int in Linux v6.11-rc1.
            #
            echo "$CONFTEST_PREAMBLE
            #include <linux/platform_device.h>
            int conftest_platform_driver_struct_remove_returns_void(struct platform_device *pdev,
                                                                    struct platform_driver *driver) {
                return driver->remove(pdev);
            }" > conftest$$.c

            $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
            rm -f conftest$$.c

            if [ -f conftest$$.o ]; then
                rm -f conftest$$.o

                echo "#undef NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID" | append_conftest "types"
            else
                echo "#define NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID" | append_conftest "types"
            fi
        ;;
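
The generated type macro then selects the matching callback signature when declaring the driver. A hedged sketch of that pattern; the nv_plat_* names are invented for illustration:

/* Assumes <linux/platform_device.h> is included. */
#if defined(NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID)
static void nv_plat_remove(struct platform_device *pdev)
{
    /* teardown */
}
#else
static int nv_plat_remove(struct platform_device *pdev)
{
    /* teardown */
    return 0;
}
#endif

static struct platform_driver nv_plat_driver = {
    .remove = nv_plat_remove,
    .driver = { .name = "nv-plat-sketch" },
};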

        module_import_ns_takes_constant)
            #
            # Determine if the MODULE_IMPORT_NS macro takes a string literal
@@ -7540,6 +7640,62 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_MODULE_IMPORT_NS_TAKES_CONSTANT" "" "generic"
        ;;

        assign_str)
            #
            # Determine whether the __assign_str() macro, used in tracepoint
            # event definitions, has the 'src' parameter.
            #
            # The 'src' parameter was removed by commit 2c92ca849fcc
            # ("tracing/treewide: Remove second parameter of __assign_str()") in
            # v6.10.
            #
            # The expected usage of __assign_str() inside the TRACE_EVENT()
            # macro, which involves multiple include passes and assumes it is
            # in a header file, requires a non-standard conftest approach of
            # producing both a header and a C file.
            #
            echo "$CONFTEST_PREAMBLE
            #undef TRACE_SYSTEM
            #define TRACE_SYSTEM conftest

            #if !defined(_TRACE_CONFTEST_H) || defined(TRACE_HEADER_MULTI_READ)
            #define _TRACE_CONFTEST_H
            #include <linux/tracepoint.h>
            TRACE_EVENT(conftest,
                TP_PROTO(const char *s),
                TP_ARGS(s),
                TP_STRUCT__entry(__string(s, s)),
                TP_fast_assign(__assign_str(s);),
                TP_printk(\"%s\", __get_str(s))
            );
            #endif

            #undef TRACE_INCLUDE_PATH
            #define TRACE_INCLUDE_PATH .
            #define TRACE_INCLUDE_FILE conftest$$
            #include <trace/define_trace.h>
            " > conftest$$.h

            echo "$CONFTEST_PREAMBLE
            #define CREATE_TRACE_POINTS
            #include \"conftest$$.h\"

            void conftest_assign_str(void) {
                trace_conftest(\"conftest\");
            }
            " > conftest$$.c

            $CC $CFLAGS -c conftest$$.c >/dev/null 2>&1
            rm -f conftest$$.c conftest$$.h

            if [ -f conftest$$.o ]; then
                rm -f conftest$$.o

                echo "#define NV_ASSIGN_STR_ARGUMENT_COUNT 1" | append_conftest "functions"
            else
                echo "#define NV_ASSIGN_STR_ARGUMENT_COUNT 2" | append_conftest "functions"
            fi
        ;;
;;
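
A tracepoint definition can then stay compatible across kernels by wrapping the macro once on the generated argument count. A hedged sketch; the wrapper name is invented for illustration:

/* Select the right __assign_str() arity once, based on the probe result. */
#if NV_ASSIGN_STR_ARGUMENT_COUNT == 1
    #define NV_ASSIGN_STR(dst, src) __assign_str(dst)
#else
    #define NV_ASSIGN_STR(dst, src) __assign_str(dst, src)
#endif

/* ...then inside TRACE_EVENT(...): TP_fast_assign(NV_ASSIGN_STR(name, name);) */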

        drm_driver_has_date)
            #
@@ -7565,6 +7721,33 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_DRM_DRIVER_HAS_DATE" "" "types"
        ;;

        drm_connector_helper_funcs_mode_valid_has_const_mode_arg)
            #
            # Determine if the 'mode' pointer argument is const in
            # drm_connector_helper_funcs::mode_valid.
            #
            # The 'mode' pointer argument in
            # drm_connector_helper_funcs::mode_valid was made const by commit
            # 26d6fd81916e ("drm/connector: make mode_valid take a const struct
            # drm_display_mode") in linux-next, expected in v6.15.
            #
            CODE="
            #if defined(NV_DRM_DRM_ATOMIC_HELPER_H_PRESENT)
            #include <drm/drm_atomic_helper.h>
            #endif

            static int conftest_drm_connector_mode_valid(struct drm_connector *connector,
                                                         const struct drm_display_mode *mode) {
                return 0;
            }

            const struct drm_connector_helper_funcs conftest_drm_connector_helper_funcs = {
                .mode_valid = conftest_drm_connector_mode_valid,
            };"

            compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_HELPER_FUNCS_MODE_VALID_HAS_CONST_MODE_ARG" "" "types"
        ;;

        # When adding a new conftest entry, please use the correct format for
        # specifying the relevant upstream Linux kernel commit. Please
        # avoid specifying -rc kernels, and only use SHAs that actually exist

@@ -14,6 +14,7 @@ NV_HEADER_PRESENCE_TESTS = \
    drm/drm_encoder.h \
    drm/drm_atomic_uapi.h \
    drm/drm_drv.h \
    drm/drm_edid.h \
    drm/drm_fbdev_generic.h \
    drm/drm_fbdev_ttm.h \
    drm/drm_client_setup.h \
@@ -65,13 +66,10 @@ NV_HEADER_PRESENCE_TESTS = \
    linux/nvhost.h \
    linux/nvhost_t194.h \
    linux/host1x-next.h \
    asm/book3s/64/hash-64k.h \
    asm/set_memory.h \
    asm/prom.h \
    asm/powernv.h \
    linux/atomic.h \
    asm/barrier.h \
    asm/opal-api.h \
    sound/hdaudio.h \
    asm/pgtable_types.h \
    asm/page.h \

@@ -62,6 +62,20 @@
#undef NV_DRM_FENCE_AVAILABLE
#endif

#if defined(NV_DRM_FBDEV_GENERIC_SETUP_PRESENT) && \
    defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_FBDEV_GENERIC_AVAILABLE
#endif

#if defined(NV_DRM_FBDEV_TTM_SETUP_PRESENT) && \
    defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#if IS_ENABLED(CONFIG_DRM_TTM_HELPER)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_FBDEV_TTM_AVAILABLE
#endif
#endif

#if defined(NV_DRM_CLIENT_SETUP_PRESENT) && \
    (defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT) || \
     defined(NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT))

@@ -314,7 +314,11 @@ static int nv_drm_connector_get_modes(struct drm_connector *connector)
}

static int nv_drm_connector_mode_valid(struct drm_connector *connector,
#if defined(NV_DRM_CONNECTOR_HELPER_FUNCS_MODE_VALID_HAS_CONST_MODE_ARG)
                                       const struct drm_display_mode *mode)
#else
                                       struct drm_display_mode *mode)
#endif
{
    struct drm_device *dev = connector->dev;
    struct nv_drm_device *nv_dev = to_nv_device(dev);

@@ -372,23 +372,88 @@ cursor_plane_req_config_update(struct drm_plane *plane,
        old_config.dstY != req_config->dstY;
}

static void free_drm_lut_surface(struct kref *ref)
static void release_drm_nvkms_surface(struct nv_drm_nvkms_surface *drm_nvkms_surface)
{
    struct nv_drm_lut_surface *drm_lut_surface =
        container_of(ref, struct nv_drm_lut_surface, refcount);
    struct NvKmsKapiDevice *pDevice = drm_lut_surface->pDevice;
    struct NvKmsKapiDevice *pDevice = drm_nvkms_surface->pDevice;

    BUG_ON(drm_lut_surface->nvkms_surface == NULL);
    BUG_ON(drm_lut_surface->nvkms_memory == NULL);
    BUG_ON(drm_lut_surface->buffer == NULL);
    BUG_ON(drm_nvkms_surface->nvkms_surface == NULL);
    BUG_ON(drm_nvkms_surface->nvkms_memory == NULL);
    BUG_ON(drm_nvkms_surface->buffer == NULL);

    nvKms->destroySurface(pDevice, drm_lut_surface->nvkms_surface);
    nvKms->unmapMemory(pDevice, drm_lut_surface->nvkms_memory,
    nvKms->destroySurface(pDevice, drm_nvkms_surface->nvkms_surface);
    nvKms->unmapMemory(pDevice, drm_nvkms_surface->nvkms_memory,
                       NVKMS_KAPI_MAPPING_TYPE_KERNEL,
                       drm_lut_surface->buffer);
    nvKms->freeMemory(pDevice, drm_lut_surface->nvkms_memory);
                       drm_nvkms_surface->buffer);
    nvKms->freeMemory(pDevice, drm_nvkms_surface->nvkms_memory);
}

    nv_drm_free(drm_lut_surface);
static int init_drm_nvkms_surface(struct nv_drm_device *nv_dev,
                                  struct nv_drm_nvkms_surface *drm_nvkms_surface,
                                  struct nv_drm_nvkms_surface_params *surface_params)
{
    struct NvKmsKapiDevice *pDevice = nv_dev->pDevice;
    NvU8 compressible = 0; // No compression

    struct NvKmsKapiCreateSurfaceParams params = {};
    struct NvKmsKapiMemory *surface_mem;
    struct NvKmsKapiSurface *surface;
    void *buffer;

    params.format = surface_params->format;
    params.width = surface_params->width;
    params.height = surface_params->height;

    /* Allocate displayable memory. */
    if (nv_dev->hasVideoMemory) {
        surface_mem =
            nvKms->allocateVideoMemory(pDevice,
                                       NvKmsSurfaceMemoryLayoutPitch,
                                       NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
                                       surface_params->surface_size,
                                       &compressible);
    } else {
        surface_mem =
            nvKms->allocateSystemMemory(pDevice,
                                        NvKmsSurfaceMemoryLayoutPitch,
                                        NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
                                        surface_params->surface_size,
                                        &compressible);
    }
    if (surface_mem == NULL) {
        return -ENOMEM;
    }

    /* Map memory in order to populate it. */
    if (!nvKms->mapMemory(pDevice, surface_mem,
                          NVKMS_KAPI_MAPPING_TYPE_KERNEL,
                          &buffer)) {
        nvKms->freeMemory(pDevice, surface_mem);
        return -ENOMEM;
    }

    params.planes[0].memory = surface_mem;
    params.planes[0].offset = 0;
    params.planes[0].pitch = surface_params->surface_size;

    /* Create surface. */
    surface = nvKms->createSurface(pDevice, &params);
    if (surface == NULL) {
        nvKms->unmapMemory(pDevice, surface_mem,
                           NVKMS_KAPI_MAPPING_TYPE_KERNEL, buffer);
        nvKms->freeMemory(pDevice, surface_mem);
        return -ENOMEM;
    }

    /* Pack into struct nv_drm_nvkms_surface. */
    drm_nvkms_surface->pDevice = pDevice;
    drm_nvkms_surface->nvkms_memory = surface_mem;
    drm_nvkms_surface->nvkms_surface = surface;
    drm_nvkms_surface->buffer = buffer;

    /* Init refcount. */
    kref_init(&drm_nvkms_surface->refcount);

    return 0;
}

static struct nv_drm_lut_surface *alloc_drm_lut_surface(
@@ -399,86 +464,49 @@ static struct nv_drm_lut_surface *alloc_drm_lut_surface(
    NvU32 num_vss_header_entries,
    NvU32 num_entries)
{
    struct NvKmsKapiDevice *pDevice = nv_dev->pDevice;
    struct nv_drm_lut_surface *drm_lut_surface;
    NvU8 compressible = 0; // No compression
    size_t size =
    const size_t surface_size =
        (((num_vss_header_entries + num_entries) *
          NVKMS_LUT_CAPS_LUT_ENTRY_SIZE) + 255) & ~255; // 256-byte aligned

    struct NvKmsKapiMemory *surface_mem;
    struct NvKmsKapiSurface *surface;
    struct NvKmsKapiCreateSurfaceParams params = {};
    NvU16 *lut_data;
    struct nv_drm_nvkms_surface_params params = {};

    /* Allocate displayable memory. */
    if (nv_dev->hasVideoMemory) {
        surface_mem =
            nvKms->allocateVideoMemory(pDevice,
                                       NvKmsSurfaceMemoryLayoutPitch,
                                       NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
                                       size,
                                       &compressible);
    } else {
        surface_mem =
            nvKms->allocateSystemMemory(pDevice,
                                        NvKmsSurfaceMemoryLayoutPitch,
                                        NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
                                        size,
                                        &compressible);
    }
    if (surface_mem == NULL) {
        return NULL;
    }

    /* Map memory in order to populate it. */
    if (!nvKms->mapMemory(pDevice, surface_mem,
                          NVKMS_KAPI_MAPPING_TYPE_KERNEL,
                          (void **) &lut_data)) {
        nvKms->freeMemory(pDevice, surface_mem);
        return NULL;
    }

    /* Create surface. */
    params.format = NvKmsSurfaceMemoryFormatR16G16B16A16;
    params.width = num_vss_header_entries + num_entries;
    params.height = 1;
    params.planes[0].memory = surface_mem;
    params.planes[0].offset = 0;
    params.planes[0].pitch = size;
    params.surface_size = surface_size;

    surface = nvKms->createSurface(pDevice, &params);
    if (surface == NULL) {
        nvKms->unmapMemory(pDevice, surface_mem,
                           NVKMS_KAPI_MAPPING_TYPE_KERNEL, (void *) lut_data);
        nvKms->freeMemory(pDevice, surface_mem);
        return NULL;
    }

    /* Pack into struct nv_drm_lut_surface. */
    drm_lut_surface = nv_drm_calloc(1, sizeof(struct nv_drm_lut_surface));
    if (drm_lut_surface == NULL) {
        nvKms->destroySurface(pDevice, surface);
        nvKms->unmapMemory(pDevice, surface_mem,
                           NVKMS_KAPI_MAPPING_TYPE_KERNEL, (void *) lut_data);
        nvKms->freeMemory(pDevice, surface_mem);
        return NULL;
    }
    drm_lut_surface->pDevice = pDevice;
    drm_lut_surface->nvkms_memory = surface_mem;
    drm_lut_surface->nvkms_surface = surface;
    drm_lut_surface->buffer = lut_data;

    if (init_drm_nvkms_surface(nv_dev, &drm_lut_surface->base, &params) != 0) {
        nv_drm_free(drm_lut_surface);
        return NULL;
    }

    drm_lut_surface->properties.vssSegments = num_vss_header_segments;
    drm_lut_surface->properties.vssType = vss_type;
    drm_lut_surface->properties.lutEntries = num_entries;
    drm_lut_surface->properties.entryFormat = entry_format;

    /* Init refcount. */
    kref_init(&drm_lut_surface->refcount);

    return drm_lut_surface;
}

static void free_drm_lut_surface(struct kref *ref)
{
    struct nv_drm_nvkms_surface *drm_nvkms_surface =
        container_of(ref, struct nv_drm_nvkms_surface, refcount);
    struct nv_drm_lut_surface *drm_lut_surface =
        container_of(drm_nvkms_surface, struct nv_drm_lut_surface, base);

    // Clean up base
    release_drm_nvkms_surface(drm_nvkms_surface);

    nv_drm_free(drm_lut_surface);
}

static NvU32 fp32_lut_interp(
    NvU16 entry0,
    NvU16 entry1,
@@ -582,7 +610,7 @@ static struct nv_drm_lut_surface *create_drm_ilut_surface_vss(
        return NULL;
    }

    lut_data = (NvU16 *) drm_lut_surface->buffer;
    lut_data = (NvU16 *) drm_lut_surface->base.buffer;

    /* Calculate VSS header. */
    if (vss_header_seg_sizes != NULL) {
@@ -733,7 +761,7 @@ static struct nv_drm_lut_surface *create_drm_ilut_surface_legacy(
        return NULL;
    }

    lut_data = (NvU16 *) drm_lut_surface->buffer;
    lut_data = (NvU16 *) drm_lut_surface->base.buffer;

    /* Fill LUT surface. */
    for (entry_idx = 0; entry_idx < NVKMS_LUT_ARRAY_SIZE; entry_idx++) {
@@ -799,7 +827,7 @@ static struct nv_drm_lut_surface *create_drm_tmo_surface(
        return NULL;
    }

    lut_data = (NvU16 *) drm_lut_surface->buffer;
    lut_data = (NvU16 *) drm_lut_surface->base.buffer;

    /* Calculate linear VSS header. */
    for (entry_idx = 0; entry_idx < NUM_VSS_HEADER_ENTRIES; entry_idx++) {
@@ -901,7 +929,7 @@ static struct nv_drm_lut_surface *create_drm_olut_surface_vss(
        return NULL;
    }

    lut_data = (NvU16 *) drm_lut_surface->buffer;
    lut_data = (NvU16 *) drm_lut_surface->base.buffer;

    /* Calculate VSS header. */
    if (vss_header_seg_sizes != NULL) {
@@ -1021,7 +1049,7 @@ static struct nv_drm_lut_surface *create_drm_olut_surface_legacy(
        return NULL;
    }

    lut_data = (NvU16 *) drm_lut_surface->buffer;
    lut_data = (NvU16 *) drm_lut_surface->base.buffer;

    /* Fill LUT surface. */
    for (entry_idx = 0; entry_idx < NVKMS_LUT_ARRAY_SIZE; entry_idx++) {
@@ -1057,6 +1085,74 @@ update_matrix_override(struct drm_property_blob *blob,
    return enabled;
}

static enum NvKmsInputColorSpace nv_get_nvkms_input_colorspace(
    enum nv_drm_input_color_space colorSpace)
{
    switch (colorSpace) {
        case NV_DRM_INPUT_COLOR_SPACE_NONE:
            return NVKMS_INPUT_COLOR_SPACE_NONE;
        case NV_DRM_INPUT_COLOR_SPACE_SCRGB_LINEAR:
            return NVKMS_INPUT_COLOR_SPACE_BT709;
        case NV_DRM_INPUT_COLOR_SPACE_BT2100_PQ:
            return NVKMS_INPUT_COLOR_SPACE_BT2100;
        default:
            /* We shouldn't hit this */
            WARN_ON("Unsupported input colorspace");
            return NVKMS_INPUT_COLOR_SPACE_NONE;
    }
}

static enum NvKmsInputTf nv_get_nvkms_input_tf(
    enum nv_drm_input_color_space colorSpace)
{
    switch (colorSpace) {
        case NV_DRM_INPUT_COLOR_SPACE_NONE:
            return NVKMS_INPUT_TF_LINEAR;
        case NV_DRM_INPUT_COLOR_SPACE_SCRGB_LINEAR:
            return NVKMS_INPUT_TF_LINEAR;
        case NV_DRM_INPUT_COLOR_SPACE_BT2100_PQ:
            return NVKMS_INPUT_TF_PQ;
        default:
            /* We shouldn't hit this */
            WARN_ON("Unsupported input colorspace");
            return NVKMS_INPUT_TF_LINEAR;
    }
}

#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
static enum NvKmsInputColorSpace nv_drm_color_encoding_to_nvkms_colorspace(
    enum drm_color_encoding color_encoding)
{
    switch(color_encoding) {
        case DRM_COLOR_YCBCR_BT601:
            return NVKMS_INPUT_COLOR_SPACE_BT601;
        case DRM_COLOR_YCBCR_BT709:
            return NVKMS_INPUT_COLOR_SPACE_BT709;
        case DRM_COLOR_YCBCR_BT2020:
            return NVKMS_INPUT_COLOR_SPACE_BT2020;
        default:
            /* We shouldn't hit this */
            WARN_ON("Unsupported DRM color_encoding");
            return NVKMS_INPUT_COLOR_SPACE_NONE;
    }
}

static enum NvKmsInputColorRange nv_drm_color_range_to_nvkms_color_range(
    enum drm_color_range color_range)
{
    switch(color_range) {
        case DRM_COLOR_YCBCR_FULL_RANGE:
            return NVKMS_INPUT_COLOR_RANGE_FULL;
        case DRM_COLOR_YCBCR_LIMITED_RANGE:
            return NVKMS_INPUT_COLOR_RANGE_LIMITED;
        default:
            /* We shouldn't hit this */
            WARN_ON("Unsupported DRM color_range");
            return NVKMS_INPUT_COLOR_RANGE_DEFAULT;
    }
}
#endif

static int
plane_req_config_update(struct drm_plane *plane,
                        struct drm_plane_state *plane_state,
@@ -1190,8 +1286,37 @@ plane_req_config_update(struct drm_plane *plane,
        nv_plane->defaultCompositionMode;
#endif

    req_config->config.inputColorSpace =
        nv_drm_plane_state->input_colorspace;
#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
    if ((nv_drm_plane_state->input_colorspace == NV_DRM_INPUT_COLOR_SPACE_NONE) &&
        nv_drm_format_is_yuv(plane_state->fb->format->format)) {

        if (nv_plane->supportsColorProperties) {
            req_config->config.inputColorSpace =
                nv_drm_color_encoding_to_nvkms_colorspace(plane_state->color_encoding);
            req_config->config.inputColorRange =
                nv_drm_color_range_to_nvkms_color_range(plane_state->color_range);
        } else {
            req_config->config.inputColorSpace = NVKMS_INPUT_COLOR_SPACE_NONE;
            req_config->config.inputColorRange = NVKMS_INPUT_COLOR_RANGE_DEFAULT;
        }
        req_config->config.inputTf = NVKMS_INPUT_TF_LINEAR;
    } else {
#endif
        req_config->config.inputColorSpace =
            nv_get_nvkms_input_colorspace(nv_drm_plane_state->input_colorspace);
        req_config->config.inputColorRange = NVKMS_INPUT_COLOR_RANGE_DEFAULT;
        req_config->config.inputTf =
            nv_get_nvkms_input_tf(nv_drm_plane_state->input_colorspace);
#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
    }
#endif

    req_config->flags.inputTfChanged =
        (old_config.inputTf != req_config->config.inputTf);
    req_config->flags.inputColorSpaceChanged =
        (old_config.inputColorSpace != req_config->config.inputColorSpace);
    req_config->flags.inputColorRangeChanged =
        (old_config.inputColorRange != req_config->config.inputColorRange);

    req_config->config.syncParams.preSyncptSpecified = false;
    req_config->config.syncParams.postSyncptRequested = false;
@@ -1240,10 +1365,10 @@ plane_req_config_update(struct drm_plane *plane,

            switch (info_frame->eotf) {
                case HDMI_EOTF_SMPTE_ST2084:
                    req_config->config.tf = NVKMS_OUTPUT_TF_PQ;
                    req_config->config.outputTf = NVKMS_OUTPUT_TF_PQ;
                    break;
                case HDMI_EOTF_TRADITIONAL_GAMMA_SDR:
                    req_config->config.tf =
                    req_config->config.outputTf =
                        NVKMS_OUTPUT_TF_TRADITIONAL_GAMMA_SDR;
                    break;
                default:
@@ -1254,7 +1379,7 @@ plane_req_config_update(struct drm_plane *plane,
        req_config->config.hdrMetadata.enabled = true;
    } else {
        req_config->config.hdrMetadata.enabled = false;
        req_config->config.tf = NVKMS_OUTPUT_TF_NONE;
        req_config->config.outputTf = NVKMS_OUTPUT_TF_NONE;
    }

    req_config->flags.hdrMetadataChanged =
@@ -1264,7 +1389,7 @@ plane_req_config_update(struct drm_plane *plane,
                &req_config->config.hdrMetadata.val,
                sizeof(struct NvKmsHDRStaticMetadata)));

    req_config->flags.tfChanged = (old_config.tf != req_config->config.tf);
    req_config->flags.outputTfChanged = (old_config.outputTf != req_config->config.outputTf);
#endif

    req_config->config.matrixOverrides.enabled.lmsCtm =
@@ -1295,7 +1420,7 @@ plane_req_config_update(struct drm_plane *plane,

    if (nv_drm_plane_state->degamma_changed) {
        if (nv_drm_plane_state->degamma_drm_lut_surface != NULL) {
            kref_put(&nv_drm_plane_state->degamma_drm_lut_surface->refcount,
            kref_put(&nv_drm_plane_state->degamma_drm_lut_surface->base.refcount,
                     free_drm_lut_surface);
            nv_drm_plane_state->degamma_drm_lut_surface = NULL;
        }
@@ -1327,7 +1452,7 @@ plane_req_config_update(struct drm_plane *plane,
        if (nv_drm_plane_state->degamma_drm_lut_surface != NULL) {
            req_config->config.ilut.enabled = NV_TRUE;
            req_config->config.ilut.lutSurface =
                nv_drm_plane_state->degamma_drm_lut_surface->nvkms_surface;
                nv_drm_plane_state->degamma_drm_lut_surface->base.nvkms_surface;
            req_config->config.ilut.offset = 0;
            req_config->config.ilut.vssSegments =
                nv_drm_plane_state->degamma_drm_lut_surface->properties.vssSegments;
@@ -1346,7 +1471,7 @@ plane_req_config_update(struct drm_plane *plane,

    if (nv_drm_plane_state->tmo_changed) {
        if (nv_drm_plane_state->tmo_drm_lut_surface != NULL) {
            kref_put(&nv_drm_plane_state->tmo_drm_lut_surface->refcount,
            kref_put(&nv_drm_plane_state->tmo_drm_lut_surface->base.refcount,
                     free_drm_lut_surface);
            nv_drm_plane_state->tmo_drm_lut_surface = NULL;
        }
@@ -1363,7 +1488,7 @@ plane_req_config_update(struct drm_plane *plane,
if (nv_drm_plane_state->tmo_drm_lut_surface != NULL) {
|
||||
req_config->config.tmo.enabled = NV_TRUE;
|
||||
req_config->config.tmo.lutSurface =
|
||||
nv_drm_plane_state->tmo_drm_lut_surface->nvkms_surface;
|
||||
nv_drm_plane_state->tmo_drm_lut_surface->base.nvkms_surface;
|
||||
req_config->config.tmo.offset = 0;
|
||||
req_config->config.tmo.vssSegments =
|
||||
nv_drm_plane_state->tmo_drm_lut_surface->properties.vssSegments;
|
||||
@@ -1870,7 +1995,7 @@ nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
|
||||
nv_plane_state->degamma_drm_lut_surface =
|
||||
nv_old_plane_state->degamma_drm_lut_surface;
|
||||
if (nv_plane_state->degamma_drm_lut_surface) {
|
||||
kref_get(&nv_plane_state->degamma_drm_lut_surface->refcount);
|
||||
kref_get(&nv_plane_state->degamma_drm_lut_surface->base.refcount);
|
||||
}
|
||||
|
||||
nv_plane_state->tmo_lut = nv_old_plane_state->tmo_lut;
|
||||
@@ -1881,7 +2006,7 @@ nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
|
||||
nv_plane_state->tmo_drm_lut_surface =
|
||||
nv_old_plane_state->tmo_drm_lut_surface;
|
||||
if (nv_plane_state->tmo_drm_lut_surface) {
|
||||
kref_get(&nv_plane_state->tmo_drm_lut_surface->refcount);
|
||||
kref_get(&nv_plane_state->tmo_drm_lut_surface->base.refcount);
|
||||
}
|
||||
|
||||
return &nv_plane_state->base;
|
||||
@@ -1909,13 +2034,13 @@ static inline void __nv_drm_plane_atomic_destroy_state(
|
||||
|
||||
nv_drm_property_blob_put(nv_drm_plane_state->degamma_lut);
|
||||
if (nv_drm_plane_state->degamma_drm_lut_surface != NULL) {
|
||||
kref_put(&nv_drm_plane_state->degamma_drm_lut_surface->refcount,
|
||||
kref_put(&nv_drm_plane_state->degamma_drm_lut_surface->base.refcount,
|
||||
free_drm_lut_surface);
|
||||
}
|
||||
|
||||
nv_drm_property_blob_put(nv_drm_plane_state->tmo_lut);
|
||||
if (nv_drm_plane_state->tmo_drm_lut_surface != NULL) {
|
||||
kref_put(&nv_drm_plane_state->tmo_drm_lut_surface->refcount,
|
||||
kref_put(&nv_drm_plane_state->tmo_drm_lut_surface->base.refcount,
|
||||
free_drm_lut_surface);
|
||||
}
|
||||
}
|
||||
@@ -2113,7 +2238,7 @@ nv_drm_atomic_crtc_duplicate_state(struct drm_crtc *crtc)
|
||||
}
|
||||
nv_state->regamma_divisor = nv_old_state->regamma_divisor;
|
||||
if (nv_state->regamma_drm_lut_surface) {
|
||||
kref_get(&nv_state->regamma_drm_lut_surface->refcount);
|
||||
kref_get(&nv_state->regamma_drm_lut_surface->base.refcount);
|
||||
}
|
||||
nv_state->regamma_changed = false;
|
||||
|
||||
@@ -2142,7 +2267,7 @@ static void nv_drm_atomic_crtc_destroy_state(struct drm_crtc *crtc,
|
||||
|
||||
nv_drm_property_blob_put(nv_state->regamma_lut);
|
||||
if (nv_state->regamma_drm_lut_surface != NULL) {
|
||||
kref_put(&nv_state->regamma_drm_lut_surface->refcount,
|
||||
kref_put(&nv_state->regamma_drm_lut_surface->base.refcount,
|
||||
free_drm_lut_surface);
|
||||
}
|
||||
|
||||
@@ -2386,7 +2511,7 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,
|
||||
|
||||
if (nv_crtc_state->regamma_changed) {
|
||||
if (nv_crtc_state->regamma_drm_lut_surface != NULL) {
|
||||
kref_put(&nv_crtc_state->regamma_drm_lut_surface->refcount,
|
||||
kref_put(&nv_crtc_state->regamma_drm_lut_surface->base.refcount,
|
||||
free_drm_lut_surface);
|
||||
nv_crtc_state->regamma_drm_lut_surface = NULL;
|
||||
}
|
||||
@@ -2417,7 +2542,7 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,
|
||||
if (nv_crtc_state->regamma_drm_lut_surface != NULL) {
|
||||
req_config->modeSetConfig.olut.enabled = NV_TRUE;
|
||||
req_config->modeSetConfig.olut.lutSurface =
|
||||
nv_crtc_state->regamma_drm_lut_surface->nvkms_surface;
|
||||
nv_crtc_state->regamma_drm_lut_surface->base.nvkms_surface;
|
||||
req_config->modeSetConfig.olut.offset = 0;
|
||||
req_config->modeSetConfig.olut.vssSegments =
|
||||
nv_crtc_state->regamma_drm_lut_surface->properties.vssSegments;
|
||||
@@ -2521,7 +2646,7 @@ static void nv_drm_plane_install_properties(
|
||||
if (nv_dev->nv_input_colorspace_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_input_colorspace_property,
|
||||
NVKMS_INPUT_COLORSPACE_NONE);
|
||||
NV_DRM_INPUT_COLOR_SPACE_NONE);
|
||||
}
|
||||
|
||||
if (supportsICtCp) {
|
||||
@@ -2531,17 +2656,14 @@ static void nv_drm_plane_install_properties(
|
||||
&plane->base, nv_dev->nv_hdr_output_metadata_property, 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* The old DRM_OBJECT_MAX_PROPERTY limit of 24 is too small to
|
||||
* accomodate all of the properties for the ICtCp pipeline.
|
||||
*
|
||||
* Commit 1e13c5644c44 ("drm/drm_mode_object: increase max objects to
|
||||
* accommodate new color props") in Linux v6.8 increased the limit to
|
||||
* 64. To be safe, require this before attaching any properties for the
|
||||
* ICtCp pipeline.
|
||||
*/
|
||||
if (DRM_OBJECT_MAX_PROPERTY >= 64) {
|
||||
/*
|
||||
* Per-plane HDR properties get us dangerously close to the 24 property
|
||||
* limit on kernels that don't support NV_DRM_USE_EXTENDED_PROPERTIES.
|
||||
*/
|
||||
if (NV_DRM_USE_EXTENDED_PROPERTIES) {
|
||||
if (supportsICtCp) {
|
||||
if (nv_dev->nv_plane_lms_ctm_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_lms_ctm_property, 0);
|
||||
@@ -2568,36 +2690,36 @@ static void nv_drm_plane_install_properties(
|
||||
NVKMS_LUT_ARRAY_SIZE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (nv_dev->nv_plane_blend_ctm_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_blend_ctm_property, 0);
|
||||
}
|
||||
if (nv_dev->nv_plane_blend_ctm_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_blend_ctm_property, 0);
|
||||
}
|
||||
|
||||
if (nv_plane->ilut_caps.supported) {
|
||||
if (nv_plane->ilut_caps.vssSupport == NVKMS_LUT_VSS_SUPPORTED) {
|
||||
if (nv_dev->nv_plane_degamma_tf_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_degamma_tf_property,
|
||||
NV_DRM_TRANSFER_FUNCTION_DEFAULT);
|
||||
if (nv_plane->ilut_caps.supported) {
|
||||
if (nv_plane->ilut_caps.vssSupport == NVKMS_LUT_VSS_SUPPORTED) {
|
||||
if (nv_dev->nv_plane_degamma_tf_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_degamma_tf_property,
|
||||
NV_DRM_TRANSFER_FUNCTION_DEFAULT);
|
||||
}
|
||||
if (nv_dev->nv_plane_degamma_multiplier_property) {
|
||||
/* Default to 1 in S31.32 Sign-Magnitude Format */
|
||||
nv_plane_state->degamma_multiplier = ((uint64_t) 1) << 32;
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_degamma_multiplier_property,
|
||||
nv_plane_state->degamma_multiplier);
|
||||
}
|
||||
}
|
||||
if (nv_dev->nv_plane_degamma_multiplier_property) {
|
||||
/* Default to 1 in S31.32 Sign-Magnitude Format */
|
||||
nv_plane_state->degamma_multiplier = ((uint64_t) 1) << 32;
|
||||
if (nv_dev->nv_plane_degamma_lut_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_degamma_multiplier_property,
|
||||
nv_plane_state->degamma_multiplier);
|
||||
&plane->base, nv_dev->nv_plane_degamma_lut_property, 0);
|
||||
}
|
||||
if (nv_dev->nv_plane_degamma_lut_size_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_degamma_lut_size_property,
|
||||
NVKMS_LUT_ARRAY_SIZE);
|
||||
}
|
||||
}
|
||||
if (nv_dev->nv_plane_degamma_lut_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_degamma_lut_property, 0);
|
||||
}
|
||||
if (nv_dev->nv_plane_degamma_lut_size_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_plane_degamma_lut_size_property,
|
||||
NVKMS_LUT_ARRAY_SIZE);
|
||||
}
|
||||
}
|
||||
}
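
On the S31.32 default above: in sign-magnitude fixed point the sign lives in bit 63, the integer part in bits 62:32, and the fraction in bits 31:0, so 1.0 is exactly ((uint64_t)1) << 32. A hedged, userspace-style illustration of that encoding (the helper is not driver code; kernel code would avoid floating point):

#include <stdint.h>

/* Illustrative S31.32 sign-magnitude encoder, assuming the layout
 * described above (sign in bit 63, 2^32 scaling for the fraction). */
static inline uint64_t s31_32_from_double(double v)
{
    uint64_t sign = (v < 0.0) ? (UINT64_C(1) << 63) : 0;
    double mag = (v < 0.0) ? -v : v;
    return sign | (uint64_t)(mag * 4294967296.0); /* mag * 2^32 */
}

/* s31_32_from_double(1.0) == ((uint64_t)1) << 32, the default used above. */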
@@ -2776,6 +2898,29 @@ nv_drm_plane_create(struct drm_device *dev,
goto failed_plane_init;
}

#if defined(NV_DRM_PLANE_CREATE_COLOR_PROPERTIES_PRESENT)
if (pResInfo->caps.supportsInputColorSpace &&
pResInfo->caps.supportsInputColorRange) {

nv_plane->supportsColorProperties = true;

drm_plane_create_color_properties(
plane,
NVBIT(DRM_COLOR_YCBCR_BT601) |
NVBIT(DRM_COLOR_YCBCR_BT709) |
NVBIT(DRM_COLOR_YCBCR_BT2020),
NVBIT(DRM_COLOR_YCBCR_FULL_RANGE) |
NVBIT(DRM_COLOR_YCBCR_LIMITED_RANGE),
DRM_COLOR_YCBCR_BT709,
DRM_COLOR_YCBCR_FULL_RANGE
);
} else {
nv_plane->supportsColorProperties = false;
}
#else
nv_plane->supportsColorProperties = false;
#endif

drm_plane_helper_add(plane, &nv_plane_helper_funcs);

if (plane_type != DRM_PLANE_TYPE_CURSOR) {

@@ -191,6 +191,13 @@ struct nv_drm_plane {
*/
uint32_t layer_idx;

/**
* @supportsColorProperties
*
* If true, supports the COLOR_ENCODING and COLOR_RANGE properties.
*/
bool supportsColorProperties;

struct NvKmsLUTCaps ilut_caps;
struct NvKmsLUTCaps tmo_caps;
};
@@ -203,10 +210,23 @@ static inline struct nv_drm_plane *to_nv_plane(struct drm_plane *plane)
return container_of(plane, struct nv_drm_plane, base);
}

struct nv_drm_lut_surface {
struct nv_drm_nvkms_surface {
struct NvKmsKapiDevice *pDevice;
struct NvKmsKapiMemory *nvkms_memory;
struct NvKmsKapiSurface *nvkms_surface;
void *buffer;
struct kref refcount;
};

struct nv_drm_nvkms_surface_params {
NvU32 width;
NvU32 height;
size_t surface_size;
enum NvKmsSurfaceMemoryFormat format;
};

struct nv_drm_lut_surface {
struct nv_drm_nvkms_surface base;
struct {
NvU32 vssSegments;
enum NvKmsLUTVssType vssType;
@@ -215,14 +235,12 @@ struct nv_drm_lut_surface {
enum NvKmsLUTFormat entryFormat;

} properties;
void *buffer;
struct kref refcount;
};

struct nv_drm_plane_state {
struct drm_plane_state base;
s32 __user *fd_user_ptr;
enum NvKmsInputColorSpace input_colorspace;
enum nv_drm_input_color_space input_colorspace;
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
struct drm_property_blob *hdr_output_metadata;
#endif

@@ -35,6 +35,8 @@
#include "nvidia-drm-gem-nvkms-memory.h"
#include "nvidia-drm-gem-user-memory.h"
#include "nvidia-drm-gem-dma-buf.h"
#include "nvidia-drm-utils.h"
#include "nv_dpy_id.h"

#if defined(NV_DRM_AVAILABLE)

@@ -90,6 +92,7 @@

#include <linux/pci.h>
#include <linux/workqueue.h>
#include <linux/sort.h>

/*
* Commit fcd70cd36b9b ("drm: Split out drm_probe_helper.h")
@@ -120,15 +123,15 @@ static int nv_drm_revoke_sub_ownership(struct drm_device *dev);

static struct nv_drm_device *dev_list = NULL;

static char* nv_get_input_colorspace_name(
enum NvKmsInputColorSpace colorSpace)
static const char* nv_get_input_colorspace_name(
enum nv_drm_input_color_space colorSpace)
{
switch (colorSpace) {
case NVKMS_INPUT_COLORSPACE_NONE:
case NV_DRM_INPUT_COLOR_SPACE_NONE:
return "None";
case NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR:
case NV_DRM_INPUT_COLOR_SPACE_SCRGB_LINEAR:
return "scRGB Linear FP16";
case NVKMS_INPUT_COLORSPACE_BT2100_PQ:
case NV_DRM_INPUT_COLOR_SPACE_BT2100_PQ:
return "BT.2100 PQ";
default:
/* We shouldn't hit this */
@@ -284,6 +287,123 @@ done:
mutex_unlock(&nv_dev->lock);
}

struct nv_drm_mst_display_info {
NvKmsKapiDisplay handle;
NvBool isDpMST;
char dpAddress[NVKMS_DP_ADDRESS_STRING_LENGTH];
};

/*
* Helper function to get DP MST display info.
* dpMSTDisplayInfos is allocated dynamically,
* so it needs to be freed after finishing the query.
*/
static int nv_drm_get_mst_display_infos
(
struct nv_drm_device *nv_dev,
NvKmsKapiDisplay hDisplay,
struct nv_drm_mst_display_info **dpMSTDisplayInfos,
NvU32 *nDynamicDisplays
)
{
struct NvKmsKapiStaticDisplayInfo *displayInfo = NULL;
struct NvKmsKapiStaticDisplayInfo *dynamicDisplayInfo = NULL;
struct NvKmsKapiConnectorInfo *connectorInfo = NULL;
struct nv_drm_mst_display_info *displayInfos = NULL;
NvU32 i = 0;
int ret = 0;
NVDpyId dpyId;
*nDynamicDisplays = 0;

/* Query NvKmsKapiStaticDisplayInfo and NvKmsKapiConnectorInfo */

if ((displayInfo = nv_drm_calloc(1, sizeof(*displayInfo))) == NULL) {
ret = -ENOMEM;
goto done;
}

if ((dynamicDisplayInfo = nv_drm_calloc(1, sizeof(*dynamicDisplayInfo))) == NULL) {
ret = -ENOMEM;
goto done;
}

if (!nvKms->getStaticDisplayInfo(nv_dev->pDevice, hDisplay, displayInfo)) {
ret = -EINVAL;
goto done;
}

connectorInfo = nvkms_get_connector_info(nv_dev->pDevice,
displayInfo->connectorHandle);

if (IS_ERR(connectorInfo)) {
ret = PTR_ERR(connectorInfo);
goto done;
}

*nDynamicDisplays = nvCountDpyIdsInDpyIdList(connectorInfo->dynamicDpyIdList);

if (*nDynamicDisplays == 0) {
goto done;
}

if ((displayInfos = nv_drm_calloc(*nDynamicDisplays, sizeof(*displayInfos))) == NULL) {
ret = -ENOMEM;
goto done;
}

FOR_ALL_DPY_IDS(dpyId, connectorInfo->dynamicDpyIdList) {
if (!nvKms->getStaticDisplayInfo(nv_dev->pDevice,
nvDpyIdToNvU32(dpyId),
dynamicDisplayInfo)) {
ret = -EINVAL;
nv_drm_free(displayInfos);
goto done;
}

displayInfos[i].handle = dynamicDisplayInfo->handle;
displayInfos[i].isDpMST = dynamicDisplayInfo->isDpMST;
memcpy(displayInfos[i].dpAddress, dynamicDisplayInfo->dpAddress, sizeof(dynamicDisplayInfo->dpAddress));

i++;
}

*dpMSTDisplayInfos = displayInfos;

done:

nv_drm_free(displayInfo);

nv_drm_free(dynamicDisplayInfo);

nv_drm_free(connectorInfo);

return ret;
}

static int nv_drm_disp_cmp (const void *l, const void *r)
{
struct nv_drm_mst_display_info *l_info = (struct nv_drm_mst_display_info *)l;
struct nv_drm_mst_display_info *r_info = (struct nv_drm_mst_display_info *)r;

return strcmp(l_info->dpAddress, r_info->dpAddress);
}

/*
* Helper function to sort displays by dpAddress, compared as strings.
* Sorting makes the order in which DRM connector IDs are created
* deterministic; note the ordering is lexicographic, not numeric.
*/
static void nv_drm_sort_dynamic_displays_by_dp_addr
(
struct nv_drm_mst_display_info *infos,
int nDynamicDisplays
)
{
sort(infos, nDynamicDisplays, sizeof(*infos), nv_drm_disp_cmp, NULL);
}
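
Because the comparator uses strcmp(), the resulting order is byte-wise rather than numeric. A hedged illustration with made-up DP addresses (the address format here is hypothetical):

/* Illustrative only: three hypothetical dpAddress strings sorted as above. */
static void nv_drm_dp_addr_sort_example(void)
{
    struct nv_drm_mst_display_info demo[] = {
        { .dpAddress = "1.2" }, { .dpAddress = "1.10" }, { .dpAddress = "1.9" },
    };

    nv_drm_sort_dynamic_displays_by_dp_addr(demo, 3);
    /* Resulting strcmp() order: "1.10", "1.2", "1.9" -- deterministic
     * across probes, though "1.10" sorts before "1.2" even though 10 > 2. */
}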

/*
* Helper function to initialize drm_device::mode_config from
* NvKmsKapiDevice's resource information.
@@ -365,9 +485,11 @@ static void nv_drm_enumerate_encoders_and_connectors
nv_dev,
"Failed to enumerate NvKmsKapiDisplay handles");
} else {
NvU32 i;
NvU32 i, j;
NvU32 nDynamicDisplays = 0;

for (i = 0; i < nDisplays; i++) {
struct nv_drm_mst_display_info *displayInfos = NULL;
struct drm_encoder *encoder =
nv_drm_add_encoder(dev, hDisplays[i]);

@@ -377,6 +499,34 @@ static void nv_drm_enumerate_encoders_and_connectors
"Failed to add connector for NvKmsKapiDisplay 0x%08x",
hDisplays[i]);
}

if (nv_drm_get_mst_display_infos(nv_dev, hDisplays[i],
&displayInfos, &nDynamicDisplays)) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to get dynamic displays");
} else if (nDynamicDisplays) {
nv_drm_sort_dynamic_displays_by_dp_addr(displayInfos, nDynamicDisplays);

for (j = 0; j < nDynamicDisplays; j++) {
if (displayInfos[j].isDpMST) {
struct drm_encoder *mst_encoder =
nv_drm_add_encoder(dev, displayInfos[j].handle);

NV_DRM_DEV_DEBUG_DRIVER(nv_dev, "found DP MST port display handle %u",
displayInfos[j].handle);

if (IS_ERR(mst_encoder)) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to add connector for NvKmsKapiDisplay 0x%08x",
displayInfos[j].handle);
}
}
}

nv_drm_free(displayInfos);
}
}
}

@@ -602,6 +752,7 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
memset(&allocateDeviceParams, 0, sizeof(allocateDeviceParams));

allocateDeviceParams.gpuId = nv_dev->gpu_info.gpu_id;
allocateDeviceParams.migDevice = nv_dev->gpu_mig_device;

allocateDeviceParams.privateData = nv_dev;
allocateDeviceParams.eventCallback = nv_drm_event_callback;
@@ -672,6 +823,9 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)

nv_dev->requiresVrrSemaphores = resInfo.caps.requiresVrrSemaphores;

nv_dev->vtFbBaseAddress = resInfo.vtFbBaseAddress;
nv_dev->vtFbSize = resInfo.vtFbSize;

#if defined(NV_DRM_FORMAT_MODIFIERS_PRESENT)
gen = nv_dev->pageKindGeneration;
kind = nv_dev->genericPageKind;
@@ -855,6 +1009,62 @@ static void nv_drm_master_set(struct drm_device *dev,
}
#endif

static
int nv_drm_reset_input_colorspace(struct drm_device *dev)
{
struct drm_atomic_state *state;
struct drm_plane_state *plane_state;
struct drm_plane *plane;
struct nv_drm_plane_state *nv_drm_plane_state;
struct drm_modeset_acquire_ctx ctx;
int ret = 0;
bool do_reset = false;
NvU32 flags = 0;

state = drm_atomic_state_alloc(dev);
if (!state)
return -ENOMEM;

#if defined(DRM_MODESET_ACQUIRE_INTERRUPTIBLE)
flags |= DRM_MODESET_ACQUIRE_INTERRUPTIBLE;
#endif
drm_modeset_acquire_init(&ctx, flags);
state->acquire_ctx = &ctx;

nv_drm_for_each_plane(plane, dev) {
plane_state = drm_atomic_get_plane_state(state, plane);
if (IS_ERR(plane_state)) {
ret = PTR_ERR(plane_state);
goto out;
}

nv_drm_plane_state = to_nv_drm_plane_state(plane_state);
if (nv_drm_plane_state) {
if (nv_drm_plane_state->input_colorspace != NV_DRM_INPUT_COLOR_SPACE_NONE) {
nv_drm_plane_state->input_colorspace = NV_DRM_INPUT_COLOR_SPACE_NONE;
do_reset = true;
}
}
}

if (do_reset) {
ret = drm_atomic_commit(state);
}

out:
#if defined(NV_DRM_ATOMIC_STATE_REF_COUNTING_PRESENT)
drm_atomic_state_put(state);
#else
// In case of success, drm_atomic_commit() takes care of cleaning up and freeing the state.
if (ret != 0) {
drm_atomic_state_free(state);
}
#endif
drm_modeset_drop_locks(&ctx);
drm_modeset_acquire_fini(&ctx);

return ret;
}

#if defined(NV_DRM_MASTER_DROP_HAS_FROM_RELEASE_ARG)
static
@@ -898,6 +1108,12 @@ void nv_drm_master_drop(struct drm_device *dev, struct drm_file *file_priv)
drm_modeset_unlock_all(dev);

nvKms->releaseOwnership(nv_dev->pDevice);
} else {
int err = nv_drm_reset_input_colorspace(dev);
if (err != 0) {
NV_DRM_DEV_LOG_WARN(nv_dev,
"nv_drm_reset_input_colorspace failed with error code: %d!", err);
}
}
}
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
@@ -935,6 +1151,7 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
}

params->gpu_id = nv_dev->gpu_info.gpu_id;
params->mig_device = nv_dev->gpu_mig_device;
params->primary_index = dev->primary->index;
params->supports_alloc = false;
params->generic_page_kind = 0;
@@ -1725,7 +1942,7 @@ static const struct file_operations nv_drm_fops = {

.llseek = noop_llseek,

#if defined(NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT)
#if defined(FOP_UNSIGNED_OFFSET)
.fop_flags = FOP_UNSIGNED_OFFSET,
#endif
};
@@ -1967,16 +2184,16 @@ void nv_drm_update_drm_driver_features(void)
/*
* Helper function to allocate/register a DRM device for the given NVIDIA GPU ID.
*/
void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
void nv_drm_register_drm_device(const struct NvKmsKapiGpuInfo *gpu_info)
{
struct nv_drm_device *nv_dev = NULL;
struct drm_device *dev = NULL;
struct device *device = gpu_info->os_device_ptr;
struct device *device = gpu_info->gpuInfo.os_device_ptr;
bool bus_is_pci;

DRM_DEBUG(
"Registering device for NVIDIA GPU ID 0x%08x",
gpu_info->gpu_id);
gpu_info->gpuInfo.gpu_id);

/* Allocate NVIDIA-DRM device */

@@ -1988,7 +2205,8 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
return;
}

nv_dev->gpu_info = *gpu_info;
nv_dev->gpu_info = gpu_info->gpuInfo;
nv_dev->gpu_mig_device = gpu_info->migDevice;

#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
mutex_init(&nv_dev->lock);
@@ -2045,9 +2263,30 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
aperture_remove_conflicting_pci_devices(pdev, nv_drm_driver.name);
#endif
nvKms->framebufferConsoleDisabled(nv_dev->pDevice);
} else {
resource_size_t base = (resource_size_t) nv_dev->vtFbBaseAddress;
resource_size_t size = (resource_size_t) nv_dev->vtFbSize;

if (base > 0 && size > 0) {
#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_PRESENT)

#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_HAS_DRIVER_ARG)
drm_aperture_remove_conflicting_framebuffers(base, size, false, &nv_drm_driver);
#elif defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_HAS_NO_PRIMARY_ARG)
drm_aperture_remove_conflicting_framebuffers(base, size, &nv_drm_driver);
#else
drm_aperture_remove_conflicting_framebuffers(base, size, false, nv_drm_driver.name);
#endif

#elif defined(NV_APERTURE_REMOVE_CONFLICTING_DEVICES_PRESENT)
aperture_remove_conflicting_devices(base, size, nv_drm_driver.name);
#endif
} else {
NV_DRM_DEV_LOG_INFO(nv_dev, "Invalid framebuffer console info");
}
}
#if defined(NV_DRM_CLIENT_AVAILABLE)
drm_client_setup(dev, NULL);
#elif defined(NV_DRM_FBDEV_TTM_AVAILABLE)
drm_fbdev_ttm_setup(dev, 32);
#elif defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
@@ -2078,7 +2317,7 @@ failed_drm_alloc:

#if defined(NV_LINUX)
int nv_drm_probe_devices(void)
{
nv_gpu_info_t *gpu_info = NULL;
struct NvKmsKapiGpuInfo *gpu_info = NULL;
NvU32 gpu_count = 0;
NvU32 i;

@@ -27,13 +27,15 @@

#if defined(NV_DRM_AVAILABLE)

struct NvKmsKapiGpuInfo;

int nv_drm_probe_devices(void);

void nv_drm_remove_devices(void);

void nv_drm_suspend_resume(NvBool suspend);

void nv_drm_register_drm_device(const nv_gpu_info_t *);
void nv_drm_register_drm_device(const struct NvKmsKapiGpuInfo *);

void nv_drm_update_drm_driver_features(void);

@@ -319,7 +319,7 @@ void nv_drm_handle_dynamic_display_connected(struct nv_drm_device *nv_dev,
nv_encoder = get_nv_encoder_from_nvkms_display(dev, hDisplay);

if (nv_encoder != NULL) {
NV_DRM_DEV_LOG_ERR(
NV_DRM_DEV_LOG_INFO(
nv_dev,
"Encoder with NvKmsKapiDisplay 0x%08x already exists.",
hDisplay);

@@ -202,6 +202,43 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
params.explicit_layout = false;
}

/*
* XXX work around an invalid pitch assumption in DRM.
*
* The smallest pitch the display hardware allows is 256.
*
* If a DRM client allocates a 32x32 cursor surface through
* DRM_IOCTL_MODE_CREATE_DUMB, we'll correctly round the pitch to 256:
*
* pitch = round(32 (width) * 4 (Bpp), 256) = 256
*
* and then allocate an 8k surface:
*
* size = pitch * 32 (height) = 8192
*
* and report the rounded pitch and size back to the client through the
* struct drm_mode_create_dumb ioctl params.
*
* But when the DRM client passes that buffer object handle to
* DRM_IOCTL_MODE_CURSOR, the client has no way to specify the pitch. This
* path in drm:
*
* DRM_IOCTL_MODE_CURSOR
* drm_mode_cursor_ioctl()
* drm_mode_cursor_common()
* drm_mode_cursor_universal()
*
* will implicitly create a framebuffer from the buffer object, and compute
* the pitch as width (32) x 4 Bpp = 128 (without aligning to our minimum
* pitch).
*
* Intercept this case and force the pitch back to 256.
*/
if ((params.width == 32) &&
(params.height == 32) &&
(params.planes[0].pitch == 128)) {
params.planes[0].pitch = 256;
}
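
To make the arithmetic in the comment concrete, a sketch of both computations (round_up() is the standard kernel helper; the function itself is illustrative, not driver code):

/* Illustrative arithmetic only. */
static void nv_drm_cursor_pitch_example(void)
{
    u32 dumb_pitch = round_up(32 * 4, 256); /* CREATE_DUMB path: 128 -> 256 */
    u32 dumb_size  = dumb_pitch * 32;       /* 256 * 32 = 8192 bytes */
    u32 cursor_pitch = 32 * 4;              /* MODE_CURSOR path: 128, unaligned */

    /* cursor_pitch is the value the check above intercepts and forces
     * back to 256. */
    (void)dumb_size;
    (void)cursor_pitch;
}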

/* Create NvKmsKapiSurface */

nv_fb->pSurface = nvKms->createSurface(nv_dev->pDevice, &params);

@@ -166,4 +166,37 @@ uint32_t *nv_drm_format_array_alloc(
return array;
}

bool nv_drm_format_is_yuv(u32 format)
{
#if defined(NV_DRM_FORMAT_INFO_HAS_IS_YUV)
const struct drm_format_info *format_info = drm_format_info(format);
return (format_info != NULL) && format_info->is_yuv;
#else
switch (format) {
case DRM_FORMAT_YUYV:
case DRM_FORMAT_UYVY:

case DRM_FORMAT_NV24:
case DRM_FORMAT_NV42:
case DRM_FORMAT_NV16:
case DRM_FORMAT_NV61:
case DRM_FORMAT_NV12:
case DRM_FORMAT_NV21:

#if defined(DRM_FORMAT_P210)
case DRM_FORMAT_P210:
#endif
#if defined(DRM_FORMAT_P010)
case DRM_FORMAT_P010:
#endif
#if defined(DRM_FORMAT_P012)
case DRM_FORMAT_P012:
#endif
return true;
default:
return false;
}
#endif
}
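
A brief hedged sanity check of the two branches' intent: for the formats handled above, both the drm_format_info() path and the fallback switch should classify identically, e.g.:

/* Illustrative expectations, valid for either branch above (the
 * selftest function itself is hypothetical). */
static void nv_drm_format_is_yuv_selftest(void)
{
    WARN_ON(!nv_drm_format_is_yuv(DRM_FORMAT_NV12));     /* semi-planar YUV */
    WARN_ON(nv_drm_format_is_yuv(DRM_FORMAT_XRGB8888));  /* packed RGB */
}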

#endif

@@ -38,6 +38,8 @@ uint32_t *nv_drm_format_array_alloc(
unsigned int *count,
const long unsigned int nvkms_format_mask);

bool nv_drm_format_is_yuv(u32 format);

#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */

#endif /* __NVIDIA_DRM_FORMAT_H__ */

@@ -308,12 +308,12 @@ static int __nv_drm_nvkms_gem_obj_init(
nv_nvkms_memory->pWriteCombinedIORemapAddress = NULL;
nv_nvkms_memory->physically_mapped = false;

if (!nvKms->getMemoryPages(nv_dev->pDevice,
if (!nvKms->isVidmem(pMemory) &&
!nvKms->getMemoryPages(nv_dev->pDevice,
pMemory,
&pages,
&numPages) &&
!nvKms->isVidmem(pMemory)) {
/* GetMemoryPages may fail for vidmem allocations,
&numPages)) {
/* GetMemoryPages will fail for vidmem allocations,
* but it should not fail for sysmem allocations. */
NV_DRM_DEV_LOG_ERR(nv_dev,
"Failed to get memory pages for NvKmsKapiMemory 0x%p",

@@ -69,6 +69,13 @@

#endif //NV_DRM_ROTATION_AVAILABLE

/*
* Commit 1e13c5644c44 ("drm/drm_mode_object: increase max objects to
* accommodate new color props") in Linux v6.8 increased the per-object
* property limit from 24 to 64.
*/
#define NV_DRM_USE_EXTENDED_PROPERTIES (DRM_OBJECT_MAX_PROPERTY >= 64)

/*
* drm_dev_put() is added by commit 9a96f55034e41b4e002b767e9218d55f03bdff7d
* (2017-09-26) and drm_dev_unref() is removed by

@@ -182,6 +182,7 @@ struct drm_nvidia_gem_import_userspace_memory_params {

struct drm_nvidia_get_dev_info_params {
uint32_t gpu_id; /* OUT */
uint32_t mig_device; /* OUT */
uint32_t primary_index; /* OUT; the "card%d" value */

uint32_t supports_alloc; /* OUT */

@@ -677,6 +677,33 @@ int nv_drm_atomic_commit(struct drm_device *dev,
"Flip event timeout on head %u", nv_crtc->head);
}
}

#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
/*
* If the legacy LUT needs to be updated, ensure that the previous LUT
* update is complete first.
*/
if (crtc_state->color_mgmt_changed) {
NvBool complete = nvKms->checkLutNotifier(nv_dev->pDevice,
nv_crtc->head,
!nonblock /* waitForCompletion */);

/* If checking the LUT notifier failed, assume no LUT notifier is set. */
if (!complete) {
if (nonblock) {
return -EBUSY;
} else {
/*
* checkLutNotifier should wait on the notifier in this
* case, so we should only get here if the wait timed out.
*/
NV_DRM_DEV_LOG_ERR(
nv_dev,
"LUT notifier timeout on head %u", nv_crtc->head);
}
}
}
#endif
}

#if defined(NV_DRM_ATOMIC_HELPER_SWAP_STATE_HAS_STALL_ARG)
@@ -803,6 +830,19 @@ int nv_drm_atomic_commit(struct drm_device *dev,
__nv_drm_handle_flip_event(nv_crtc);
}
}

#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
if (crtc_state->color_mgmt_changed) {
NvBool complete = nvKms->checkLutNotifier(nv_dev->pDevice,
nv_crtc->head,
true /* waitForCompletion */);
if (!complete) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"LUT notifier timeout on head %u", nv_crtc->head);
}
}
#endif
}
}

@@ -58,16 +58,6 @@ typedef struct nv_timer nv_drm_timer;
#error "Need to define kernel timer callback primitives for this OS"
#endif

#if defined(NV_DRM_FBDEV_GENERIC_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_FBDEV_GENERIC_AVAILABLE
#endif

#if defined(NV_DRM_FBDEV_TTM_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#define NV_DRM_FBDEV_AVAILABLE
#define NV_DRM_FBDEV_TTM_AVAILABLE
#endif

struct page;

/* Set to true when the atomic modeset feature is enabled. */

@@ -85,8 +85,15 @@

DRM_DEBUG_DRIVER("[GPU ID 0x%08x] " __fmt, \
__dev->gpu_info.gpu_id, ##__VA_ARGS__)

enum nv_drm_input_color_space {
NV_DRM_INPUT_COLOR_SPACE_NONE,
NV_DRM_INPUT_COLOR_SPACE_SCRGB_LINEAR,
NV_DRM_INPUT_COLOR_SPACE_BT2100_PQ
};

struct nv_drm_device {
nv_gpu_info_t gpu_info;
MIGDeviceId gpu_mig_device;

struct drm_device *dev;

@@ -182,6 +189,9 @@ struct nv_drm_device {
struct drm_property *nv_crtc_regamma_divisor_property;

struct nv_drm_device *next;

NvU64 vtFbBaseAddress;
NvU64 vtFbSize;
};

static inline NvU32 nv_drm_next_display_semaphore(

@@ -65,6 +65,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += sync_file_get_fence
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_framebuffers
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers
NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_devices
NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_pci_devices
@@ -74,6 +75,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_client_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_attach_hdr_output_metadata_property
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_helper_crtc_enable_color_mgmt
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_crtc_enable_color_mgmt
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_plane_create_color_properties
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_legacy_gamma_set
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_mixed
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pfn_to_pfn_t
@@ -133,6 +135,8 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_put
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
NV_CONFTEST_TYPE_COMPILE_TESTS += fence_ops_use_64bit_seqno
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_framebuffers_has_driver_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_framebuffers_has_no_primary_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers_has_driver_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_create_dp_colorspace_property_has_supported_colorspaces_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_syncobj_features_present
@@ -140,8 +144,9 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_framebuffer_obj_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_ctm_3x4_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_lut
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_format_info_has_is_yuv
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_property_blob_put
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_prime_mmap
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_date
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations_fop_unsigned_offset_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_helper_funcs_mode_valid_has_const_mode_arg

@@ -102,6 +102,11 @@ MODULE_PARM_DESC(malloc_verbose, "Report information about malloc calls on modul
static bool malloc_verbose = false;
module_param_named(malloc_verbose, malloc_verbose, bool, 0400);

MODULE_PARM_DESC(conceal_vrr_caps,
"Conceal all display VRR capabilities");
static bool conceal_vrr_caps = false;
module_param_named(conceal_vrr_caps, conceal_vrr_caps, bool, 0400);
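
As with the existing malloc_verbose parameter, this option would presumably be set at module load time (for example via an `options nvidia-modeset conceal_vrr_caps=1` line in a modprobe.d file); with mode 0400 the value is then readable by root under /sys/module/nvidia_modeset/parameters/ (path assumes the usual sysfs layout for nvidia-modeset.ko).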

/* Fail allocating the RM core channel for NVKMS using the i-th method (see
* FailAllocCoreChannelMethod). Failures not using the i-th method are ignored. */
MODULE_PARM_DESC(fail_alloc_core_channel, "Control testing for hardware core channel allocation failure");
@@ -135,7 +140,12 @@ NvBool nvkms_test_fail_alloc_core_channel(

return NV_TRUE;
}

NvBool nvkms_conceal_vrr_caps(void)
{
return conceal_vrr_caps;
}

NvBool nvkms_output_rounding_fix(void)
{
return output_rounding_fix;

@@ -110,6 +110,7 @@ enum FailAllocCoreChannelMethod {
};

NvBool nvkms_test_fail_alloc_core_channel(enum FailAllocCoreChannelMethod method);
NvBool nvkms_conceal_vrr_caps(void);
NvBool nvkms_output_rounding_fix(void);
NvBool nvkms_disable_hdmi_frl(void);
NvBool nvkms_disable_vrr_memclk_switch(void);

@@ -52,7 +52,7 @@ nvidia-modeset-y += $(NVIDIA_MODESET_BINARY_OBJECT_O)
# Define nvidia-modeset.ko-specific CFLAGS.
#

NVIDIA_MODESET_CFLAGS += -I$(src)/nvidia-modeset
NVIDIA_MODESET_CFLAGS += -I$(src)/nvidia-modeset -I$(src)/common/inc
NVIDIA_MODESET_CFLAGS += -UDEBUG -U_DEBUG -DNDEBUG -DNV_BUILD_MODULE_INSTANCES=0

# Some Android kernels prohibit driver use of filesystem functions like

@@ -453,35 +453,19 @@ typedef struct nvidia_p2p_rsync_reg_info {

/*
* @brief
* Gets rsync (GEN-ID) register information associated with the supported
* NPUs.
*
* The caller would use the returned information {GPU device, NPU device,
* socket-id, cluster-id} to pick the optimal generation registers to issue
* RSYNC (NVLink HW flush).
*
* The interface allocates structures to return the information, hence
* nvidia_p2p_put_rsync_registers() must be called to free the structures.
*
* Note, cluster-id is hardcoded to zero as early system configurations would
* only support cluster mode i.e. all devices would share the same cluster-id
* (0). In the future, appropriate kernel support would be needed to query
* cluster-ids.
*
* @param[out] reg_info
* A pointer to the rsync reg info structure.
* This interface is no longer supported and will always return an error. It
* is left in place (for now) to allow third-party callers to build without
* any errors.
*
* @Returns
* 0 Upon successful completion. Otherwise, returns negative value.
* -ENODEV
*/
int nvidia_p2p_get_rsync_registers(nvidia_p2p_rsync_reg_info_t **reg_info);

/*
* @brief
* Frees the structures allocated by nvidia_p2p_get_rsync_registers().
*
* @param[in] reg_info
* A pointer to the rsync reg info structure.
* This interface is no longer supported. It is left in place (for now) to
* allow third-party callers to build without any errors.
*/
void nvidia_p2p_put_rsync_registers(nvidia_p2p_rsync_reg_info_t *reg_info);
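
Given the deprecation notes folded into the comments above, a hedged sketch of what a third-party caller can now expect (the caller function is illustrative, not from this tree):

/* Illustrative caller: the symbols still link, but per the updated
 * contract the query now always fails with -ENODEV. */
static int example_use_rsync_registers(void)
{
    nvidia_p2p_rsync_reg_info_t *info = NULL;
    int rc = nvidia_p2p_get_rsync_registers(&info);

    if (rc == 0) {
        /* Not expected to be reached on this driver series. */
        nvidia_p2p_put_rsync_registers(info);
    }
    return rc;
}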

@@ -1,51 +1,31 @@
/*******************************************************************************
Copyright (c) 2012-2015 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef _clc86f_h_
#define _clc86f_h_

#ifdef __cplusplus
extern "C" {
#endif

#include "nvtypes.h"

/* class HOPPER_CHANNEL_GPFIFO */
/*
* Documentation for HOPPER_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
* chapter "User Control Registers". It is documented as device NV_UDMA.
* The GPFIFO format itself is also documented in dev_pbdma.ref,
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Note there is no .mfs file for this class.
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

#ifndef __gh100_clc86f_h__
#define __gh100_clc86f_h__

#define HOPPER_CHANNEL_GPFIFO_A (0x0000C86F)

#define NVC86F_TYPEDEF HOPPER_CHANNELChannelGPFifoA

/* dma flow control data structure */
typedef volatile struct Nvc86fControl_struct {
NvU32 Ignored00[0x010]; /* 0000-003f*/
NvU32 Put; /* put offset, read/write 0040-0043*/
@@ -64,54 +44,7 @@ typedef volatile struct Nvc86fControl_struct {
NvU32 Ignored05[0x5c];
} Nvc86fControl, HopperAControlGPFifo;

/* fields and values */
#define NVC86F_NUMBER_OF_SUBCHANNELS (8)
#define NVC86F_SET_OBJECT (0x00000000)
#define NVC86F_SET_OBJECT_NVCLASS 15:0
#define NVC86F_SET_OBJECT_ENGINE 20:16
#define NVC86F_SET_OBJECT_ENGINE_SW 0x0000001f
#define NVC86F_ILLEGAL (0x00000004)
#define NVC86F_ILLEGAL_HANDLE 31:0
#define NVC86F_NOP (0x00000008)
#define NVC86F_NOP_HANDLE 31:0
#define NVC86F_SEMAPHOREA (0x00000010)
#define NVC86F_SEMAPHOREA_OFFSET_UPPER 7:0
#define NVC86F_SEMAPHOREB (0x00000014)
#define NVC86F_SEMAPHOREB_OFFSET_LOWER 31:2
#define NVC86F_SEMAPHOREC (0x00000018)
#define NVC86F_SEMAPHOREC_PAYLOAD 31:0
#define NVC86F_SEMAPHORED (0x0000001C)
#define NVC86F_SEMAPHORED_OPERATION 4:0
#define NVC86F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVC86F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVC86F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVC86F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
#define NVC86F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
#define NVC86F_SEMAPHORED_RELEASE_WFI 20:20
#define NVC86F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
#define NVC86F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
#define NVC86F_SEMAPHORED_RELEASE_SIZE 24:24
#define NVC86F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
#define NVC86F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
#define NVC86F_SEMAPHORED_REDUCTION 30:27
#define NVC86F_SEMAPHORED_REDUCTION_MIN 0x00000000
#define NVC86F_SEMAPHORED_REDUCTION_MAX 0x00000001
#define NVC86F_SEMAPHORED_REDUCTION_XOR 0x00000002
#define NVC86F_SEMAPHORED_REDUCTION_AND 0x00000003
#define NVC86F_SEMAPHORED_REDUCTION_OR 0x00000004
#define NVC86F_SEMAPHORED_REDUCTION_ADD 0x00000005
#define NVC86F_SEMAPHORED_REDUCTION_INC 0x00000006
#define NVC86F_SEMAPHORED_REDUCTION_DEC 0x00000007
#define NVC86F_SEMAPHORED_FORMAT 31:31
#define NVC86F_SEMAPHORED_FORMAT_SIGNED 0x00000000
#define NVC86F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
#define NVC86F_NON_STALL_INTERRUPT (0x00000020)
#define NVC86F_NON_STALL_INTERRUPT_HANDLE 31:0
#define NVC86F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
#define NVC86F_FB_FLUSH_HANDLE 31:0
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
// specifying the page address for a targeted TLB invalidate and the uTLB for
// a targeted REPLAY_CANCEL for UVM.
@@ -206,67 +139,31 @@ typedef volatile struct Nvc86fControl_struct {
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE 23:20
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE_RESERVED 0x00000000
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE_VIDMEM_ACCESS_BIT_DUMP 0x00000001
#define NVC86F_SET_REFERENCE (0x00000050)
#define NVC86F_SET_REFERENCE_COUNT 31:0
#define NVC86F_SEM_ADDR_LO (0x0000005c)
#define NVC86F_SEM_ADDR_LO_OFFSET 31:2
#define NVC86F_SEM_ADDR_HI (0x00000060)
#define NVC86F_SEM_ADDR_HI_OFFSET 24:0
#define NVC86F_SEM_PAYLOAD_LO (0x00000064)
#define NVC86F_SEM_PAYLOAD_LO_PAYLOAD 31:0
#define NVC86F_SEM_PAYLOAD_HI (0x00000068)
#define NVC86F_SEM_PAYLOAD_HI_PAYLOAD 31:0
#define NVC86F_SEM_EXECUTE (0x0000006c)
#define NVC86F_SEM_EXECUTE_OPERATION 2:0
#define NVC86F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
#define NVC86F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ 0x00000002
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ 0x00000003
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_AND 0x00000004
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_NOR 0x00000005
#define NVC86F_SEM_EXECUTE_OPERATION_REDUCTION 0x00000006
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG 12:12
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS 0x00000000
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN 0x00000001
#define NVC86F_SEM_EXECUTE_RELEASE_WFI 20:20
#define NVC86F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
#define NVC86F_SEM_EXECUTE_RELEASE_WFI_EN 0x00000001
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT 0x00000001
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP 25:25
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS 0x00000000
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN 0x00000001
#define NVC86F_SEM_EXECUTE_REDUCTION 30:27
#define NVC86F_SEM_EXECUTE_REDUCTION_IMIN 0x00000000
#define NVC86F_SEM_EXECUTE_REDUCTION_IMAX 0x00000001
#define NVC86F_SEM_EXECUTE_REDUCTION_IXOR 0x00000002
#define NVC86F_SEM_EXECUTE_REDUCTION_IAND 0x00000003
#define NVC86F_SEM_EXECUTE_REDUCTION_IOR 0x00000004
#define NVC86F_SEM_EXECUTE_REDUCTION_IADD 0x00000005
#define NVC86F_SEM_EXECUTE_REDUCTION_INC 0x00000006
#define NVC86F_SEM_EXECUTE_REDUCTION_DEC 0x00000007
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT 31:31
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED 0x00000000
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED 0x00000001
#define NVC86F_WFI (0x00000078)
#define NVC86F_WFI_SCOPE 0:0
#define NVC86F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
#define NVC86F_WFI_SCOPE_CURRENT_VEID 0x00000000
#define NVC86F_WFI_SCOPE_ALL 0x00000001
#define NVC86F_YIELD (0x00000080)
#define NVC86F_YIELD_OP 1:0
#define NVC86F_YIELD_OP_NOP 0x00000000
#define NVC86F_YIELD_OP_TSG 0x00000003
#define NVC86F_CLEAR_FAULTED (0x00000084)
// Note: RM provides the HANDLE as an opaque value; the internal detail fields
// are intentionally not exposed to the driver through these defines.
#define NVC86F_CLEAR_FAULTED_HANDLE 30:0
#define NVC86F_CLEAR_FAULTED_TYPE 31:31
#define NVC86F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
#define NVC86F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
#define NVC86F_QUADRO_VERIFY (0x000000a0)


/* GPFIFO entry format */
#define NVC86F_GP_ENTRY__SIZE 8
@@ -291,85 +188,4 @@ typedef volatile struct Nvc86fControl_struct {
#define NVC86F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
#define NVC86F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE 0x00000004

/* dma method formats */
#define NVC86F_DMA_METHOD_ADDRESS_OLD 12:2
#define NVC86F_DMA_METHOD_ADDRESS 11:0
#define NVC86F_DMA_SUBDEVICE_MASK 15:4
#define NVC86F_DMA_METHOD_SUBCHANNEL 15:13
#define NVC86F_DMA_TERT_OP 17:16
#define NVC86F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
#define NVC86F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
#define NVC86F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
#define NVC86F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
#define NVC86F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
#define NVC86F_DMA_METHOD_COUNT_OLD 28:18
#define NVC86F_DMA_METHOD_COUNT 28:16
#define NVC86F_DMA_IMMD_DATA 28:16
#define NVC86F_DMA_SEC_OP 31:29
#define NVC86F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
#define NVC86F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVC86F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
#define NVC86F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
#define NVC86F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
#define NVC86F_DMA_SEC_OP_ONE_INC (0x00000005)
#define NVC86F_DMA_SEC_OP_RESERVED6 (0x00000006)
#define NVC86F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
/* dma incrementing method format */
#define NVC86F_DMA_INCR_ADDRESS 11:0
#define NVC86F_DMA_INCR_SUBCHANNEL 15:13
#define NVC86F_DMA_INCR_COUNT 28:16
#define NVC86F_DMA_INCR_OPCODE 31:29
#define NVC86F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVC86F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVC86F_DMA_NONINCR_ADDRESS 11:0
#define NVC86F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVC86F_DMA_NONINCR_COUNT 28:16
#define NVC86F_DMA_NONINCR_OPCODE 31:29
#define NVC86F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVC86F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVC86F_DMA_ONEINCR_ADDRESS 11:0
#define NVC86F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVC86F_DMA_ONEINCR_COUNT 28:16
#define NVC86F_DMA_ONEINCR_OPCODE 31:29
#define NVC86F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVC86F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVC86F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVC86F_DMA_IMMD_ADDRESS 11:0
#define NVC86F_DMA_IMMD_SUBCHANNEL 15:13
#define NVC86F_DMA_IMMD_DATA 28:16
#define NVC86F_DMA_IMMD_OPCODE 31:29
#define NVC86F_DMA_IMMD_OPCODE_VALUE (0x00000004)
/* dma set sub-device mask format */
#define NVC86F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
#define NVC86F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
#define NVC86F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
/* dma store sub-device mask format */
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
/* dma use sub-device mask format */
#define NVC86F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC86F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
/* dma end-segment format */
#define NVC86F_DMA_ENDSEG_OPCODE 31:29
#define NVC86F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVC86F_DMA_ADDRESS 12:2
#define NVC86F_DMA_SUBCH 15:13
#define NVC86F_DMA_OPCODE3 17:16
#define NVC86F_DMA_OPCODE3_NONE (0x00000000)
#define NVC86F_DMA_COUNT 28:18
#define NVC86F_DMA_OPCODE 31:29
#define NVC86F_DMA_OPCODE_METHOD (0x00000000)
#define NVC86F_DMA_OPCODE_NONINC_METHOD (0x00000002)
#define NVC86F_DMA_DATA 31:0
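
The N:M notation above names the high and low bits of a field within a 32-bit method header. A hedged sketch of how such a header packs together for an incrementing method (the helper is illustrative, not part of the header; it assumes, as the 11:0 ADDRESS field suggests, that the byte-offset method address is shifted down by 2 to dword granularity):

/* Illustrative packing of an incrementing method header from the
 * NVC86F_DMA_INCR_* fields above. */
static inline NvU32 nvc86f_incr_header(NvU32 method, NvU32 subch, NvU32 count)
{
    return ((NvU32)0x1 << 29) | /* NVC86F_DMA_INCR_OPCODE_VALUE, bits 31:29 */
           (count << 16) |      /* NVC86F_DMA_INCR_COUNT, bits 28:16 */
           (subch << 13) |      /* NVC86F_DMA_INCR_SUBCHANNEL, bits 15:13 */
           (method >> 2);       /* NVC86F_DMA_INCR_ADDRESS, bits 11:0 */
}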
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* _clc86f_h_ */
|
||||
#endif // __gh100_clc86f_h__
|
||||
|
||||
@@ -1,160 +1,46 @@
/*******************************************************************************
Copyright (c) 1993-2004 NVIDIA Corporation
/*
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
#ifndef __gh100_clc8b5_h__
#define __gh100_clc8b5_h__

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

*******************************************************************************/



#include "nvtypes.h"

#ifndef _clc8b5_h_
#define _clc8b5_h_

#ifdef __cplusplus
extern "C" {
#endif

#define HOPPER_DMA_COPY_A (0x0000C8B5)

typedef volatile struct _clc8b5_tag0 {
NvV32 Reserved00[0x40];
NvV32 Nop; // 0x00000100 - 0x00000103
NvV32 Reserved01[0xF];
NvV32 PmTrigger; // 0x00000140 - 0x00000143
NvV32 Reserved02[0x36];
NvV32 SetMonitoredFenceType; // 0x0000021C - 0x0000021F
NvV32 SetMonitoredFenceSignalAddrBaseUpper; // 0x00000220 - 0x00000223
NvV32 SetMonitoredFenceSignalAddrBaseLower; // 0x00000224 - 0x00000227
NvV32 Reserved03[0x6];
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
NvV32 SetSemaphorePayloadUpper; // 0x0000024C - 0x0000024F
NvV32 Reserved04[0x1];
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
NvV32 Reserved05[0x26];
NvV32 LaunchDma; // 0x00000300 - 0x00000303
NvV32 Reserved06[0x3F];
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
NvV32 PitchIn; // 0x00000410 - 0x00000413
NvV32 PitchOut; // 0x00000414 - 0x00000417
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
NvV32 LineCount; // 0x0000041C - 0x0000041F
NvV32 Reserved07[0x38];
NvV32 SetSecureCopyMode; // 0x00000500 - 0x00000503
NvV32 SetDecryptIv0; // 0x00000504 - 0x00000507
NvV32 SetDecryptIv1; // 0x00000508 - 0x0000050B
NvV32 SetDecryptIv2; // 0x0000050C - 0x0000050F
NvV32 Reserved_SetAESCounter; // 0x00000510 - 0x00000513
NvV32 SetDecryptAuthTagCompareAddrUpper; // 0x00000514 - 0x00000517
NvV32 SetDecryptAuthTagCompareAddrLower; // 0x00000518 - 0x0000051B
NvV32 Reserved08[0x5];
NvV32 SetEncryptAuthTagAddrUpper; // 0x00000530 - 0x00000533
NvV32 SetEncryptAuthTagAddrLower; // 0x00000534 - 0x00000537
NvV32 SetEncryptIvAddrUpper; // 0x00000538 - 0x0000053B
NvV32 SetEncryptIvAddrLower; // 0x0000053C - 0x0000053F
NvV32 Reserved09[0x6F];
NvV32 SetMemoryScrubParameters; // 0x000006FC - 0x000006FF
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
NvV32 Reserved10[0x1];
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
NvV32 Reserved11[0x1];
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
NvV32 DstOriginY; // 0x00000750 - 0x00000753
NvV32 Reserved12[0x270];
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
NvV32 Reserved13[0x3BA];
} hopper_dma_copy_aControlPio;

#define NVC8B5_NOP (0x00000100)
#define NVC8B5_NOP_PARAMETER 31:0
#define NVC8B5_PM_TRIGGER (0x00000140)
#define NVC8B5_PM_TRIGGER_V 31:0
#define NVC8B5_SET_MONITORED_FENCE_TYPE (0x0000021C)
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE 0:0
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE (0x00000000)
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE_EXT (0x00000001)
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER (0x00000220)
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER_UPPER 24:0
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER (0x00000224)
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER_LOWER 31:0
#define HOPPER_DMA_COPY_A (0x0000C8B5)
#define NVC8B5_SET_SEMAPHORE_A (0x00000240)
#define NVC8B5_SET_SEMAPHORE_A_UPPER 24:0
#define NVC8B5_SET_SEMAPHORE_B (0x00000244)
#define NVC8B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC8B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_UPPER (0x0000024C)
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_UPPER_PAYLOAD 31:0
#define NVC8B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC8B5_SET_RENDER_ENABLE_A_UPPER 24:0
#define NVC8B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC8B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC8B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC8B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
#define NVC8B5_SET_SRC_PHYS_MODE_PEER_ID 8:6
#define NVC8B5_SET_SRC_PHYS_MODE_FLA 9:9
#define NVC8B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC8B5_SET_DST_PHYS_MODE_PEER_ID 8:6
#define NVC8B5_SET_DST_PHYS_MODE_FLA 9:9
#define NVC8B5_LAUNCH_DMA (0x00000300)
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
@@ -167,80 +53,41 @@ typedef volatile struct _clc8b5_tag0 {
#define NVC8B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
#define NVC8B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_NO_TIMESTAMP (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_WITH_TIMESTAMP (0x00000002)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE (0x00000003)
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC8B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC8B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC8B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDA (0x00000008)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDB (0x00000009)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMIN (0x0000000B)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMAX (0x0000000C)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDC (0x0000000D)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDD (0x0000000E)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDE (0x0000000F)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE 21:20
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_PROT2PROT (0x00000000)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_DEFAULT (0x00000000)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_SECURE (0x00000001)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_NONPROT2NONPROT (0x00000002)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_RESERVED (0x00000003)
#define NVC8B5_LAUNCH_DMA_VPRMODE 22:22
#define NVC8B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC8B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE 23:23
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC 26:26
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE 27:27
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_ONE_WORD (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_TWO_WORD (0x00000001)
#define NVC8B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
#define NVC8B5_OFFSET_IN_UPPER (0x00000400)
#define NVC8B5_OFFSET_IN_UPPER_UPPER 24:0
#define NVC8B5_OFFSET_IN_LOWER (0x00000404)
@@ -249,41 +96,11 @@ typedef volatile struct _clc8b5_tag0 {
#define NVC8B5_OFFSET_OUT_UPPER_UPPER 24:0
#define NVC8B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVC8B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVC8B5_PITCH_IN (0x00000410)
#define NVC8B5_PITCH_IN_VALUE 31:0
#define NVC8B5_PITCH_OUT (0x00000414)
#define NVC8B5_PITCH_OUT_VALUE 31:0
#define NVC8B5_LINE_LENGTH_IN (0x00000418)
#define NVC8B5_LINE_LENGTH_IN_VALUE 31:0
#define NVC8B5_LINE_COUNT (0x0000041C)
#define NVC8B5_LINE_COUNT_VALUE 31:0
#define NVC8B5_SET_SECURE_COPY_MODE (0x00000500)
#define NVC8B5_SET_SECURE_COPY_MODE_MODE 0:0
#define NVC8B5_SET_SECURE_COPY_MODE_MODE_ENCRYPT (0x00000000)
#define NVC8B5_SET_SECURE_COPY_MODE_MODE_DECRYPT (0x00000001)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET 20:19
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_PEER_ID 23:21
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_FLA 24:24
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET 26:25
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_PEER_ID 29:27
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_FLA 30:30
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_END_OF_COPY 31:31
#define NVC8B5_SET_DECRYPT_IV0 (0x00000504)
#define NVC8B5_SET_DECRYPT_IV0_VALUE 31:0
#define NVC8B5_SET_DECRYPT_IV1 (0x00000508)
#define NVC8B5_SET_DECRYPT_IV1_VALUE 31:0
#define NVC8B5_SET_DECRYPT_IV2 (0x0000050C)
#define NVC8B5_SET_DECRYPT_IV2_VALUE 31:0
#define NVC8B5_RESERVED_SET_AESCOUNTER (0x00000510)
#define NVC8B5_RESERVED_SET_AESCOUNTER_VALUE 31:0
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER (0x00000514)
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER_UPPER 24:0
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER (0x00000518)
@@ -299,132 +116,18 @@ typedef volatile struct _clc8b5_tag0 {
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS (0x000006FC)
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE 0:0
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_FALSE (0x00000000)
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_TRUE (0x00000001)
#define NVC8B5_SET_REMAP_CONST_A (0x00000700)
#define NVC8B5_SET_REMAP_CONST_A_V 31:0
#define NVC8B5_SET_REMAP_CONST_B (0x00000704)
#define NVC8B5_SET_REMAP_CONST_B_V 31:0
#define NVC8B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#define NVC8B5_SET_DST_BLOCK_SIZE (0x0000070C)
#define NVC8B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
#define NVC8B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC8B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC8B5_SET_DST_WIDTH (0x00000710)
#define NVC8B5_SET_DST_WIDTH_V 31:0
#define NVC8B5_SET_DST_HEIGHT (0x00000714)
#define NVC8B5_SET_DST_HEIGHT_V 31:0
#define NVC8B5_SET_DST_DEPTH (0x00000718)
#define NVC8B5_SET_DST_DEPTH_V 31:0
#define NVC8B5_SET_DST_LAYER (0x0000071C)
#define NVC8B5_SET_DST_LAYER_V 31:0
#define NVC8B5_SET_DST_ORIGIN (0x00000720)
#define NVC8B5_SET_DST_ORIGIN_X 15:0
#define NVC8B5_SET_DST_ORIGIN_Y 31:16
#define NVC8B5_SET_SRC_BLOCK_SIZE (0x00000728)
#define NVC8B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
#define NVC8B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC8B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC8B5_SET_SRC_WIDTH (0x0000072C)
#define NVC8B5_SET_SRC_WIDTH_V 31:0
#define NVC8B5_SET_SRC_HEIGHT (0x00000730)
#define NVC8B5_SET_SRC_HEIGHT_V 31:0
#define NVC8B5_SET_SRC_DEPTH (0x00000734)
#define NVC8B5_SET_SRC_DEPTH_V 31:0
#define NVC8B5_SET_SRC_LAYER (0x00000738)
#define NVC8B5_SET_SRC_LAYER_V 31:0
#define NVC8B5_SET_SRC_ORIGIN (0x0000073C)
#define NVC8B5_SET_SRC_ORIGIN_X 15:0
#define NVC8B5_SET_SRC_ORIGIN_Y 31:16
#define NVC8B5_SRC_ORIGIN_X (0x00000744)
#define NVC8B5_SRC_ORIGIN_X_VALUE 31:0
#define NVC8B5_SRC_ORIGIN_Y (0x00000748)
#define NVC8B5_SRC_ORIGIN_Y_VALUE 31:0
#define NVC8B5_DST_ORIGIN_X (0x0000074C)
#define NVC8B5_DST_ORIGIN_X_VALUE 31:0
#define NVC8B5_DST_ORIGIN_Y (0x00000750)
#define NVC8B5_DST_ORIGIN_Y_VALUE 31:0
#define NVC8B5_PM_TRIGGER_END (0x00001114)
#define NVC8B5_PM_TRIGGER_END_V 31:0

#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc8b5_h

#endif // __gh100_clc8b5_h__

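The HOPPER_DMA_COPY_A methods above are driven through a channel pushbuffer. Below is a minimal sketch of a one-line (1D) virtual-to-virtual copy, assuming a hypothetical push_method() helper that emits one (method, data) pair; it is illustrative only, not the driver's own submission path:

/* Minimal sketch: 1D copy on HOPPER_DMA_COPY_A. push_method() is a
 * hypothetical pushbuffer helper, not part of these headers; field
 * positions follow the NVC8B5_* defines above. SRC_TYPE/DST_TYPE
 * default to 0 (VIRTUAL) and MULTI_LINE_ENABLE to 0 (FALSE). */
#include <stdint.h>

void push_method(uint32_t method, uint32_t data); /* assumed to exist elsewhere */

static void clc8b5_copy_1d(uint64_t src_va, uint64_t dst_va, uint32_t bytes)
{
    push_method(NVC8B5_OFFSET_IN_UPPER,  (uint32_t)(src_va >> 32)); /* UPPER is 24:0 */
    push_method(NVC8B5_OFFSET_IN_LOWER,  (uint32_t)src_va);
    push_method(NVC8B5_OFFSET_OUT_UPPER, (uint32_t)(dst_va >> 32));
    push_method(NVC8B5_OFFSET_OUT_LOWER, (uint32_t)dst_va);
    push_method(NVC8B5_LINE_LENGTH_IN,   bytes);
    push_method(NVC8B5_LINE_COUNT,       1);
    push_method(NVC8B5_LAUNCH_DMA,
                (0x2u << 0)    /* DATA_TRANSFER_TYPE_NON_PIPELINED (bits 1:0) */
              | (0x1u << 7)    /* SRC_MEMORY_LAYOUT_PITCH (bit 7) */
              | (0x1u << 8));  /* DST_MEMORY_LAYOUT_PITCH (bit 8) */
}
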
@@ -1,84 +1,42 @@
/*******************************************************************************
Copyright (c) 2012-2015 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

*******************************************************************************/


#ifndef _clc96f_h_
#define _clc96f_h_

#ifdef __cplusplus
extern "C" {
#endif

#include "nvtypes.h"

/* class BLACKWELL_CHANNEL_GPFIFO */
/*
* Documentation for BLACKWELL_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
* chapter "User Control Registers". It is documented as device NV_UDMA.
* The GPFIFO format itself is also documented in dev_pbdma.ref,
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Note there is no .mfs file for this class.
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

#ifndef __gb100_clc96f_h__
#define __gb100_clc96f_h__

#define BLACKWELL_CHANNEL_GPFIFO_A (0x0000C96F)

#define NVC96F_TYPEDEF BLACKWELL_CHANNELChannelGPFifoA

/* dma flow control data structure */
typedef volatile struct Nvc96fControl_struct {
NvU32 Ignored00[0x23]; /* 0000-008b*/
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
NvU32 Ignored01[0x5c];
} Nvc96fControl, BlackwellAControlGPFifo;

/* fields and values */
#define NVC96F_NUMBER_OF_SUBCHANNELS (8)
#define NVC96F_SET_OBJECT (0x00000000)
#define NVC96F_SET_OBJECT_NVCLASS 15:0
#define NVC96F_SET_OBJECT_ENGINE 20:16
#define NVC96F_SET_OBJECT_ENGINE_SW 0x0000001f
#define NVC96F_NOP (0x00000008)
#define NVC96F_NOP_HANDLE 31:0
#define NVC96F_NON_STALL_INTERRUPT (0x00000020)
#define NVC96F_NON_STALL_INTERRUPT_HANDLE 31:0
#define NVC96F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
#define NVC96F_FB_FLUSH_HANDLE 31:0
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
// specifying the page address for a targeted TLB invalidate and the uTLB for
// a targeted REPLAY_CANCEL for UVM.
// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly
// rearranged fields.
#define NVC96F_MEM_OP_A (0x00000028)
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE 5:0 // Used to specify size of invalidate, used for invalidates which are not of the REPLAY_CANCEL_TARGETED type
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE 7:6 // only relevant for invalidates with NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE for invalidating link TLB only, or non-link TLB only or all TLBs
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_ALL_TLBS 0
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_LINK_TLBS 1
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_NON_LINK_TLBS 2
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_RSVRVD 3
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID 8:0 // only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001
#define NVC96F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000
@@ -86,9 +44,6 @@ typedef volatile struct Nvc96fControl_struct {
#define NVC96F_MEM_OP_B (0x0000002c)
#define NVC96F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0
#define NVC96F_MEM_OP_C (0x00000030)
#define NVC96F_MEM_OP_C_MEMBAR_TYPE 2:0
#define NVC96F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000
#define NVC96F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED
@@ -97,130 +52,38 @@ typedef volatile struct Nvc96fControl_struct {
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL 0x00000005
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE 9:7 //only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ 0
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE 1
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG 2
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD 3
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK 4
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL 5
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC 6
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL 7
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC96F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE
#define NVC96F_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG 19:0
// MEM_OP_D MUST be preceded by MEM_OPs A-C.

#define NVC96F_MEM_OP_D (0x00000034)
#define NVC96F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE
#define NVC96F_MEM_OP_D_OPERATION 31:27
#define NVC96F_MEM_OP_D_OPERATION_MEMBAR 0x00000005
#define NVC96F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
#define NVC96F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a
#define NVC96F_MEM_OP_D_OPERATION_MMU_OPERATION 0x0000000b
#define NVC96F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
// CLEAN_LINES is an alias for Tegra/GPU IP usage
#define NVC96F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
#define NVC96F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
#define NVC96F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_NCOH_INVALIDATE 0x00000011
#define NVC96F_MEM_OP_D_OPERATION_L2_SYSMEM_COH_INVALIDATE 0x00000012
#define NVC96F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
#define NVC96F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR 0x00000016
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE 1:0
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC 0x00000000
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC 0x00000001
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL 0x00000002
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED 0x00000003
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE 2:2
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC 0x00000000
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC 0x00000001
#define NVC96F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK 6:3
#define NVC96F_MEM_OP_D_MMU_OPERATION_TYPE 23:20
#define NVC96F_MEM_OP_D_MMU_OPERATION_TYPE_RESERVED 0x00000000
#define NVC96F_MEM_OP_D_MMU_OPERATION_TYPE_VIDMEM_ACCESS_BIT_DUMP 0x00000001

#define NVC96F_SEM_ADDR_LO (0x0000005c)
#define NVC96F_SEM_ADDR_LO_OFFSET 31:2
#define NVC96F_SEM_ADDR_HI (0x00000060)
#define NVC96F_SEM_ADDR_HI_OFFSET 24:0
#define NVC96F_SEM_PAYLOAD_LO (0x00000064)
#define NVC96F_SEM_PAYLOAD_LO_PAYLOAD 31:0
#define NVC96F_SEM_PAYLOAD_HI (0x00000068)
#define NVC96F_SEM_PAYLOAD_HI_PAYLOAD 31:0
#define NVC96F_SEM_EXECUTE (0x0000006c)
#define NVC96F_SEM_EXECUTE_OPERATION 2:0
#define NVC96F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
#define NVC96F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ 0x00000002
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ 0x00000003
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_AND 0x00000004
#define NVC96F_SEM_EXECUTE_OPERATION_ACQ_NOR 0x00000005
#define NVC96F_SEM_EXECUTE_OPERATION_REDUCTION 0x00000006
#define NVC96F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG 12:12
#define NVC96F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS 0x00000000
#define NVC96F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN 0x00000001
#define NVC96F_SEM_EXECUTE_ACQUIRE_RECHECK 18:18
#define NVC96F_SEM_EXECUTE_ACQUIRE_RECHECK_DIS 0x00000000
#define NVC96F_SEM_EXECUTE_ACQUIRE_RECHECK_EN 0x00000001
#define NVC96F_SEM_EXECUTE_RELEASE_WFI 20:20
#define NVC96F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
#define NVC96F_SEM_EXECUTE_RELEASE_WFI_EN 0x00000001
#define NVC96F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
#define NVC96F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
#define NVC96F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT 0x00000001
#define NVC96F_SEM_EXECUTE_RELEASE_TIMESTAMP 25:25
#define NVC96F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS 0x00000000
#define NVC96F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN 0x00000001
#define NVC96F_SEM_EXECUTE_REDUCTION 30:27
#define NVC96F_SEM_EXECUTE_REDUCTION_IMIN 0x00000000
#define NVC96F_SEM_EXECUTE_REDUCTION_IMAX 0x00000001
#define NVC96F_SEM_EXECUTE_REDUCTION_IXOR 0x00000002
#define NVC96F_SEM_EXECUTE_REDUCTION_IAND 0x00000003
#define NVC96F_SEM_EXECUTE_REDUCTION_IOR 0x00000004
#define NVC96F_SEM_EXECUTE_REDUCTION_IADD 0x00000005
#define NVC96F_SEM_EXECUTE_REDUCTION_INC 0x00000006
#define NVC96F_SEM_EXECUTE_REDUCTION_DEC 0x00000007
#define NVC96F_SEM_EXECUTE_REDUCTION_FORMAT 31:31
#define NVC96F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED 0x00000000
#define NVC96F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED 0x00000001
#define NVC96F_WFI (0x00000078)
#define NVC96F_WFI_SCOPE 0:0
#define NVC96F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
#define NVC96F_WFI_SCOPE_CURRENT_VEID 0x00000000
#define NVC96F_WFI_SCOPE_ALL 0x00000001
#define NVC96F_YIELD (0x00000080)
#define NVC96F_YIELD_OP 1:0
#define NVC96F_YIELD_OP_NOP 0x00000000
#define NVC96F_YIELD_OP_TSG 0x00000003
#define NVC96F_CLEAR_FAULTED (0x00000084)
// Note: RM provides the HANDLE as an opaque value; the internal detail fields
// are intentionally not exposed to the driver through these defines.
#define NVC96F_CLEAR_FAULTED_HANDLE 30:0
#define NVC96F_CLEAR_FAULTED_TYPE 31:31
#define NVC96F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
#define NVC96F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001


/* GPFIFO entry format */
#define NVC96F_GP_ENTRY__SIZE 8
@@ -245,85 +108,4 @@ typedef volatile struct Nvc96fControl_struct {
#define NVC96F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
#define NVC96F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE 0x00000004

/* dma method formats */
#define NVC96F_DMA_METHOD_ADDRESS_OLD 12:2
#define NVC96F_DMA_METHOD_ADDRESS 11:0
#define NVC96F_DMA_SUBDEVICE_MASK 15:4
#define NVC96F_DMA_METHOD_SUBCHANNEL 15:13
#define NVC96F_DMA_TERT_OP 17:16
#define NVC96F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
#define NVC96F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
#define NVC96F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
#define NVC96F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
#define NVC96F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
#define NVC96F_DMA_METHOD_COUNT_OLD 28:18
#define NVC96F_DMA_METHOD_COUNT 28:16
#define NVC96F_DMA_IMMD_DATA 28:16
#define NVC96F_DMA_SEC_OP 31:29
#define NVC96F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
#define NVC96F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVC96F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
#define NVC96F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
#define NVC96F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
#define NVC96F_DMA_SEC_OP_ONE_INC (0x00000005)
#define NVC96F_DMA_SEC_OP_RESERVED6 (0x00000006)
#define NVC96F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
/* dma incrementing method format */
#define NVC96F_DMA_INCR_ADDRESS 11:0
#define NVC96F_DMA_INCR_SUBCHANNEL 15:13
#define NVC96F_DMA_INCR_COUNT 28:16
#define NVC96F_DMA_INCR_OPCODE 31:29
#define NVC96F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVC96F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVC96F_DMA_NONINCR_ADDRESS 11:0
#define NVC96F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVC96F_DMA_NONINCR_COUNT 28:16
#define NVC96F_DMA_NONINCR_OPCODE 31:29
#define NVC96F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVC96F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVC96F_DMA_ONEINCR_ADDRESS 11:0
#define NVC96F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVC96F_DMA_ONEINCR_COUNT 28:16
#define NVC96F_DMA_ONEINCR_OPCODE 31:29
#define NVC96F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVC96F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVC96F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVC96F_DMA_IMMD_ADDRESS 11:0
#define NVC96F_DMA_IMMD_SUBCHANNEL 15:13
#define NVC96F_DMA_IMMD_DATA 28:16
#define NVC96F_DMA_IMMD_OPCODE 31:29
#define NVC96F_DMA_IMMD_OPCODE_VALUE (0x00000004)
/* dma set sub-device mask format */
#define NVC96F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
#define NVC96F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
#define NVC96F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
/* dma store sub-device mask format */
#define NVC96F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
#define NVC96F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC96F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
/* dma use sub-device mask format */
#define NVC96F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC96F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
/* dma end-segment format */
#define NVC96F_DMA_ENDSEG_OPCODE 31:29
#define NVC96F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVC96F_DMA_ADDRESS 12:2
#define NVC96F_DMA_SUBCH 15:13
#define NVC96F_DMA_OPCODE3 17:16
#define NVC96F_DMA_OPCODE3_NONE (0x00000000)
#define NVC96F_DMA_COUNT 28:18
#define NVC96F_DMA_OPCODE 31:29
#define NVC96F_DMA_OPCODE_METHOD (0x00000000)
#define NVC96F_DMA_OPCODE_NONINC_METHOD (0x00000002)
#define NVC96F_DMA_DATA 31:0

#ifdef __cplusplus
}; /* extern "C" */
#endif

#endif /* _clc96f_h_ */
#endif // __gb100_clc96f_h__

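For the NVC96F host methods above, a semaphore release is a short method sequence: address, payload, then SEM_EXECUTE. Below is a minimal sketch of a 64-bit payload release, again assuming the same hypothetical push_method() helper as in the earlier sketches:

/* Minimal sketch: 64-bit semaphore release on BLACKWELL_CHANNEL_GPFIFO_A.
 * push_method() is a hypothetical pushbuffer helper, not part of these
 * headers; field positions follow the NVC96F_SEM_* defines above. */
#include <stdint.h>

void push_method(uint32_t method, uint32_t data); /* assumed to exist elsewhere */

static void nvc96f_sem_release_64(uint64_t sem_va, uint64_t payload)
{
    push_method(NVC96F_SEM_ADDR_LO,    (uint32_t)sem_va);         /* OFFSET 31:2, 4-byte aligned */
    push_method(NVC96F_SEM_ADDR_HI,    (uint32_t)(sem_va >> 32)); /* OFFSET 24:0 */
    push_method(NVC96F_SEM_PAYLOAD_LO, (uint32_t)payload);
    push_method(NVC96F_SEM_PAYLOAD_HI, (uint32_t)(payload >> 32));
    push_method(NVC96F_SEM_EXECUTE,
                0x1u             /* OPERATION_RELEASE (bits 2:0) */
              | (0x1u << 24));   /* PAYLOAD_SIZE_64BIT (bit 24) */
}
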
@@ -1,460 +1,29 @@
/*******************************************************************************
Copyright (c) 1993-2004 NVIDIA Corporation
/*
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

*******************************************************************************/




#include "nvtypes.h"

#ifndef _clc9b5_h_
#define _clc9b5_h_

#ifdef __cplusplus
extern "C" {
#endif
#ifndef __gb100_clc9b5_h__
#define __gb100_clc9b5_h__

#define BLACKWELL_DMA_COPY_A (0x0000C9B5)

typedef volatile struct _clc9b5_tag0 {
NvV32 Reserved00[0x40];
NvV32 Nop; // 0x00000100 - 0x00000103
NvV32 Reserved01[0xF];
NvV32 PmTrigger; // 0x00000140 - 0x00000143
NvV32 Reserved02[0x36];
NvV32 SetMonitoredFenceType; // 0x0000021C - 0x0000021F
NvV32 SetMonitoredFenceSignalAddrBaseUpper; // 0x00000220 - 0x00000223
NvV32 SetMonitoredFenceSignalAddrBaseLower; // 0x00000224 - 0x00000227
NvV32 Reserved03[0x6];
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
NvV32 SetSemaphorePayloadUpper; // 0x0000024C - 0x0000024F
NvV32 Reserved04[0x1];
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
NvV32 Reserved05[0x26];
NvV32 LaunchDma; // 0x00000300 - 0x00000303
NvV32 Reserved06[0x3F];
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
NvV32 PitchIn; // 0x00000410 - 0x00000413
NvV32 PitchOut; // 0x00000414 - 0x00000417
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
NvV32 LineCount; // 0x0000041C - 0x0000041F
NvV32 Reserved07[0x38];
NvV32 SetSecureCopyMode; // 0x00000500 - 0x00000503
NvV32 SetDecryptIv0; // 0x00000504 - 0x00000507
NvV32 SetDecryptIv1; // 0x00000508 - 0x0000050B
NvV32 SetDecryptIv2; // 0x0000050C - 0x0000050F
NvV32 Reserved_SetAESCounter; // 0x00000510 - 0x00000513
NvV32 SetDecryptAuthTagCompareAddrUpper; // 0x00000514 - 0x00000517
NvV32 SetDecryptAuthTagCompareAddrLower; // 0x00000518 - 0x0000051B
NvV32 Reserved08[0x5];
NvV32 SetEncryptAuthTagAddrUpper; // 0x00000530 - 0x00000533
NvV32 SetEncryptAuthTagAddrLower; // 0x00000534 - 0x00000537
NvV32 SetEncryptIvAddrUpper; // 0x00000538 - 0x0000053B
NvV32 SetEncryptIvAddrLower; // 0x0000053C - 0x0000053F
NvV32 Reserved09[0x10];
NvV32 SetCompressionParameters; // 0x00000580 - 0x00000583
NvV32 SetDecompressOutLength; // 0x00000584 - 0x00000587
NvV32 SetDecompressOutLengthAddrUpper; // 0x00000588 - 0x0000058B
NvV32 SetDecompressOutLengthAddrLower; // 0x0000058C - 0x0000058F
NvV32 SetDecompressChecksum; // 0x00000590 - 0x00000593
NvV32 Reserved10[0x5A];
NvV32 SetMemoryScrubParameters; // 0x000006FC - 0x000006FF
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
NvV32 Reserved11[0x1];
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
NvV32 Reserved12[0x1];
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
NvV32 DstOriginY; // 0x00000750 - 0x00000753
NvV32 Reserved13[0x270];
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
NvV32 Reserved14[0x3BA];
} blackwell_dma_copy_aControlPio;

#define NVC9B5_NOP (0x00000100)
#define NVC9B5_NOP_PARAMETER 31:0
#define NVC9B5_PM_TRIGGER (0x00000140)
#define NVC9B5_PM_TRIGGER_V 31:0
#define NVC9B5_SET_MONITORED_FENCE_TYPE (0x0000021C)
#define NVC9B5_SET_MONITORED_FENCE_TYPE_TYPE 0:0
#define NVC9B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE (0x00000000)
#define NVC9B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE_EXT (0x00000001)
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER (0x00000220)
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER_UPPER 24:0
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER (0x00000224)
#define NVC9B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER_LOWER 31:0
#define NVC9B5_SET_SEMAPHORE_A (0x00000240)
#define NVC9B5_SET_SEMAPHORE_A_UPPER 24:0
#define NVC9B5_SET_SEMAPHORE_B (0x00000244)
#define NVC9B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC9B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC9B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC9B5_SET_SEMAPHORE_PAYLOAD_UPPER (0x0000024C)
#define NVC9B5_SET_SEMAPHORE_PAYLOAD_UPPER_PAYLOAD 31:0
#define NVC9B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC9B5_SET_RENDER_ENABLE_A_UPPER 24:0
#define NVC9B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC9B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC9B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC9B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC9B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC9B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC9B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
#define NVC9B5_SET_SRC_PHYS_MODE_PEER_ID 8:6
#define NVC9B5_SET_SRC_PHYS_MODE_FLA 9:9
#define NVC9B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC9B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC9B5_SET_DST_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC9B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC9B5_SET_DST_PHYS_MODE_PEER_ID 8:6
#define NVC9B5_SET_DST_PHYS_MODE_FLA 9:9
#define NVC9B5_LAUNCH_DMA (0x00000300)
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVC9B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVC9B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVC9B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVC9B5_LAUNCH_DMA_FLUSH_TYPE 25:25
#define NVC9B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
#define NVC9B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_NO_TIMESTAMP (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_WITH_TIMESTAMP (0x00000002)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE (0x00000003)
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC9B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC9B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC9B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC9B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC9B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC9B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC9B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC9B5_LAUNCH_DMA_COMPRESSION_ENABLE 11:11
#define NVC9B5_LAUNCH_DMA_COMPRESSION_ENABLE_FALSE (0x00000000)
#define NVC9B5_LAUNCH_DMA_COMPRESSION_ENABLE_TRUE (0x00000001)
#define NVC9B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC9B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC9B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC9B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC9B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC9B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDA (0x00000008)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDB (0x00000009)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMIN (0x0000000B)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMAX (0x0000000C)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDC (0x0000000D)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDD (0x0000000E)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDE (0x0000000F)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE 21:20
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_PROT2PROT (0x00000000)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_DEFAULT (0x00000000)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_SECURE (0x00000001)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_NONPROT2NONPROT (0x00000002)
#define NVC9B5_LAUNCH_DMA_COPY_TYPE_RESERVED (0x00000003)
#define NVC9B5_LAUNCH_DMA_VPRMODE 22:22
#define NVC9B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC9B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE 23:23
|
||||
#define NVC9B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_FALSE (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_TRUE (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
|
||||
#define NVC9B5_LAUNCH_DMA_DISABLE_PLC 26:26
|
||||
#define NVC9B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_DISABLE_PLC_TRUE (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE 27:27
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_ONE_WORD (0x00000000)
|
||||
#define NVC9B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_TWO_WORD (0x00000001)
|
||||
#define NVC9B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
|
||||
#define NVC9B5_OFFSET_IN_UPPER (0x00000400)
|
||||
#define NVC9B5_OFFSET_IN_UPPER_UPPER 24:0
|
||||
#define NVC9B5_OFFSET_IN_LOWER (0x00000404)
|
||||
#define NVC9B5_OFFSET_IN_LOWER_VALUE 31:0
|
||||
#define NVC9B5_OFFSET_OUT_UPPER (0x00000408)
|
||||
#define NVC9B5_OFFSET_OUT_UPPER_UPPER 24:0
|
||||
#define NVC9B5_OFFSET_OUT_LOWER (0x0000040C)
|
||||
#define NVC9B5_OFFSET_OUT_LOWER_VALUE 31:0
|
||||
#define NVC9B5_PITCH_IN (0x00000410)
|
||||
#define NVC9B5_PITCH_IN_VALUE 31:0
|
||||
#define NVC9B5_PITCH_OUT (0x00000414)
|
||||
#define NVC9B5_PITCH_OUT_VALUE 31:0
|
||||
#define NVC9B5_LINE_LENGTH_IN (0x00000418)
|
||||
#define NVC9B5_LINE_LENGTH_IN_VALUE 31:0
|
||||
#define NVC9B5_LINE_COUNT (0x0000041C)
|
||||
#define NVC9B5_LINE_COUNT_VALUE 31:0
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE (0x00000500)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_MODE 0:0
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_MODE_ENCRYPT (0x00000000)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_MODE_DECRYPT (0x00000001)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET 20:19
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_LOCAL_FB (0x00000000)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_COHERENT_SYSMEM (0x00000001)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_NONCOHERENT_SYSMEM (0x00000002)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_PEERMEM (0x00000003)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_PEER_ID 23:21
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_SRC_FLA 24:24
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET 26:25
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_LOCAL_FB (0x00000000)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_COHERENT_SYSMEM (0x00000001)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_NONCOHERENT_SYSMEM (0x00000002)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_PEERMEM (0x00000003)
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_PEER_ID 29:27
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_DST_FLA 30:30
|
||||
#define NVC9B5_SET_SECURE_COPY_MODE_RESERVED_END_OF_COPY 31:31
|
||||
#define NVC9B5_SET_DECRYPT_IV0 (0x00000504)
|
||||
#define NVC9B5_SET_DECRYPT_IV0_VALUE 31:0
|
||||
#define NVC9B5_SET_DECRYPT_IV1 (0x00000508)
|
||||
#define NVC9B5_SET_DECRYPT_IV1_VALUE 31:0
|
||||
#define NVC9B5_SET_DECRYPT_IV2 (0x0000050C)
|
||||
#define NVC9B5_SET_DECRYPT_IV2_VALUE 31:0
|
||||
#define NVC9B5_RESERVED_SET_AESCOUNTER (0x00000510)
|
||||
#define NVC9B5_RESERVED_SET_AESCOUNTER_VALUE 31:0
|
||||
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER (0x00000514)
|
||||
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER_UPPER 24:0
|
||||
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER (0x00000518)
|
||||
#define NVC9B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER_LOWER 31:0
|
||||
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_UPPER (0x00000530)
|
||||
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_UPPER_UPPER 24:0
|
||||
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_LOWER (0x00000534)
|
||||
#define NVC9B5_SET_ENCRYPT_AUTH_TAG_ADDR_LOWER_LOWER 31:0
|
||||
#define NVC9B5_SET_ENCRYPT_IV_ADDR_UPPER (0x00000538)
|
||||
#define NVC9B5_SET_ENCRYPT_IV_ADDR_UPPER_UPPER 24:0
|
||||
#define NVC9B5_SET_ENCRYPT_IV_ADDR_LOWER (0x0000053C)
|
||||
#define NVC9B5_SET_ENCRYPT_IV_ADDR_LOWER_LOWER 31:0
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS (0x00000580)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_OPERATION 0:0
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_OPERATION_DECOMPRESS (0x00000000)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_OPERATION_COMPRESS (0x00000001)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO 3:1
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_SNAPPY (0x00000000)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_LZ4_DATA_ONLY (0x00000001)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_LZ4_BLOCK (0x00000002)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_LZ4_BLOCK_CHECKSUM (0x00000003)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_DEFLATE (0x00000004)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_ALGO_SNAPPY_WITH_LONG_FETCH (0x00000005)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM 29:28
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_NONE (0x00000000)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_ADLER32 (0x00000001)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_CRC32 (0x00000002)
|
||||
#define NVC9B5_SET_COMPRESSION_PARAMETERS_CHECK_SUM_SNAPPY_CRC (0x00000003)
|
||||
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH (0x00000584)
|
||||
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_V 31:0
|
||||
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_UPPER (0x00000588)
|
||||
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_UPPER_UPPER 24:0
|
||||
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_LOWER (0x0000058C)
|
||||
#define NVC9B5_SET_DECOMPRESS_OUT_LENGTH_ADDR_LOWER_LOWER 31:0
|
||||
#define NVC9B5_SET_DECOMPRESS_CHECKSUM (0x00000590)
|
||||
#define NVC9B5_SET_DECOMPRESS_CHECKSUM_V 31:0
|
||||
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS (0x000006FC)
|
||||
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE 0:0
|
||||
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_FALSE (0x00000000)
|
||||
#define NVC9B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_TRUE (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_CONST_A (0x00000700)
|
||||
#define NVC9B5_SET_REMAP_CONST_A_V 31:0
|
||||
#define NVC9B5_SET_REMAP_CONST_B (0x00000704)
|
||||
#define NVC9B5_SET_REMAP_CONST_B_V 31:0
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS (0x00000708)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X 2:0
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y 6:4
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z 10:8
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W 14:12
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
|
||||
#define NVC9B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE (0x0000070C)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC9B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC9B5_SET_DST_WIDTH (0x00000710)
|
||||
#define NVC9B5_SET_DST_WIDTH_V 31:0
|
||||
#define NVC9B5_SET_DST_HEIGHT (0x00000714)
|
||||
#define NVC9B5_SET_DST_HEIGHT_V 31:0
|
||||
#define NVC9B5_SET_DST_DEPTH (0x00000718)
|
||||
#define NVC9B5_SET_DST_DEPTH_V 31:0
|
||||
#define NVC9B5_SET_DST_LAYER (0x0000071C)
|
||||
#define NVC9B5_SET_DST_LAYER_V 31:0
|
||||
#define NVC9B5_SET_DST_ORIGIN (0x00000720)
|
||||
#define NVC9B5_SET_DST_ORIGIN_X 15:0
|
||||
#define NVC9B5_SET_DST_ORIGIN_Y 31:16
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE (0x00000728)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
|
||||
#define NVC9B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
|
||||
#define NVC9B5_SET_SRC_WIDTH (0x0000072C)
|
||||
#define NVC9B5_SET_SRC_WIDTH_V 31:0
|
||||
#define NVC9B5_SET_SRC_HEIGHT (0x00000730)
|
||||
#define NVC9B5_SET_SRC_HEIGHT_V 31:0
|
||||
#define NVC9B5_SET_SRC_DEPTH (0x00000734)
|
||||
#define NVC9B5_SET_SRC_DEPTH_V 31:0
|
||||
#define NVC9B5_SET_SRC_LAYER (0x00000738)
|
||||
#define NVC9B5_SET_SRC_LAYER_V 31:0
|
||||
#define NVC9B5_SET_SRC_ORIGIN (0x0000073C)
|
||||
#define NVC9B5_SET_SRC_ORIGIN_X 15:0
|
||||
#define NVC9B5_SET_SRC_ORIGIN_Y 31:16
|
||||
#define NVC9B5_SRC_ORIGIN_X (0x00000744)
|
||||
#define NVC9B5_SRC_ORIGIN_X_VALUE 31:0
|
||||
#define NVC9B5_SRC_ORIGIN_Y (0x00000748)
|
||||
#define NVC9B5_SRC_ORIGIN_Y_VALUE 31:0
|
||||
#define NVC9B5_DST_ORIGIN_X (0x0000074C)
|
||||
#define NVC9B5_DST_ORIGIN_X_VALUE 31:0
|
||||
#define NVC9B5_DST_ORIGIN_Y (0x00000750)
|
||||
#define NVC9B5_DST_ORIGIN_Y_VALUE 31:0
|
||||
#define NVC9B5_PM_TRIGGER_END (0x00001114)
|
||||
#define NVC9B5_PM_TRIGGER_END_V 31:0
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
#endif // _clc9b5_h
|
||||
|
||||
#endif // __gb100_clc9b5_h__
|
||||
|
||||
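The "HI:LO" pairs above are bit ranges within a 32-bit method payload, while the parenthesized constants are method offsets or field values. As an illustrative sketch only (the helper below is not part of the header; the driver itself uses NVIDIA's DRF-style macros for this), a field value can be masked and shifted into place from its bounds like so:

    // Hypothetical helper: clamp 'value' to the width of an HI:LO field and
    // shift it into position. The mask is computed in 64 bits so a full
    // 31:0 field does not overflow the shift.
    static inline NvU32 nvc9b5_pack_field(NvU32 value, unsigned hi, unsigned lo)
    {
        NvU32 mask = (NvU32)((1ULL << (hi - lo + 1)) - 1);
        return (value & mask) << lo;
    }

    // Example: a pipelined LAUNCH_DMA payload with a flush and pitch layouts.
    // launch = nvc9b5_pack_field(NVC9B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED, 1, 0) |
    //          nvc9b5_pack_field(NVC9B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE, 2, 2) |
    //          nvc9b5_pack_field(NVC9B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH, 7, 7) |
    //          nvc9b5_pack_field(NVC9B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH, 8, 8);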
@@ -151,6 +151,7 @@ typedef volatile struct _clcba2_tag0 {
#define NVCBA2_ERROR_SCRUBBER_INSUFFICIENT_PERMISSIONS              (0x0000001b)
#define NVCBA2_ERROR_SCRUBBER_MUTEX_ACQUIRE_FAILURE                 (0x0000001c)
#define NVCBA2_ERROR_SCRUB_SIZE_MAX_EXCEEDED                        (0x0000001d)
#define NVCBA2_ERROR_SIZE_ZERO                                      (0x0000001e)

#ifdef __cplusplus
};     /* extern "C" */

@@ -43,4 +43,7 @@

#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA100               (0x00000000)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA000               (0x00000001)

#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B               (0x0000000B)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B               (0x0000000B)
#endif /* _ctrl2080mc_h_ */
@@ -21,6 +21,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rm_mem.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_channel.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_lock.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hal.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_fd_type.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_processors.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree.c
@@ -59,7 +60,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_fault_buffer.c
@@ -96,7 +96,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_heuristics.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_thrashing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_prefetch.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_ibm.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_faults.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_sva.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
@@ -128,3 +127,4 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_group_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_thread_context_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test_file.c

@@ -50,7 +50,6 @@ NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_UVM_OBJECTS)

NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_empty
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_replace_slot
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pnv_npu2_init_context
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cpumask_of_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
@@ -33,10 +33,12 @@
#include "uvm_va_block.h"
#include "uvm_tools.h"
#include "uvm_common.h"
#include "uvm_fd_type.h"
#include "uvm_linux_ioctl.h"
#include "uvm_hmm.h"
#include "uvm_mem.h"
#include "uvm_kvmalloc.h"
#include "uvm_test_file.h"

#define NVIDIA_UVM_DEVICE_NAME "nvidia-uvm"

@@ -49,55 +51,9 @@ bool uvm_file_is_nvidia_uvm(struct file *filp)
    return (filp != NULL) && (filp->f_op == &uvm_fops);
}

uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val)
bool uvm_file_is_nvidia_uvm_va_space(struct file *filp)
{
    unsigned long uptr;
    uvm_fd_type_t type;
    void *ptr;

    UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));

    uptr = atomic_long_read_acquire((atomic_long_t *) (&filp->private_data));
    type = (uvm_fd_type_t)(uptr & UVM_FD_TYPE_MASK);
    ptr = (void *)(uptr & ~UVM_FD_TYPE_MASK);
    BUILD_BUG_ON(UVM_FD_COUNT > UVM_FD_TYPE_MASK + 1);

    switch (type) {
        case UVM_FD_UNINITIALIZED:
        case UVM_FD_INITIALIZING:
            UVM_ASSERT(!ptr);
            break;

        case UVM_FD_VA_SPACE:
            UVM_ASSERT(ptr);
            BUILD_BUG_ON(__alignof__(uvm_va_space_t) < (1UL << UVM_FD_TYPE_BITS));
            break;

        case UVM_FD_MM:
            UVM_ASSERT(ptr);
            BUILD_BUG_ON(__alignof__(struct file) < (1UL << UVM_FD_TYPE_BITS));
            break;

        default:
            UVM_ASSERT(0);
    }

    if (ptr_val)
        *ptr_val = ptr;

    return type;
}

void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type)
{
    void *ptr;

    UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));

    if (uvm_fd_type(filp, &ptr) == type)
        return ptr;
    else
        return NULL;
    return uvm_file_is_nvidia_uvm(filp) && uvm_fd_type(filp, NULL) == UVM_FD_VA_SPACE;
}
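The removed uvm_fd_type() above reads a small tagged pointer: the fd type lives in the low bits of filp->private_data and the payload pointer in the remaining bits, which is only sound because every pointee is at least (1 << UVM_FD_TYPE_BITS)-aligned (that is what the BUILD_BUG_ONs enforce). A minimal standalone sketch of the encoding, with hypothetical names (the real constants moved to uvm_fd_type.h in this release):

    #define FD_TYPE_BITS 2UL
    #define FD_TYPE_MASK ((1UL << FD_TYPE_BITS) - 1)

    static inline unsigned long fd_encode(void *ptr, unsigned long type)
    {
        // Caller guarantees 'ptr' is (1 << FD_TYPE_BITS)-aligned and that
        // 'type' fits inside FD_TYPE_MASK, so the two never collide.
        return (unsigned long)ptr | type;
    }

    static inline unsigned long fd_decode(unsigned long uptr, void **ptr_out)
    {
        *ptr_out = (void *)(uptr & ~FD_TYPE_MASK);  // The payload pointer.
        return uptr & FD_TYPE_MASK;                 // The type tag.
    }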
static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct file *filp)
@@ -105,7 +61,6 @@ static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct
    uvm_va_space_t *va_space;
    uvm_va_space_mm_t *va_space_mm;
    struct file *uvm_file;
    uvm_fd_type_t old_fd_type;
    struct mm_struct *mm;
    NV_STATUS status;

@@ -127,14 +82,9 @@ static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct
        goto err;
    }

    old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
                                      UVM_FD_UNINITIALIZED,
                                      UVM_FD_INITIALIZING);
    old_fd_type &= UVM_FD_TYPE_MASK;
    if (old_fd_type != UVM_FD_UNINITIALIZED) {
        status = NV_ERR_IN_USE;
    status = uvm_fd_type_init(filp);
    if (status != NV_OK)
        goto err;
    }

    va_space_mm = &va_space->va_space_mm;
    uvm_spin_lock(&va_space_mm->lock);
@@ -173,13 +123,13 @@ static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct
        break;
    }
    uvm_spin_unlock(&va_space_mm->lock);
    atomic_long_set_release((atomic_long_t *)&filp->private_data, (long)uvm_file | UVM_FD_MM);
    uvm_fd_type_set(filp, UVM_FD_MM, uvm_file);

    return NV_OK;

err_release_unlock:
    uvm_spin_unlock(&va_space_mm->lock);
    atomic_long_set_release((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED);
    uvm_fd_type_set(filp, UVM_FD_UNINITIALIZED, NULL);

err:
    if (uvm_file)
@@ -240,7 +190,7 @@ static void uvm_release_deferred(void *data)
    // Since this function is only scheduled to run when uvm_release() fails
    // to trylock-acquire the pm.lock, the following acquisition attempt
    // is expected to block this thread, and cause it to remain blocked until
    // uvm_resume() releases the lock. As a result, the deferred release
    // kthread queue may stall for long periods of time.
    uvm_down_read(&g_uvm_global.pm.lock);

@@ -249,12 +199,43 @@ static void uvm_release_deferred(void *data)
    uvm_up_read(&g_uvm_global.pm.lock);
}

static void uvm_mm_release(struct file *filp, struct file *uvm_file)
static void uvm_release_va_space(struct file *filp, uvm_va_space_t *va_space)
{
    int ret;

    filp->private_data = NULL;
    filp->f_mapping = NULL;

    // Because the kernel discards the status code returned from this release
    // callback, early exit in case of a pm.lock acquisition failure is not
    // an option. Instead, the teardown work normally performed synchronously
    // needs to be scheduled to run after uvm_resume() releases the lock.
    if (uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
        uvm_va_space_destroy(va_space);
        uvm_up_read(&g_uvm_global.pm.lock);
    }
    else {
        // Remove references to this inode from the address_space. This isn't
        // strictly necessary, as any CPU mappings of this file have already
        // been destroyed, and va_space->mapping won't be used again. Still,
        // the va_space survives the inode if its destruction is deferred, in
        // which case the references are rendered stale.
        address_space_init_once(va_space->mapping);

        nv_kthread_q_item_init(&va_space->deferred_release_q_item, uvm_release_deferred, va_space);
        ret = nv_kthread_q_schedule_q_item(&g_uvm_global.deferred_release_q, &va_space->deferred_release_q_item);
        UVM_ASSERT(ret != 0);
    }
}

static void uvm_release_mm(struct file *filp, struct file *uvm_file)
{
    uvm_va_space_t *va_space = uvm_va_space_get(uvm_file);
    uvm_va_space_mm_t *va_space_mm = &va_space->va_space_mm;
    struct mm_struct *mm = va_space_mm->mm;

    uvm_kvfree(filp->f_mapping);

    if (uvm_va_space_mm_enabled(va_space)) {
        uvm_va_space_mm_unregister(va_space);

@@ -269,46 +250,27 @@ static void uvm_mm_release(struct file *filp, struct file *uvm_file)
static int uvm_release(struct inode *inode, struct file *filp)
{
    void *ptr;
    uvm_va_space_t *va_space;
    uvm_fd_type_t fd_type;
    int ret;
    uvm_fd_type_t fd_type = uvm_fd_type(filp, &ptr);

    fd_type = uvm_fd_type(filp, &ptr);
    UVM_ASSERT(fd_type != UVM_FD_INITIALIZING);
    if (fd_type == UVM_FD_UNINITIALIZED) {
        uvm_kvfree(filp->f_mapping);
        return 0;
    }
    else if (fd_type == UVM_FD_MM) {
        uvm_kvfree(filp->f_mapping);
        uvm_mm_release(filp, (struct file *)ptr);
        return 0;
    }
    switch (fd_type) {
        case UVM_FD_UNINITIALIZED:
            uvm_kvfree(filp->f_mapping);
            break;

    UVM_ASSERT(fd_type == UVM_FD_VA_SPACE);
    va_space = (uvm_va_space_t *)ptr;
    filp->private_data = NULL;
    filp->f_mapping = NULL;
        case UVM_FD_VA_SPACE:
            uvm_release_va_space(filp, (uvm_va_space_t *)ptr);
            break;

    // Because the kernel discards the status code returned from this release
    // callback, early exit in case of a pm.lock acquisition failure is not
    // an option. Instead, the teardown work normally performed synchronously
    // needs to be scheduled to run after uvm_resume() releases the lock.
    if (uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
        uvm_va_space_destroy(va_space);
        uvm_up_read(&g_uvm_global.pm.lock);
    }
    else {
        // Remove references to this inode from the address_space. This isn't
        // strictly necessary, as any CPU mappings of this file have already
        // been destroyed, and va_space->mapping won't be used again. Still,
        // the va_space survives the inode if its destruction is deferred, in
        // which case the references are rendered stale.
        address_space_init_once(va_space->mapping);
        case UVM_FD_MM:
            uvm_release_mm(filp, (struct file *)ptr);
            break;

        nv_kthread_q_item_init(&va_space->deferred_release_q_item, uvm_release_deferred, va_space);
        ret = nv_kthread_q_schedule_q_item(&g_uvm_global.deferred_release_q, &va_space->deferred_release_q_item);
        UVM_ASSERT(ret != 0);
        case UVM_FD_TEST:
            uvm_test_file_release(filp, (uvm_test_file_t *)ptr);
            break;

        default:
            UVM_ASSERT_MSG(0, "Unexpected fd type: %d\n", fd_type);
    }

    return 0;
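uvm_release_va_space() above is an instance of a trylock-or-defer teardown: a ->release() callback cannot report failure, so when pm.lock is contended the destruction is queued instead of blocking. A generic sketch of the shape, assuming a standard kernel workqueue (release_or_defer(), teardown_work, and its handler are hypothetical names, not part of this driver):

    #include <linux/rwsem.h>
    #include <linux/workqueue.h>

    static void release_or_defer(struct rw_semaphore *pm_lock,
                                 struct work_struct *teardown_work)
    {
        if (down_read_trylock(pm_lock)) {
            // Fast path: the lock is free, so tear the object down
            // synchronously here, then drop the lock.
            up_read(pm_lock);
        }
        else {
            // Slow path: release() must still return, so queue the teardown
            // to run after the current lock holder (resume) drops the lock.
            schedule_work(teardown_work);
        }
    }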
@@ -829,6 +791,7 @@ static struct vm_operations_struct uvm_vm_ops_device_p2p =

static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
{
    void *fd_type_ptr;
    uvm_va_space_t *va_space;
    NV_STATUS status = uvm_global_get_status();
    int ret = 0;
@@ -837,9 +800,17 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
    if (status != NV_OK)
        return -nv_status_to_errno(status);

    va_space = uvm_fd_va_space(filp);
    if (!va_space)
        return -EBADFD;
    switch (uvm_fd_type(filp, &fd_type_ptr)) {
        case UVM_FD_VA_SPACE:
            va_space = (uvm_va_space_t *)fd_type_ptr;
            break;

        case UVM_FD_TEST:
            return uvm_test_file_mmap((uvm_test_file_t *)fd_type_ptr, vma);

        default:
            return -EBADFD;
    }

    // When the VA space is associated with an mm, all vmas under the VA space
    // must come from that mm.
@@ -867,8 +838,8 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
    }

    // If the PM lock cannot be acquired, disable the VMA and report success
    // to the caller. The caller is expected to determine whether the
    // map operation succeeded via an ioctl() call. This is necessary to
    // safely handle MAP_FIXED, which needs to complete atomically to prevent
    // the loss of the virtual address range.
    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
@@ -999,33 +970,40 @@ static NV_STATUS uvm_api_initialize(UVM_INITIALIZE_PARAMS *params, struct file *
    // attempt to be made. This is safe because other threads will have only had
    // a chance to observe UVM_FD_INITIALIZING and not UVM_FD_VA_SPACE in this
    // case.
    old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
                                      UVM_FD_UNINITIALIZED,
                                      UVM_FD_INITIALIZING);
    old_fd_type &= UVM_FD_TYPE_MASK;
    if (old_fd_type == UVM_FD_UNINITIALIZED) {
        status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
        if (status != NV_OK) {
            atomic_long_set_release((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED);
            return status;
        }
    old_fd_type = uvm_fd_type_init_cas(filp);
    switch (old_fd_type) {
        case UVM_FD_UNINITIALIZED:
            status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
            if (status != NV_OK) {
                uvm_fd_type_set(filp, UVM_FD_UNINITIALIZED, NULL);
                return status;
            }

        atomic_long_set_release((atomic_long_t *)&filp->private_data, (long)va_space | UVM_FD_VA_SPACE);
    }
    else if (old_fd_type == UVM_FD_VA_SPACE) {
        va_space = uvm_va_space_get(filp);
            uvm_fd_type_set(filp, UVM_FD_VA_SPACE, va_space);
            break;

        if (params->flags != va_space->initialization_flags)
        case UVM_FD_VA_SPACE:
            va_space = uvm_va_space_get(filp);
            if (params->flags != va_space->initialization_flags)
                status = NV_ERR_INVALID_ARGUMENT;
            else
                status = NV_OK;

            break;

        case UVM_FD_MM:
        case UVM_FD_TEST:
            status = NV_ERR_INVALID_ARGUMENT;
        else
            status = NV_OK;
    }
    else if (old_fd_type == UVM_FD_MM) {
        status = NV_ERR_INVALID_ARGUMENT;
    }
    else {
        UVM_ASSERT(old_fd_type == UVM_FD_INITIALIZING);
        status = NV_ERR_BUSY_RETRY;
            break;

        case UVM_FD_INITIALIZING:
            status = NV_ERR_BUSY_RETRY;
            break;

        default:
            UVM_ASSERT(0);
            status = NV_ERR_INVALID_STATE; // Quiet compiler warnings
            break;
    }

    return status;
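The switch above is a one-time-initialization state machine: a single compare-and-swap decides which thread owns initialization, and every other caller observes either INITIALIZING (and retries) or a terminal type. A hedged sketch of what a helper like uvm_fd_type_init_cas() plausibly wraps, based on the cmpxchg sequence it replaces in this diff:

    // Returns the fd type observed before the attempt. Only the caller that
    // saw UVM_FD_UNINITIALIZED has claimed the INITIALIZING slot, and it must
    // later publish a terminal type (or roll back to UNINITIALIZED).
    static uvm_fd_type_t fd_type_init_cas_sketch(struct file *filp)
    {
        long prev = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
                                        UVM_FD_UNINITIALIZED,
                                        UVM_FD_INITIALIZING);
        return (uvm_fd_type_t)(prev & UVM_FD_TYPE_MASK);
    }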
@@ -1233,19 +1211,8 @@ static int uvm_init(void)
        goto error;
    }

    pr_info("Loaded the UVM driver, major device number %d.\n", MAJOR(g_uvm_base_dev));

    if (uvm_enable_builtin_tests)
        pr_info("Built-in UVM tests are enabled. This is a security risk.\n");

    // After Open RM is released, both the enclosing "#if" and this comment
    // block should be removed, because the uvm_hmm_is_enabled_system_wide()
    // check is both necessary and sufficient for reporting functionality.
    // Until that time, however, we need to avoid advertising UVM's ability to
    // enable HMM functionality.

    if (uvm_hmm_is_enabled_system_wide())
        UVM_INFO_PRINT("HMM (Heterogeneous Memory Management) is enabled in the UVM driver.\n");
        UVM_INFO_PRINT("Built-in UVM tests are enabled. This is a security risk.\n");

    return 0;

@@ -1274,8 +1241,6 @@ static void uvm_exit(void)
    uvm_global_exit();

    uvm_test_unload_state_exit();

    pr_info("Unloaded the UVM driver.\n");
}

static void __exit uvm_exit_entry(void)
@@ -1430,9 +1430,9 @@ NV_STATUS UvmAllocDeviceP2P(NvProcessorUuid gpuUuid,
// UvmMigrate
//
// Migrates the backing of a given virtual address range to the specified
// destination processor. If any page in the VA range is unpopulated, it is
// populated at the destination processor. The migrated pages in the VA range
// are also mapped on the destination processor.
// destination processor's nearest memory. If any page in the VA range is
// unpopulated, it is populated at the destination processor. The migrated pages
// in the VA range are also mapped on the destination processor.
//
// Both base and length must be aligned to the smallest page size supported by
// the CPU. The VA range must lie within the largest possible virtual address
@@ -2207,9 +2207,9 @@ NV_STATUS UvmMapDynamicParallelismRegion(void *base,
// allocated via a call to either UvmAlloc or UvmMemMap, or be supported
// system-allocated pageable memory. If the input virtual range corresponds to
// system-allocated pageable memory and UvmIsPageableMemoryAccessSupported
// reports that pageable memory access is supported, the behavior described
// below does not take effect, and read duplication will not be enabled for
// the input range.
// reports that pageable memory access is supported, or if a memoryless
// processor is present, the behavior described below does not take effect, and
// read duplication will not be enabled for the input range.
//
// Both base and length must be aligned to the smallest page size supported by
// the CPU.
@@ -2330,7 +2330,7 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// UvmSetPreferredLocation
//
// Sets the preferred location for the given virtual address range to be the
// specified processor's memory.
// specified processor's nearest memory.
//
// Both base and length must be aligned to the smallest page size supported by
// the CPU. The VA range must lie within the largest possible virtual address
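A hedged usage sketch for the semantics documented above (the parameter lists are abbreviated here because they vary across driver versions; consult the full prototypes in uvm.h):

    // Populate and map a range on the destination processor's nearest memory,
    // then pin future first-touch placement to the same processor.
    //
    //     status = UvmMigrate(base, length, /* destination UUID, ... */);
    //     status = UvmSetPreferredLocation(base, length, /* processor UUID, ... */);
    //
    // Both calls require base and length to be aligned to the smallest page
    // size supported by the CPU.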
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2021-2024 NVIDIA Corporation
    Copyright (c) 2021-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -38,12 +38,10 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->utlb_per_gpc_count = uvm_ada_get_utlbs_per_gpc(parent_gpu);

    parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
                                                          parent_gpu->utlb_per_gpc_count;
    parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
    {
        uvm_fault_buffer_entry_t *dummy;
        UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) *
                                                                                 8)));
        UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
    }

    // A single top level PDE on Ada covers 128 TB and that's the minimum size
@@ -80,10 +78,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->non_replayable_faults_supported = true;

    parent_gpu->access_counters_supported = true;

    parent_gpu->access_counters_can_use_physical_addresses = false;

    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2018-2024 NVIDIA Corporation
    Copyright (c) 2018-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -38,12 +38,10 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->utlb_per_gpc_count = uvm_ampere_get_utlbs_per_gpc(parent_gpu);

    parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
                                                          parent_gpu->utlb_per_gpc_count;
    parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
    {
        uvm_fault_buffer_entry_t *dummy;
        UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
                                                                           (sizeof(dummy->fault_source.utlb_id) * 8)));
        UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
    }

    // A single top level PDE on Ampere covers 128 TB and that's the minimum
@@ -84,10 +82,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->non_replayable_faults_supported = true;

    parent_gpu->access_counters_supported = true;

    parent_gpu->access_counters_can_use_physical_addresses = false;

    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;
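The uTLB assertion repeated in the Ada and Ampere hunks above is a general width check: a count can be stored in an N-byte hardware field only if it does not exceed 2^(8N). A standalone sketch of the same idiom:

    // 1ULL keeps the shift well-defined even for a 4-byte field (shift of 32).
    #define COUNT_FITS_IN_FIELD(count, field) \
        ((count) <= (1ULL << (sizeof(field) * 8)))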
@@ -189,7 +189,7 @@ static bool uvm_api_range_invalid(NvU64 base, NvU64 length)
}

// Some APIs can only enforce 4K alignment as it's the smallest GPU page size
// even when the smallest host page is larger (e.g. 64K on ppc64le).
// even when the smallest host page is larger.
static bool uvm_api_range_invalid_4k(NvU64 base, NvU64 length)
{
    return uvm_api_range_invalid_aligned(base, length, UVM_PAGE_SIZE_4K);
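A sketch of the alignment test that uvm_api_range_invalid_4k() delegates to, assuming the helper only needs a power-of-two alignment and a non-empty range (the real uvm_api_range_invalid_aligned() may apply additional bounds checks):

    static bool range_invalid_aligned_sketch(NvU64 base, NvU64 length, NvU64 alignment)
    {
        // OR-ing base and length tests both for misalignment with one mask.
        return length == 0 || ((base | length) & (alignment - 1)) != 0;
    }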
@@ -42,26 +42,11 @@ void uvm_ats_init(const UvmPlatformInfo *platform_info)
           uvm_va_space_mm_enabled_system();
}

void uvm_ats_init_va_space(uvm_va_space_t *va_space)
{
    uvm_init_rwsem(&va_space->ats.lock, UVM_LOCK_ORDER_LEAF);

    if (UVM_ATS_IBM_SUPPORTED())
        uvm_ats_ibm_init_va_space(va_space);
}

NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
    if (UVM_ATS_IBM_SUPPORTED()) {
        // uvm_ibm_add_gpu() needs to be called even if ATS is disabled since it
        // sets parent_gpu->npu. Not setting parent_gpu->npu will result in
        // incorrect NVLink addresses. See dma_addr_to_gpu_addr().

        return uvm_ats_ibm_add_gpu(parent_gpu);
    }
    else if (UVM_ATS_SVA_SUPPORTED()) {
        if (g_uvm_global.ats.enabled)
            return uvm_ats_sva_add_gpu(parent_gpu);
    if (g_uvm_global.ats.enabled) {
        UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());
        return uvm_ats_sva_add_gpu(parent_gpu);
    }

    return NV_OK;
@@ -69,38 +54,25 @@ NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)

void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
    if (UVM_ATS_IBM_SUPPORTED()) {
        // uvm_ibm_remove_gpu() needs to be called even if ATS is disabled since
        // uvm_ibm_add_gpu() is called even in that case and
        // uvm_ibm_remove_gpu() needs to undo the work done by
        // uvm_ats_add_gpu() (gpu retained_count etc.).

        uvm_ats_ibm_remove_gpu(parent_gpu);
    }
    else if (UVM_ATS_SVA_SUPPORTED()) {
        if (g_uvm_global.ats.enabled)
            uvm_ats_sva_remove_gpu(parent_gpu);
    if (g_uvm_global.ats.enabled) {
        UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());
        uvm_ats_sva_remove_gpu(parent_gpu);
    }
}

NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
    NV_STATUS status = NV_OK;

    UVM_ASSERT(gpu_va_space);

    if (!gpu_va_space->ats.enabled)
        return status;
        return NV_OK;

    UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());

    uvm_assert_lockable_order(UVM_LOCK_ORDER_MMAP_LOCK);
    uvm_assert_lockable_order(UVM_LOCK_ORDER_VA_SPACE);

    if (UVM_ATS_IBM_SUPPORTED())
        status = uvm_ats_ibm_bind_gpu(gpu_va_space);
    else if (UVM_ATS_SVA_SUPPORTED())
        status = uvm_ats_sva_bind_gpu(gpu_va_space);

    return status;
    return uvm_ats_sva_bind_gpu(gpu_va_space);
}

void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
@@ -110,10 +82,9 @@ void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
    if (!gpu_va_space->ats.enabled)
        return;

    if (UVM_ATS_IBM_SUPPORTED())
        uvm_ats_ibm_unbind_gpu(gpu_va_space);
    else if (UVM_ATS_SVA_SUPPORTED())
        uvm_ats_sva_unbind_gpu(gpu_va_space);
    UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());

    uvm_ats_sva_unbind_gpu(gpu_va_space);
}

NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
@@ -127,6 +98,8 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
    if (!gpu_va_space->ats.enabled)
        return status;

    UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());

    va_space = gpu_va_space->va_space;
    UVM_ASSERT(va_space);

@@ -138,10 +111,7 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
    if (uvm_processor_mask_test(&va_space->ats.registered_gpu_va_spaces, gpu_id))
        return NV_ERR_INVALID_DEVICE;

    if (UVM_ATS_IBM_SUPPORTED())
        status = uvm_ats_ibm_register_gpu_va_space(gpu_va_space);
    else if (UVM_ATS_SVA_SUPPORTED())
        status = uvm_ats_sva_register_gpu_va_space(gpu_va_space);
    status = uvm_ats_sva_register_gpu_va_space(gpu_va_space);

    if (status == NV_OK)
        uvm_processor_mask_set(&va_space->ats.registered_gpu_va_spaces, gpu_id);
@@ -159,25 +129,14 @@ void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
    if (!gpu_va_space->ats.enabled)
        return;

    UVM_ASSERT(UVM_ATS_SVA_SUPPORTED());

    va_space = gpu_va_space->va_space;
    gpu_id = gpu_va_space->gpu->id;

    if (UVM_ATS_IBM_SUPPORTED())
        uvm_ats_ibm_unregister_gpu_va_space(gpu_va_space);
    else if (UVM_ATS_SVA_SUPPORTED())
        uvm_ats_sva_unregister_gpu_va_space(gpu_va_space);
    uvm_ats_sva_unregister_gpu_va_space(gpu_va_space);

    uvm_va_space_down_write(va_space);
    uvm_processor_mask_clear(&va_space->ats.registered_gpu_va_spaces, gpu_id);
    uvm_va_space_up_write(va_space);
}

void uvm_ats_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
    // We can only reach here from the mmu_notifier callbacks and these callbacks
    // wouldn't have been registered if ATS wasn't enabled.
    UVM_ASSERT(g_uvm_global.ats.enabled);

    if (UVM_ATS_IBM_SUPPORTED())
        uvm_ats_ibm_invalidate(va_space, start, end);
}
@@ -26,12 +26,11 @@

#include "uvm_linux.h"
#include "uvm_forward_decl.h"
#include "uvm_ats_ibm.h"
#include "nv_uvm_types.h"
#include "uvm_lock.h"
#include "uvm_ats_sva.h"

#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
#define UVM_ATS_SUPPORTED() UVM_ATS_SVA_SUPPORTED()

typedef struct
{
@@ -43,12 +42,7 @@ typedef struct
    // being called in ats_compute_residency_mask().
    uvm_rw_semaphore_t lock;

    union
    {
        uvm_ibm_va_space_t ibm;

        uvm_sva_va_space_t sva;
    };
    uvm_sva_va_space_t sva;
} uvm_ats_va_space_t;

typedef struct
@@ -61,12 +55,7 @@ typedef struct

    NvU32 pasid;

    union
    {
        uvm_ibm_gpu_va_space_t ibm;

        uvm_sva_gpu_va_space_t sva;
    };
    uvm_sva_gpu_va_space_t sva;
} uvm_ats_gpu_va_space_t;

// Initializes driver-wide ATS state
@@ -74,11 +63,6 @@ typedef struct
// LOCKING: None
void uvm_ats_init(const UvmPlatformInfo *platform_info);

// Initializes ATS specific GPU state
//
// LOCKING: None
void uvm_ats_init_va_space(uvm_va_space_t *va_space);

// Enables ATS feature on the GPU.
//
// LOCKING: g_uvm_global.global lock mutex must be held.
@@ -115,8 +99,6 @@ void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space);
//
// LOCKING: The VA space lock must be held in write mode.
//          mm has to be retained prior to calling this function.
//          current->mm->mmap_lock must be held in write mode iff
//          UVM_ATS_IBM_SUPPORTED_IN_KERNEL() is 1.
NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);

// Disables ATS access for the gpu_va_space. Prior to calling this function,
@@ -124,19 +106,8 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// accesses in this GPU VA space, and that no ATS fault handling for this
// GPU will be attempted.
//
// LOCKING: This function may block on mmap_lock and will acquire the VA space
//          lock, so neither lock must be held.
// LOCKING: This function will acquire the VA space lock, so it must not be
//          held.
void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);

// Synchronously invalidate ATS translations cached by GPU TLBs. The
// invalidate applies to all GPUs with active GPU VA spaces in va_space, and
// covers all pages touching any part of the given range. end is inclusive.
//
// GMMU translations in the given range are not guaranteed to be
// invalidated.
//
// LOCKING: No locks are required, but this function may be called with
//          interrupts disabled.
void uvm_ats_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end);

#endif // __UVM_ATS_H__
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2023 NVIDIA Corporation
    Copyright (c) 2024-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -58,37 +58,6 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
    bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
    bool is_fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
    bool is_prefetch_faults = (is_fault_service_type && (access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH));
    uvm_populate_permissions_t populate_permissions = is_fault_service_type ?
        (write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY) :
        UVM_POPULATE_PERMISSIONS_INHERIT;


    // Request uvm_migrate_pageable() to touch the corresponding page after
    // population.
    // Under virtualization ATS provides two translations:
    // 1) guest virtual -> guest physical
    // 2) guest physical -> host physical
    //
    // The overall ATS translation will fault if either of those translations is
    // invalid. The pin_user_pages() call within uvm_migrate_pageable() call
    // below handles translation #1, but not #2. We don't know if we're running
    // as a guest, but in case we are we can force that translation to be valid
    // by touching the guest physical address from the CPU. If the translation
    // is not valid then the access will cause a hypervisor fault. Note that
    // dma_map_page() can't establish mappings used by GPU ATS SVA translations.
    // GPU accesses to host physical addresses obtained as a result of the
    // address translation request uses the CPU address space instead of the
    // IOMMU address space since the translated host physical address isn't
    // necessarily an IOMMU address. The only way to establish guest physical to
    // host physical mapping in the CPU address space is to touch the page from
    // the CPU.
    //
    // We assume that the hypervisor mappings are all VM_PFNMAP, VM_SHARED, and
    // VM_WRITE, meaning that the mappings are all granted write access on any
    // fault and that the kernel will never revoke them.
    // drivers/vfio/pci/vfio_pci_nvlink2.c enforces this. Thus we can assume
    // that a read fault is always sufficient to also enable write access on the
    // guest translation.

    uvm_migrate_args_t uvm_migrate_args =
    {
@@ -98,8 +67,8 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
        .dst_node_id                        = ats_context->residency_node,
        .start                              = start,
        .length                             = length,
        .populate_permissions               = populate_permissions,
        .touch                              = is_fault_service_type,
        .populate_permissions               = UVM_POPULATE_PERMISSIONS_INHERIT,
        .populate_flags                     = UVM_POPULATE_PAGEABLE_FLAG_SKIP_PROT_CHECK,
        .skip_mapped                        = is_fault_service_type,
        .populate_on_cpu_alloc_failures     = is_fault_service_type,
        .populate_on_migrate_vma_failures   = is_fault_service_type,
@@ -115,6 +84,13 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
        .fail_on_unresolved_sto_errors      = !is_fault_service_type || is_prefetch_faults,
    };

    if (is_fault_service_type) {
        uvm_migrate_args.populate_permissions = (write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY);

        // If we're faulting, let the GPU access special vmas
        uvm_migrate_args.populate_flags |= UVM_POPULATE_PAGEABLE_FLAG_ALLOW_SPECIAL;
    }

    UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));

    // We are trying to use migrate_vma API in the kernel (if it exists) to
@@ -139,9 +115,9 @@ static void flush_tlb_va_region(uvm_gpu_va_space_t *gpu_va_space,
    uvm_ats_fault_invalidate_t *ats_invalidate;

    if (client_type == UVM_FAULT_CLIENT_TYPE_GPC)
        ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.replayable.ats_invalidate;
        ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer.replayable.ats_invalidate;
    else
        ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.non_replayable.ats_invalidate;
        ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer.non_replayable.ats_invalidate;

    if (!ats_invalidate->tlb_batch_pending) {
        uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->tlb_batch);
@@ -533,8 +509,20 @@ static NV_STATUS uvm_ats_service_faults_region(uvm_gpu_va_space_t *gpu_va_space,
                                  access_type,
                                  UVM_ATS_SERVICE_TYPE_FAULTS,
                                  ats_context);
    if (status != NV_OK)
    if (status != NV_OK) {
        // This condition can occur if we unexpectedly fault on a vma that
        // doesn't support faulting (or at least doesn't support
        // pin_user_pages). This may be an incorrect mapping setup from the
        // vma's owning driver, a hardware bug, or just that the owning driver
        // didn't expect a device fault. Either way, we don't want to consider
        // this a global error so don't propagate it, but also don't indicate
        // that the faults were serviced. That way the caller knows to cancel
        // them precisely.
        if (status == NV_ERR_INVALID_ADDRESS)
            return NV_OK;

        return status;
    }

    uvm_page_mask_region_fill(faults_serviced_mask, region);

@@ -689,12 +677,14 @@ bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_v
        if (next->node.start <= gmmu_region_base + UVM_GMMU_ATS_GRANULARITY - 1)
            return true;

        prev = uvm_va_range_container(uvm_range_tree_prev(&va_space->va_range_tree, &next->node));
        prev = uvm_va_range_gmmu_mappable_prev(next);
    }
    else {
        // No VA range exists after address, so check the last VA range in the
        // tree.
        prev = uvm_va_range_container(uvm_range_tree_last(&va_space->va_range_tree));
        while (prev && !uvm_va_range_is_gmmu_mappable(prev))
            prev = uvm_va_range_gmmu_mappable_prev(prev);
    }

    return prev && (prev->node.end >= gmmu_region_base);
@@ -1,715 +0,0 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2019 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_api.h"
|
||||
#include "uvm_lock.h"
|
||||
#include "uvm_kvmalloc.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_va_space.h"
|
||||
#include "uvm_va_space_mm.h"
|
||||
#include "uvm_ats_ibm.h"
|
||||
#include "uvm_common.h"
|
||||
|
||||
#include <linux/pci.h>
|
||||
|
||||
#if UVM_IBM_NPU_SUPPORTED()
|
||||
|
||||
#include <linux/of.h>
|
||||
#include <linux/sizes.h>
|
||||
#include <asm/pci-bridge.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/mmu_context.h>
|
||||
|
||||
#define NPU_ATSD_REG_MAP_SIZE 32
|
||||
|
||||
// There are three 8-byte registers in each ATSD mapping:
|
||||
#define NPU_ATSD_REG_LAUNCH 0
|
||||
#define NPU_ATSD_REG_AVA 1
|
||||
#define NPU_ATSD_REG_STAT 2
|
||||
|
||||
// Fields within the NPU_ATSD_REG_LAUNCH register:
|
||||
|
||||
// "PRS" (process-scoped) bit. 1 means to limit invalidates to the specified
|
||||
// PASID.
|
||||
#define NPU_ATSD_REG_LAUNCH_PASID_ENABLE 13
|
||||
|
||||
// "PID" field. This specifies the PASID target of the invalidate.
|
||||
#define NPU_ATSD_REG_LAUNCH_PASID_VAL 38
|
||||
|
||||
// "IS" bit. 0 means the specified virtual address range will be invalidated. 1
|
||||
// means all entries will be invalidated.
|
||||
#define NPU_ATSD_REG_LAUNCH_INVAL_ALL 12
|
||||
|
||||
// "AP" field. This encodes the size of a range-based invalidate.
|
||||
#define NPU_ATSD_REG_LAUNCH_INVAL_SIZE 17
|
||||
|
||||
// "No flush" bit. 0 will trigger a flush (membar) from the GPU following the
|
||||
// invalidate, 1 will not.
|
||||
#define NPU_ATSD_REG_LAUNCH_FLUSH_DISABLE 39
|
||||
|
||||
// Helper to iterate over the active NPUs in the given VA space (all NPUs with
|
||||
// GPUs that have GPU VA spaces registered in this VA space).
|
||||
#define for_each_npu_index_in_va_space(npu_index, va_space) \
|
||||
for (({uvm_assert_rwlock_locked(&(va_space)->ats.ibm.rwlock); \
|
||||
(npu_index) = find_first_bit((va_space)->ats.ibm.npu_active_mask, NV_MAX_NPUS);}); \
|
||||
(npu_index) < NV_MAX_NPUS; \
|
||||
(npu_index) = find_next_bit((va_space)->ats.ibm.npu_active_mask, NV_MAX_NPUS, (npu_index) + 1))
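
As the assert baked into the macro indicates, callers must hold the VA space's IBM rwlock across the whole iteration. A short usage sketch, mirroring the invalidate path later in this file (process_npu() is a hypothetical placeholder):

// Iterate over the active NPUs under the reader lock.
unsigned long irq_flags;
size_t i;

uvm_read_lock_irqsave(&va_space->ats.ibm.rwlock, irq_flags);

for_each_npu_index_in_va_space(i, va_space)
    process_npu(&g_uvm_global.npus[i]);

uvm_read_unlock_irqrestore(&va_space->ats.ibm.rwlock, irq_flags);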

// An invalidate requires operating on one set of registers in each NPU. This
// struct tracks which register set (id) is in use per NPU for a given
// operation.
typedef struct
{
    NvU8 ids[NV_MAX_NPUS];
} uvm_atsd_regs_t;

// Get the index of the input npu pointer within UVM's global npus array
static size_t uvm_ibm_npu_index(uvm_ibm_npu_t *npu)
{
    size_t npu_index = npu - &g_uvm_global.npus[0];
    UVM_ASSERT(npu_index < ARRAY_SIZE(g_uvm_global.npus));
    return npu_index;
}

// Find an existing NPU matching pci_domain, or return an empty NPU slot if none
// is found. Returns NULL if no slots are available.
static uvm_ibm_npu_t *uvm_ibm_npu_find(int pci_domain)
{
    size_t i;
    uvm_ibm_npu_t *npu, *first_free = NULL;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    for (i = 0; i < ARRAY_SIZE(g_uvm_global.npus); i++) {
        npu = &g_uvm_global.npus[i];
        if (npu->num_retained_gpus == 0) {
            if (!first_free)
                first_free = npu;
        }
        else if (npu->pci_domain == pci_domain) {
            return npu;
        }
    }

    return first_free;
}

static void uvm_ibm_npu_destroy(uvm_ibm_npu_t *npu)
{
    size_t i;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    UVM_ASSERT(npu->num_retained_gpus == 0);
    UVM_ASSERT(bitmap_empty(npu->atsd_regs.locks, UVM_MAX_ATSD_REGS));

    for (i = 0; i < npu->atsd_regs.count; i++) {
        UVM_ASSERT(npu->atsd_regs.io_addrs[i]);
        iounmap(npu->atsd_regs.io_addrs[i]);
    }

    memset(npu, 0, sizeof(*npu));
}

static NV_STATUS uvm_ibm_npu_init(uvm_ibm_npu_t *npu, struct pci_dev *npu_dev)
{
    struct pci_controller *hose;
    size_t i, reg_count, reg_size = sizeof(npu->atsd_regs.io_addrs[0]);
    int ret;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    UVM_ASSERT(npu->num_retained_gpus == 0);
    UVM_ASSERT(bitmap_empty(npu->atsd_regs.locks, UVM_MAX_ATSD_REGS));

    npu->pci_domain = pci_domain_nr(npu_dev->bus);

    if (!UVM_ATS_IBM_SUPPORTED_IN_DRIVER())
        return NV_OK;

    hose = pci_bus_to_host(npu_dev->bus);

    ret = of_property_count_elems_of_size(hose->dn, "ibm,mmio-atsd", reg_size);
    if (ret < 0) {
        UVM_ERR_PRINT("Failed to query NPU %d ATSD register count: %d\n", npu->pci_domain, ret);
        return errno_to_nv_status(ret);
    }

    // For ATS to be enabled globally, we must have NPU ATSD registers
    reg_count = ret;
    if (reg_count == 0 || reg_count > UVM_MAX_ATSD_REGS) {
        UVM_ERR_PRINT("NPU %d has invalid ATSD register count: %zu\n", npu->pci_domain, reg_count);
        return NV_ERR_INVALID_STATE;
    }

    // Map the ATSD registers
    for (i = 0; i < reg_count; i++) {
        u64 phys_addr;
        __be64 __iomem *io_addr;
        ret = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", i, &phys_addr);
        UVM_ASSERT(ret == 0);

        io_addr = ioremap(phys_addr, NPU_ATSD_REG_MAP_SIZE);
        if (!io_addr) {
            uvm_ibm_npu_destroy(npu);
            return NV_ERR_NO_MEMORY;
        }

        npu->atsd_regs.io_addrs[npu->atsd_regs.count++] = io_addr;
    }

    return NV_OK;
}

NV_STATUS uvm_ats_ibm_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
    struct pci_dev *npu_dev = pnv_pci_get_npu_dev(parent_gpu->pci_dev, 0);
    uvm_ibm_npu_t *npu;
    NV_STATUS status;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    if (!npu_dev)
        return NV_OK;

    npu = uvm_ibm_npu_find(pci_domain_nr(npu_dev->bus));
    if (!npu) {
        // If this happens then we can't support the system configuration until
        // NV_MAX_NPUS is updated. Return the same error as when the number of
        // GPUs exceeds UVM_MAX_GPUS.
        UVM_ERR_PRINT("No more NPU slots available, update NV_MAX_NPUS\n");
        return NV_ERR_INSUFFICIENT_RESOURCES;
    }

    if (npu->num_retained_gpus == 0) {
        status = uvm_ibm_npu_init(npu, npu_dev);
        if (status != NV_OK)
            return status;
    }

    // This npu field could be read concurrently by a thread in the ATSD
    // invalidate path. We don't need to provide ordering with those threads
    // because those invalidates won't apply to the GPU being added until a GPU
    // VA space on this GPU is registered.
    npu->atsd_regs.num_membars = max(npu->atsd_regs.num_membars, parent_gpu->num_hshub_tlb_invalidate_membars);

    parent_gpu->npu = npu;
    ++npu->num_retained_gpus;
    return NV_OK;
}

void uvm_ats_ibm_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
    uvm_ibm_npu_t *npu = parent_gpu->npu;
    uvm_parent_gpu_t *other_parent_gpu;
    NvU32 num_membars_new = 0;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    if (!npu)
        return;

    UVM_ASSERT(npu->num_retained_gpus > 0);
    if (--npu->num_retained_gpus == 0) {
        uvm_ibm_npu_destroy(npu);
    }
    else {
        // Re-calculate the membar count
        for_each_parent_gpu(other_parent_gpu) {
            // The current GPU being removed should've already been removed from
            // the global list.
            UVM_ASSERT(other_parent_gpu != parent_gpu);
            if (other_parent_gpu->npu == npu)
                num_membars_new = max(num_membars_new, other_parent_gpu->num_hshub_tlb_invalidate_membars);
        }

        UVM_ASSERT(num_membars_new > 0);
        npu->atsd_regs.num_membars = num_membars_new;
    }
}

#if UVM_ATS_IBM_SUPPORTED()

void uvm_ats_ibm_init_va_space(uvm_va_space_t *va_space)
{
    uvm_ibm_va_space_t *ibm_va_space;

    UVM_ASSERT(va_space);
    ibm_va_space = &va_space->ats.ibm;

    uvm_rwlock_irqsave_init(&ibm_va_space->rwlock, UVM_LOCK_ORDER_LEAF);
}

#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
static void npu_release_dummy(struct npu_context *npu_context, void *va_mm)
{
    // See the comment on the call to pnv_npu2_init_context()
}

static NV_STATUS uvm_ats_ibm_register_gpu_va_space_kernel(uvm_gpu_va_space_t *gpu_va_space)
{
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;
    struct npu_context *npu_context;

    // pnv_npu2_init_context() registers current->mm with
    // mmu_notifier_register(). We need that to match the mm we passed to our
    // own mmu_notifier_register() for this VA space.
    if (current->mm != va_space->va_space_mm.mm)
        return NV_ERR_NOT_SUPPORTED;

    uvm_assert_mmap_lock_locked_write(current->mm);
    uvm_assert_rwsem_locked_write(&va_space->lock);

    // pnv_npu2_init_context() doesn't handle being called multiple times for
    // the same GPU under the same mm, which could happen if multiple VA spaces
    // are created in this process. To handle that we pass the VA space pointer
    // as the callback parameter: the callback values are shared by all devices
    // under this mm, so pnv_npu2_init_context() enforces that the values match
    // the ones already registered to the mm.
    //
    // Otherwise we don't use the callback, since we have our own callback
    // registered under the va_space_mm that will be called at the same point
    // (mmu_notifier release).
    npu_context = pnv_npu2_init_context(gpu_va_space->gpu->parent->pci_dev,
                                        (MSR_DR | MSR_PR | MSR_HV),
                                        npu_release_dummy,
                                        va_space);
    if (IS_ERR(npu_context)) {
        int err = PTR_ERR(npu_context);

        // We'll get -EINVAL if the callback value (va_space) differs from the
        // one already registered to the npu_context associated with this mm.
        // That can only happen when multiple VA spaces attempt registration
        // within the same process, which is disallowed and should return
        // NV_ERR_NOT_SUPPORTED.
        if (err == -EINVAL)
            return NV_ERR_NOT_SUPPORTED;
        return errno_to_nv_status(err);
    }

    ibm_gpu_va_space->npu_context = npu_context;

    return NV_OK;
}

static void uvm_ats_ibm_unregister_gpu_va_space_kernel(uvm_gpu_va_space_t *gpu_va_space)
{
    uvm_gpu_va_space_state_t state;
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_ibm_va_space_t *ibm_va_space;
    uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;

    if (!ibm_gpu_va_space->npu_context)
        return;

    // va_space is guaranteed to not be NULL if ibm_gpu_va_space->npu_context is
    // not NULL.
    UVM_ASSERT(va_space);

    state = uvm_gpu_va_space_state(gpu_va_space);
    UVM_ASSERT(state == UVM_GPU_VA_SPACE_STATE_INIT || state == UVM_GPU_VA_SPACE_STATE_DEAD);

    ibm_va_space = &va_space->ats.ibm;

    // pnv_npu2_destroy_context() may in turn call mmu_notifier_unregister().
    // If uvm_va_space_mm_shutdown() is concurrently executing in another
    // thread, mmu_notifier_unregister() will wait for
    // uvm_va_space_mm_shutdown() to finish. uvm_va_space_mm_shutdown() takes
    // mmap_lock and the VA space lock, so we can't be holding those locks on
    // this path.
    uvm_assert_unlocked_order(UVM_LOCK_ORDER_MMAP_LOCK);
    uvm_assert_unlocked_order(UVM_LOCK_ORDER_VA_SPACE);

    pnv_npu2_destroy_context(ibm_gpu_va_space->npu_context, gpu_va_space->gpu->parent->pci_dev);
    ibm_gpu_va_space->npu_context = NULL;
}

#else

static void uvm_ats_ibm_register_gpu_va_space_driver(uvm_gpu_va_space_t *gpu_va_space)
{
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    size_t npu_index = uvm_ibm_npu_index(gpu->parent->npu);
    uvm_ibm_va_space_t *ibm_va_space;

    UVM_ASSERT(va_space);
    ibm_va_space = &va_space->ats.ibm;

    uvm_assert_rwsem_locked_write(&va_space->lock);

    uvm_write_lock_irqsave(&ibm_va_space->rwlock);

    // If this is the first GPU VA space to use this NPU in the VA space, mark
    // the NPU as active so invalidates are issued to it.
    if (ibm_va_space->npu_ref_counts[npu_index] == 0) {
        // If this is the first active NPU in the entire VA space, we have to
        // tell the kernel to send TLB invalidations to the IOMMU. See kernel
        // commit 03b8abedf4f4965e7e9e0d4f92877c42c07ce19f for background.
        //
        // This is safe to do without holding mm_users high or mmap_lock.
        if (bitmap_empty(ibm_va_space->npu_active_mask, NV_MAX_NPUS))
            mm_context_add_copro(va_space->va_space_mm.mm);

        UVM_ASSERT(!test_bit(npu_index, ibm_va_space->npu_active_mask));
        __set_bit(npu_index, ibm_va_space->npu_active_mask);
    }
    else {
        UVM_ASSERT(test_bit(npu_index, ibm_va_space->npu_active_mask));
    }

    ++ibm_va_space->npu_ref_counts[npu_index];

    // As soon as this lock is dropped, invalidates on this VA space's mm may
    // begin issuing ATSDs to this NPU.
    uvm_write_unlock_irqrestore(&ibm_va_space->rwlock);

    ibm_gpu_va_space->did_ibm_driver_init = true;
}

static void uvm_ats_ibm_unregister_gpu_va_space_driver(uvm_gpu_va_space_t *gpu_va_space)
{
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    size_t npu_index = uvm_ibm_npu_index(gpu->parent->npu);
    bool do_remove = false;
    uvm_ibm_va_space_t *ibm_va_space;
    uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;

    if (!ibm_gpu_va_space->did_ibm_driver_init)
        return;

    UVM_ASSERT(va_space);
    ibm_va_space = &va_space->ats.ibm;

    // Note that we aren't holding the VA space lock here, so another thread
    // could be in uvm_ats_ibm_register_gpu_va_space() for this same GPU right
    // now. The write lock and ref counts below will handle that case.

    // Once we return from this function with a bit cleared in the
    // npu_active_mask, we have to guarantee that this VA space no longer
    // accesses that NPU's ATSD registers. This is needed in case GPU unregister
    // needs to unmap those registers. We use the reader/writer lock to
    // guarantee this, which means that invalidations must not access the ATSD
    // registers outside of the lock.
    //
    // Future work: if we could synchronize_srcu() on the mmu_notifier SRCU we
    // might do that here instead to flush out all invalidates. That would allow
    // us to avoid taking a read lock in the invalidate path, though we'd have
    // to be careful when clearing the mask bit relative to the synchronize, and
    // we'd have to be careful in cases where this thread doesn't hold a
    // reference to mm_users.
    uvm_write_lock_irqsave(&ibm_va_space->rwlock);

    UVM_ASSERT(ibm_va_space->npu_ref_counts[npu_index] > 0);
    UVM_ASSERT(test_bit(npu_index, ibm_va_space->npu_active_mask));

    --ibm_va_space->npu_ref_counts[npu_index];
    if (ibm_va_space->npu_ref_counts[npu_index] == 0) {
        __clear_bit(npu_index, ibm_va_space->npu_active_mask);
        if (bitmap_empty(ibm_va_space->npu_active_mask, NV_MAX_NPUS))
            do_remove = true;
    }

    uvm_write_unlock_irqrestore(&ibm_va_space->rwlock);

    if (do_remove) {
        // mm_context_remove_copro() must be called outside of the spinlock
        // because it may issue invalidates across CPUs in this mm. The
        // coprocessor count is atomically refcounted by that function, so it's
        // safe to call here even if another thread jumps in with a register and
        // calls mm_context_add_copro() between this thread's unlock and this
        // call.
        UVM_ASSERT(va_space->va_space_mm.mm);
        mm_context_remove_copro(va_space->va_space_mm.mm);
    }
}
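
The register/unregister pair above is an instance of a common refcount-plus-active-mask pattern: per-unit reference counts, a bitmap mirroring which counts are non-zero, and a one-time global enable/disable on the first/last user. A minimal standalone sketch of just that bookkeeping (hypothetical types, locking omitted):

#include <stdbool.h>

#define MAX_UNITS 8

struct activation_tracker {
    unsigned ref_counts[MAX_UNITS];
    unsigned long active_mask; // bit n set <=> ref_counts[n] > 0
};

// Returns true if this was the first user overall, i.e. the caller must do the
// one-time global enable (mm_context_add_copro() in the code above).
static bool tracker_get(struct activation_tracker *t, unsigned unit)
{
    bool first_overall = (t->active_mask == 0);

    if (t->ref_counts[unit]++ == 0)
        t->active_mask |= 1UL << unit;

    return first_overall;
}

// Returns true if this was the last user overall, i.e. the caller must do the
// one-time global disable (mm_context_remove_copro() above).
static bool tracker_put(struct activation_tracker *t, unsigned unit)
{
    if (--t->ref_counts[unit] == 0)
        t->active_mask &= ~(1UL << unit);

    return t->active_mask == 0;
}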

#endif // UVM_ATS_IBM_SUPPORTED_IN_KERNEL()

static mm_context_id_t va_space_pasid(uvm_va_space_t *va_space)
{
    struct mm_struct *mm = va_space->va_space_mm.mm;
    UVM_ASSERT(mm);
    return mm->context.id;
}

NV_STATUS uvm_ats_ibm_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    NV_STATUS status = NV_OK;

    UVM_ASSERT(gpu_va_space->ats.enabled);
    UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_INIT);
    UVM_ASSERT(va_space->va_space_mm.mm);
    uvm_assert_rwsem_locked_write(&va_space->lock);

#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
    status = uvm_ats_ibm_register_gpu_va_space_kernel(gpu_va_space);
#else
    uvm_ats_ibm_register_gpu_va_space_driver(gpu_va_space);
#endif

    gpu_va_space->ats.pasid = (NvU32) va_space_pasid(gpu_va_space->va_space);

    return status;
}

void uvm_ats_ibm_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
    uvm_ats_ibm_unregister_gpu_va_space_kernel(gpu_va_space);
#else
    uvm_ats_ibm_unregister_gpu_va_space_driver(gpu_va_space);
#endif

    gpu_va_space->ats.pasid = -1U;
}

#if UVM_ATS_IBM_SUPPORTED_IN_DRIVER()

// Find any available ATSD register set in this NPU and return that index. This
// will busy wait until a register set is free.
static NvU8 atsd_reg_acquire(uvm_ibm_npu_t *npu)
{
    uvm_spin_loop_t spin;
    size_t i;
    bool first = true;

    while (1) {
        // Using for_each_clear_bit is racy, since the bits could change at any
        // point. That's ok since we'll either just retry or use a real atomic
        // to lock the bit. Checking for clear bits first avoids spamming
        // atomics in the contended case.
        for_each_clear_bit(i, npu->atsd_regs.locks, npu->atsd_regs.count) {
            if (!test_and_set_bit_lock(i, npu->atsd_regs.locks))
                return (NvU8)i;
        }

        // Back off and try again, avoiding the overhead of initializing the
        // tracking timers unless we need them.
        if (first) {
            uvm_spin_loop_init(&spin);
            first = false;
        }
        else {
            UVM_SPIN_LOOP(&spin);
        }
    }
}

static void atsd_reg_release(uvm_ibm_npu_t *npu, NvU8 reg)
{
    UVM_ASSERT(reg < npu->atsd_regs.count);
    UVM_ASSERT(test_bit(reg, npu->atsd_regs.locks));
    clear_bit_unlock(reg, npu->atsd_regs.locks);
}
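
test_and_set_bit_lock() and clear_bit_unlock() give each bit in the bitmap the semantics of a tiny spinlock with acquire/release ordering. A rough userspace analogue using GCC/Clang builtin atomics (a sketch of the idea, not the kernel implementation):

#include <stdbool.h>

static unsigned long lock_bits;

static bool try_lock_bit(int n)
{
    unsigned long mask = 1UL << n;

    // fetch_or returns the previous value; the lock is acquired only if the
    // bit was previously clear.
    return !(__atomic_fetch_or(&lock_bits, mask, __ATOMIC_ACQUIRE) & mask);
}

static void unlock_bit(int n)
{
    __atomic_fetch_and(&lock_bits, ~(1UL << n), __ATOMIC_RELEASE);
}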

static __be64 atsd_reg_read(uvm_ibm_npu_t *npu, NvU8 reg, size_t offset)
{
    __be64 __iomem *io_addr = npu->atsd_regs.io_addrs[reg] + offset;
    UVM_ASSERT(reg < npu->atsd_regs.count);
    return __raw_readq(io_addr);
}

static void atsd_reg_write(uvm_ibm_npu_t *npu, NvU8 reg, size_t offset, NvU64 val)
{
    __be64 __iomem *io_addr = npu->atsd_regs.io_addrs[reg] + offset;
    UVM_ASSERT(reg < npu->atsd_regs.count);
    __raw_writeq_be(val, io_addr);
}

// Acquire a set of registers in each NPU which is active in va_space
static void atsd_regs_acquire(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
    size_t i;
    for_each_npu_index_in_va_space(i, va_space)
        regs->ids[i] = atsd_reg_acquire(&g_uvm_global.npus[i]);
}

static void atsd_regs_release(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
    size_t i;
    for_each_npu_index_in_va_space(i, va_space)
        atsd_reg_release(&g_uvm_global.npus[i], regs->ids[i]);
}

// Write the provided value to each NPU active in va_space at the provided
// register offset.
static void atsd_regs_write(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs, size_t offset, NvU64 val)
{
    size_t i;
    for_each_npu_index_in_va_space(i, va_space)
        atsd_reg_write(&g_uvm_global.npus[i], regs->ids[i], offset, val);
}

// Wait for all prior operations issued to active NPUs in va_space on the given
// registers to finish.
static void atsd_regs_wait(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
    uvm_spin_loop_t spin;
    size_t i;
    for_each_npu_index_in_va_space(i, va_space) {
        UVM_SPIN_WHILE(atsd_reg_read(&g_uvm_global.npus[i], regs->ids[i], NPU_ATSD_REG_STAT), &spin)
            ;
    }
}

// Encode an invalidate targeting the given pasid and the given size for the
// NPU_ATSD_REG_LAUNCH register. The target address is encoded separately.
//
// psize must be one of the MMU_PAGE_* values defined in powerpc's asm/mmu.h. A
// psize of MMU_PAGE_COUNT means to invalidate the entire address space.
static NvU64 atsd_get_launch_val(mm_context_id_t pasid, int psize)
{
    NvU64 val = 0;

    val |= PPC_BIT(NPU_ATSD_REG_LAUNCH_PASID_ENABLE);
    val |= pasid << PPC_BITLSHIFT(NPU_ATSD_REG_LAUNCH_PASID_VAL);

    if (psize == MMU_PAGE_COUNT) {
        val |= PPC_BIT(NPU_ATSD_REG_LAUNCH_INVAL_ALL);
    }
    else {
        // The NPU registers do not support arbitrary sizes
        UVM_ASSERT(psize == MMU_PAGE_64K || psize == MMU_PAGE_2M || psize == MMU_PAGE_1G);
        val |= (NvU64)mmu_get_ap(psize) << PPC_BITLSHIFT(NPU_ATSD_REG_LAUNCH_INVAL_SIZE);
    }

    return val;
}

// Return the encoded size to use for an ATSD targeting the given range, in one
// of the MMU_PAGE_* values defined in powerpc's asm/mmu.h. A return value of
// MMU_PAGE_COUNT means the entire address space must be invalidated.
//
// start is an in/out parameter. On return start will be set to the aligned
// starting address to use for the ATSD. end is inclusive.
static int atsd_calc_size(NvU64 *start, NvU64 end)
{
    // ATSDs have high latency, so we prefer to over-invalidate rather than
    // issue multiple precise invalidates. Supported sizes are only 64K, 2M, and
    // 1G.

    *start = UVM_ALIGN_DOWN(*start, SZ_64K);
    end = UVM_ALIGN_DOWN(end, SZ_64K);
    if (*start == end)
        return MMU_PAGE_64K;

    *start = UVM_ALIGN_DOWN(*start, SZ_2M);
    end = UVM_ALIGN_DOWN(end, SZ_2M);
    if (*start == end)
        return MMU_PAGE_2M;

    *start = UVM_ALIGN_DOWN(*start, SZ_1G);
    end = UVM_ALIGN_DOWN(end, SZ_1G);
    if (*start == end)
        return MMU_PAGE_1G;

    return MMU_PAGE_COUNT;
}
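
A worked example of the fall-through logic above: both bounds are aligned down at each supported size, and the first size at which they land on the same aligned base covers the whole inclusive range with one ATSD. The standalone check below encodes two such cases (illustration only):

#include <assert.h>
#include <stdint.h>

#define ALIGN_DOWN(x, a) ((x) & ~((uint64_t)(a) - 1))

int main(void)
{
    // [0x12345, 0x1FFFF]: both bounds fall in the same 64K page, so one 64K
    // ATSD at the aligned start 0x10000 covers the whole range.
    assert(ALIGN_DOWN(0x12345ULL, 0x10000) == ALIGN_DOWN(0x1FFFFULL, 0x10000));

    // [0x12345, 0x42345]: spans multiple 64K pages but a single 2M page, so
    // atsd_calc_size() falls through to the 2M case with *start rounded to 0.
    // Aligning down at 64K and then at 2M gives the same result as aligning
    // down at 2M directly, so the in-place updates don't change the outcome.
    assert(ALIGN_DOWN(0x12345ULL, 0x10000) != ALIGN_DOWN(0x42345ULL, 0x10000));
    assert(ALIGN_DOWN(0x12345ULL, 0x200000) == ALIGN_DOWN(0x42345ULL, 0x200000));

    return 0;
}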

// Issue an ATSD to all NPUs and wait for completion
static void atsd_launch_wait(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs, NvU64 val)
{
    atsd_regs_write(va_space, regs, NPU_ATSD_REG_LAUNCH, val);
    atsd_regs_wait(va_space, regs);
}

// Issue and wait for the required membars following an invalidate
static void atsd_issue_membars(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
    size_t i;
    NvU32 num_membars = 0;

    // These membars are issued using ATSDs which target a reserved PASID of 0.
    // That PASID is valid on the GPU in order for the membar to be valid, but
    // 0 will never be used by the kernel for an actual address space so the
    // ATSD won't actually invalidate any entries.
    NvU64 val = atsd_get_launch_val(0, MMU_PAGE_COUNT);

    for_each_npu_index_in_va_space(i, va_space) {
        uvm_ibm_npu_t *npu = &g_uvm_global.npus[i];
        num_membars = max(num_membars, npu->atsd_regs.num_membars);
    }

    for (i = 0; i < num_membars; i++)
        atsd_launch_wait(va_space, regs, val);
}

static void uvm_ats_ibm_invalidate_all(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
    NvU64 val = atsd_get_launch_val(va_space_pasid(va_space), MMU_PAGE_COUNT);
    atsd_launch_wait(va_space, regs, val);
    atsd_issue_membars(va_space, regs);
}

static void uvm_ats_ibm_invalidate_range(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs, NvU64 start, int psize)
{
    NvU64 val = atsd_get_launch_val(va_space_pasid(va_space), psize);

    // Barriers are expensive, so write all address registers first then do a
    // single barrier for all of them.
    atsd_regs_write(va_space, regs, NPU_ATSD_REG_AVA, start);
    eieio();
    atsd_launch_wait(va_space, regs, val);
    atsd_issue_membars(va_space, regs);
}

#endif // UVM_ATS_IBM_SUPPORTED_IN_DRIVER()

void uvm_ats_ibm_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
#if UVM_ATS_IBM_SUPPORTED_IN_DRIVER()
    unsigned long irq_flags;
    uvm_atsd_regs_t regs;
    NvU64 atsd_start = start;
    int psize = atsd_calc_size(&atsd_start, end);
    uvm_ibm_va_space_t *ibm_va_space = &va_space->ats.ibm;

    BUILD_BUG_ON(order_base_2(UVM_MAX_ATSD_REGS) > 8*sizeof(regs.ids[0]));

    // We must hold this lock in at least read mode when accessing NPU
    // registers. See the comment in uvm_ats_ibm_unregister_gpu_va_space_driver.
    uvm_read_lock_irqsave(&ibm_va_space->rwlock, irq_flags);

    if (!bitmap_empty(ibm_va_space->npu_active_mask, NV_MAX_NPUS)) {
        atsd_regs_acquire(va_space, &regs);

        if (psize == MMU_PAGE_COUNT)
            uvm_ats_ibm_invalidate_all(va_space, &regs);
        else
            uvm_ats_ibm_invalidate_range(va_space, &regs, atsd_start, psize);

        atsd_regs_release(va_space, &regs);
    }

    uvm_read_unlock_irqrestore(&ibm_va_space->rwlock, irq_flags);
#else
    UVM_ASSERT_MSG(0, "This function should not be called on this kernel version\n");
#endif // UVM_ATS_IBM_SUPPORTED_IN_DRIVER()
}

#endif // UVM_ATS_IBM_SUPPORTED
#endif // UVM_IBM_NPU_SUPPORTED

@@ -1,266 +0,0 @@
/*******************************************************************************
    Copyright (c) 2018-2019 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_ATS_IBM_H__
#define __UVM_ATS_IBM_H__

#include "uvm_linux.h"
#include "uvm_forward_decl.h"
#include "uvm_hal_types.h"

#if defined(NVCPU_PPC64LE) && defined(NV_PNV_PCI_GET_NPU_DEV_PRESENT)
    #include <asm/mmu.h>
    #if defined(NV_MAX_NPUS)
        #define UVM_IBM_NPU_SUPPORTED() 1
    #else
        #define UVM_IBM_NPU_SUPPORTED() 0
    #endif
#else
    #define UVM_IBM_NPU_SUPPORTED() 0
#endif

#if defined(NV_ASM_OPAL_API_H_PRESENT)
    // For OPAL_NPU_INIT_CONTEXT
    #include <asm/opal-api.h>
#endif

// Timeline of kernel changes:
//
// 0) Before 1ab66d1fbadad86b1f4a9c7857e193af0ee0022c
//    - No NPU-ATS code existed, nor did the OPAL_NPU_INIT_CONTEXT firmware
//      call.
//    - NV_PNV_NPU2_INIT_CONTEXT_PRESENT                Not defined
//    - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID  Not defined
//    - OPAL_NPU_INIT_CONTEXT                           Not defined
//    - ATS support type                                None
//
// 1) NPU ATS code added: 1ab66d1fbadad86b1f4a9c7857e193af0ee0022c, v4.12
//    (2017-04-03)
//    - This commit added initial support for NPU ATS, including the necessary
//      OPAL firmware calls. This support was developmental and required
//      several bug fixes before it could be used in production.
//    - NV_PNV_NPU2_INIT_CONTEXT_PRESENT                Defined
//    - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID  Not defined
//    - OPAL_NPU_INIT_CONTEXT                           Defined
//    - ATS support type                                None
//
// 2) NPU ATS code fixed: a1409adac748f0db655e096521bbe6904aadeb98, v4.17
//    (2018-04-11)
//    - This commit changed the function signature for pnv_npu2_init_context's
//      callback parameter. Since all required bug fixes went in prior to this
//      change, we can use the callback signature as a flag to indicate
//      whether the PPC arch layer in the kernel supports ATS in production.
//    - NV_PNV_NPU2_INIT_CONTEXT_PRESENT                Defined
//    - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID  Defined
//    - OPAL_NPU_INIT_CONTEXT                           Defined
//    - ATS support type                                Kernel
//
// 3) NPU ATS code removed: 7eb3cf761927b2687164e182efa675e6c09cfe44, v5.3
//    (2019-06-25)
//    - This commit removed NPU-ATS support from the PPC arch layer, so the
//      driver needs to handle things instead. pnv_npu2_init_context is no
//      longer present, so we use OPAL_NPU_INIT_CONTEXT to differentiate
//      between this state and scenario #0.
//    - NV_PNV_NPU2_INIT_CONTEXT_PRESENT                Not defined
//    - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID  Not defined
//    - OPAL_NPU_INIT_CONTEXT                           Defined
//    - ATS support type                                Driver
//
#if defined(NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID)
    #define UVM_ATS_IBM_SUPPORTED_IN_KERNEL() 1
    #define UVM_ATS_IBM_SUPPORTED_IN_DRIVER() 0
#elif !defined(NV_PNV_NPU2_INIT_CONTEXT_PRESENT) && defined(OPAL_NPU_INIT_CONTEXT) && UVM_CAN_USE_MMU_NOTIFIERS()
    #define UVM_ATS_IBM_SUPPORTED_IN_KERNEL() 0
    #define UVM_ATS_IBM_SUPPORTED_IN_DRIVER() 1
#else
    #define UVM_ATS_IBM_SUPPORTED_IN_KERNEL() 0
    #define UVM_ATS_IBM_SUPPORTED_IN_DRIVER() 0
#endif

#define UVM_ATS_IBM_SUPPORTED() (UVM_ATS_IBM_SUPPORTED_IN_KERNEL() || UVM_ATS_IBM_SUPPORTED_IN_DRIVER())

// Maximum number of parallel ATSD register sets per NPU
#define UVM_MAX_ATSD_REGS 16

typedef struct
{
#if UVM_IBM_NPU_SUPPORTED()
    // These are the active NPUs in this VA space, that is, all NPUs with
    // GPUs that have GPU VA spaces registered in this VA space.
    //
    // If a bit is clear in npu_active_mask then the corresponding entry of
    // npu_ref_counts is 0. If a bit is set then the corresponding entry of
    // npu_ref_counts is greater than 0.
    NvU32 npu_ref_counts[NV_MAX_NPUS];
    DECLARE_BITMAP(npu_active_mask, NV_MAX_NPUS);
#endif

    // Lock protecting npu_ref_counts and npu_active_mask. Invalidations
    // take this lock for read. GPU VA space register and unregister take
    // this lock for write. Since all invalidations take the lock for read
    // for the duration of the invalidate, taking the lock for write also
    // flushes all invalidates.
    //
    // This is a spinlock because the invalidation code paths may be called
    // with interrupts disabled, so those paths can't take the VA space
    // lock. We could use a normal exclusive spinlock instead, but a reader/
    // writer lock is preferred to allow concurrent invalidates in the same
    // VA space.
    uvm_rwlock_irqsave_t rwlock;
} uvm_ibm_va_space_t;

typedef struct
{
#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
    struct npu_context *npu_context;
#endif

    // Used on the teardown path to know what to clean up. npu_context acts
    // as the equivalent flag for kernel-provided support.
    bool did_ibm_driver_init;
} uvm_ibm_gpu_va_space_t;

struct uvm_ibm_npu_struct
{
    // Number of retained GPUs under this NPU. The other fields in this struct
    // are only valid if this is non-zero.
    unsigned int num_retained_gpus;

    // PCI domain containing this NPU. This acts as a unique system-wide ID for
    // this UVM NPU.
    int pci_domain;

    // The ATS-related fields are only valid when ATS support is enabled and
    // UVM_ATS_IBM_SUPPORTED_IN_DRIVER() is 1.
    struct
    {
        // Mapped addresses of the ATSD trigger registers. There may be more
        // than one set of identical registers per NPU to enable concurrent
        // invalidates.
        //
        // These will not be accessed unless there is a GPU VA space registered
        // on a GPU under this NPU. They are protected by bit locks in the locks
        // field.
        __be64 __iomem *io_addrs[UVM_MAX_ATSD_REGS];

        // Actual number of registers in the io_addrs array
        size_t count;

        // Bitmask for allocation and locking of the registers. Bit index n
        // corresponds to io_addrs[n]. A set bit means that index is in use
        // (locked).
        DECLARE_BITMAP(locks, UVM_MAX_ATSD_REGS);

        // Max value of any uvm_parent_gpu_t::num_hshub_tlb_invalidate_membars
        // for all retained GPUs under this NPU.
        NvU32 num_membars;
    } atsd_regs;
};

#if UVM_IBM_NPU_SUPPORTED()
    NV_STATUS uvm_ats_ibm_add_gpu(uvm_parent_gpu_t *parent_gpu);
    void uvm_ats_ibm_remove_gpu(uvm_parent_gpu_t *parent_gpu);
#else
    static NV_STATUS uvm_ats_ibm_add_gpu(uvm_parent_gpu_t *parent_gpu)
    {
        return NV_OK;
    }

    static void uvm_ats_ibm_remove_gpu(uvm_parent_gpu_t *parent_gpu)
    {

    }
#endif // UVM_IBM_NPU_SUPPORTED

#if UVM_ATS_IBM_SUPPORTED()
    // Initializes IBM specific GPU state.
    //
    // LOCKING: None
    void uvm_ats_ibm_init_va_space(uvm_va_space_t *va_space);

    // Enables ATS access for the gpu_va_space on the mm_struct associated with
    // the VA space (va_space_mm).
    //
    // If UVM_ATS_IBM_SUPPORTED_IN_KERNEL() is 1, NV_ERR_NOT_SUPPORTED is
    // returned if current->mm does not match va_space_mm.mm or if a GPU VA
    // space within another VA space has already called this function on the
    // same mm.
    //
    // If UVM_ATS_IBM_SUPPORTED_IN_DRIVER() is 1 there are no such restrictions.
    //
    // LOCKING: The VA space lock must be held in write mode.
    //          current->mm->mmap_lock must be held in write mode iff
    //          UVM_ATS_IBM_SUPPORTED_IN_KERNEL() is 1.
    NV_STATUS uvm_ats_ibm_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);

    // Disables ATS access for the gpu_va_space. Prior to calling this function,
    // the caller must guarantee that the GPU will no longer make any ATS
    // accesses in this GPU VA space, and that no ATS fault handling for this
    // GPU will be attempted.
    //
    // LOCKING: This function may block on mmap_lock and the VA space lock, so
    //          neither must be held.
    void uvm_ats_ibm_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);

    // Synchronously invalidate ATS translations cached by GPU TLBs. The
    // invalidate applies to all GPUs with active GPU VA spaces in va_space, and
    // covers all pages touching any part of the given range. end is inclusive.
    //
    // GMMU translations in the given range are not guaranteed to be
    // invalidated.
    //
    // LOCKING: No locks are required, but this function may be called with
    //          interrupts disabled.
    void uvm_ats_ibm_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end);
#else
    static void uvm_ats_ibm_init_va_space(uvm_va_space_t *va_space)
    {

    }

    static NV_STATUS uvm_ats_ibm_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
    {
        return NV_OK;
    }

    static void uvm_ats_ibm_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
    {

    }

    static void uvm_ats_ibm_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
    {

    }
#endif // UVM_ATS_IBM_SUPPORTED

static NV_STATUS uvm_ats_ibm_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
    return NV_OK;
}

static void uvm_ats_ibm_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{

}

#endif // __UVM_ATS_IBM_H__

@@ -38,12 +38,10 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->utlb_per_gpc_count = uvm_blackwell_get_utlbs_per_gpc(parent_gpu);

    parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
                                                          parent_gpu->utlb_per_gpc_count;
    parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
    {
        uvm_fault_buffer_entry_t *dummy;
        UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
                   (sizeof(dummy->fault_source.utlb_id) * 8)));
        UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
    }

    // A single top level PDE on Blackwell covers 64 PB and that's the minimum
@@ -83,10 +81,6 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->non_replayable_faults_supported = true;

    parent_gpu->access_counters_supported = true;

    parent_gpu->access_counters_can_use_physical_addresses = false;

    parent_gpu->fault_cancel_va_supported = true;

    parent_gpu->scoped_atomics_supported = true;
@@ -106,4 +100,15 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->no_ats_range_required = true;

    parent_gpu->conf_computing.per_channel_key_rotation = true;

    // TODO: Bug 5023085: this should be queried from RM instead of determined
    // by UVM.
    if (parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100 &&
        parent_gpu->rm_info.gpuImplementation ==
        NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B)
        parent_gpu->is_integrated_gpu = true;
    if (parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200 &&
        parent_gpu->rm_info.gpuImplementation ==
        NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B)
        parent_gpu->is_integrated_gpu = true;
}

@@ -39,6 +39,7 @@
#include "hwref/blackwell/gb100/dev_fault.h"

static uvm_mmu_mode_hal_t blackwell_mmu_mode_hal;
static uvm_mmu_mode_hal_t blackwell_integrated_mmu_mode_hal;

static NvU32 page_table_depth_blackwell(NvU64 page_size)
{
@@ -59,35 +60,71 @@ static NvU64 page_sizes_blackwell(void)
    return UVM_PAGE_SIZE_256G | UVM_PAGE_SIZE_512M | UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
}

static NvU64 page_sizes_blackwell_integrated(void)
{
    return UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
}

static uvm_mmu_mode_hal_t *__uvm_hal_mmu_mode_blackwell(uvm_mmu_mode_hal_t *mmu_mode_hal,
                                                        NvU64 big_page_size)
{
    uvm_mmu_mode_hal_t *hopper_mmu_mode_hal;

    UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);

    hopper_mmu_mode_hal = uvm_hal_mmu_mode_hopper(big_page_size);
    UVM_ASSERT(hopper_mmu_mode_hal);

    // The assumption made is that arch_hal->mmu_mode_hal() will be called
    // under the global lock the first time, so check it here.
    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    *mmu_mode_hal = *hopper_mmu_mode_hal;
    mmu_mode_hal->page_table_depth = page_table_depth_blackwell;

    return mmu_mode_hal;
}

uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell(NvU64 big_page_size)
{
    static bool initialized = false;

    UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);

    // TODO: Bug 1789555: RM should reject the creation of GPU VA spaces with
    // 128K big page size for Pascal+ GPUs
    if (big_page_size == UVM_PAGE_SIZE_128K)
        return NULL;

    if (!initialized) {
        uvm_mmu_mode_hal_t *hopper_mmu_mode_hal = uvm_hal_mmu_mode_hopper(big_page_size);
        UVM_ASSERT(hopper_mmu_mode_hal);

        // The assumption made is that arch_hal->mmu_mode_hal() will be called
        // under the global lock the first time, so check it here.
        uvm_assert_mutex_locked(&g_uvm_global.global_lock);

        blackwell_mmu_mode_hal = *hopper_mmu_mode_hal;
        blackwell_mmu_mode_hal.page_table_depth = page_table_depth_blackwell;
        blackwell_mmu_mode_hal.page_sizes = page_sizes_blackwell;
        uvm_mmu_mode_hal_t *mmu_mode_hal;

        mmu_mode_hal = __uvm_hal_mmu_mode_blackwell(&blackwell_mmu_mode_hal, big_page_size);
        mmu_mode_hal->page_sizes = page_sizes_blackwell;
        initialized = true;
    }

    return &blackwell_mmu_mode_hal;
}

uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell_integrated(NvU64 big_page_size)
{
    static bool initialized = false;

    // TODO: Bug 1789555: RM should reject the creation of GPU VA spaces with
    // 128K big page size for Pascal+ GPUs
    if (big_page_size == UVM_PAGE_SIZE_128K)
        return NULL;

    if (!initialized) {
        uvm_mmu_mode_hal_t *mmu_mode_hal;

        mmu_mode_hal = __uvm_hal_mmu_mode_blackwell(&blackwell_integrated_mmu_mode_hal, big_page_size);
        mmu_mode_hal->page_sizes = page_sizes_blackwell_integrated;
        initialized = true;
    }

    return &blackwell_integrated_mmu_mode_hal;
}
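
The refactor above centralizes a copy-then-override initialization: each Blackwell HAL starts as a copy of the Hopper vtable, has selected entries replaced, and is filled in lazily under the global lock. A minimal standalone sketch of that pattern, with hypothetical types and functions (locking omitted):

#include <stdbool.h>

// Hypothetical operations table, standing in for uvm_mmu_mode_hal_t.
struct ops {
    unsigned long (*page_sizes)(void);
    unsigned (*depth)(unsigned long page_size);
};

static unsigned long base_page_sizes(void) { return 0x3; }
static unsigned base_depth(unsigned long page_size) { (void)page_size; return 4; }
static const struct ops base_ops = { base_page_sizes, base_depth };

static unsigned long derived_page_sizes(void) { return 0x7; }

static struct ops derived_ops; // specialized copy, filled once

static const struct ops *get_derived_ops(void)
{
    static bool initialized = false;

    if (!initialized) {
        derived_ops = base_ops;                      // inherit everything...
        derived_ops.page_sizes = derived_page_sizes; // ...then override a slot
        initialized = true;
    }
    return &derived_ops;
}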

NvU16 uvm_hal_blackwell_mmu_client_id_to_utlb_id(NvU16 client_id)
{
    switch (client_id) {

@@ -65,7 +65,10 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
    memset(host_ptr, 0, CE_TEST_MEM_SIZE);

    for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
        status = uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, CE_TEST_MEM_SIZE, 0, &mem[i]);
        uvm_rm_mem_type_t type;

        type = gpu->mem_info.size ? UVM_RM_MEM_TYPE_GPU : UVM_RM_MEM_TYPE_SYS;
        status = uvm_rm_mem_alloc(gpu, type, CE_TEST_MEM_SIZE, 0, &mem[i]);
        TEST_CHECK_GOTO(status == NV_OK, done);
    }

@@ -405,6 +408,7 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
    uvm_rm_mem_t *sys_rm_mem = NULL;
    uvm_rm_mem_t *gpu_rm_mem = NULL;
    uvm_gpu_address_t gpu_addresses[4] = {0};
    size_t gpu_addresses_length = 0;
    size_t size = gpu->big_page.internal_size;
    static const size_t element_sizes[] = {1, 4, 8};
    const size_t iterations = 4;
@@ -435,7 +439,7 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)

    // Virtual address (in UVM's internal address space) backed by sysmem
    TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
    gpu_addresses[0] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
    gpu_addresses[gpu_addresses_length++] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);

    if (g_uvm_global.conf_computing_enabled) {
        for (i = 0; i < iterations; ++i) {
@@ -472,21 +476,23 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
    // Physical address in sysmem
    TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &sys_uvm_mem), done);
    TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_phys(sys_uvm_mem, gpu), done);
    gpu_addresses[1] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
    gpu_addresses[gpu_addresses_length++] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);

    // Physical address in vidmem
    mem_params.backing_gpu = gpu;
    TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &gpu_uvm_mem), done);
    gpu_addresses[2] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
    if (gpu->mem_info.size > 0) {
        // Physical address in vidmem
        mem_params.backing_gpu = gpu;
        TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &gpu_uvm_mem), done);
        gpu_addresses[gpu_addresses_length++] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);

    // Virtual address (in UVM's internal address space) backed by vidmem
    TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
    gpu_addresses[3] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
        // Virtual address (in UVM's internal address space) backed by vidmem
        TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
        gpu_addresses[gpu_addresses_length++] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
    }

    for (i = 0; i < iterations; ++i) {
        for (j = 0; j < ARRAY_SIZE(gpu_addresses); ++j) {
            for (k = 0; k < ARRAY_SIZE(gpu_addresses); ++k) {
        for (j = 0; j < gpu_addresses_length; ++j) {
            for (k = 0; k < gpu_addresses_length; ++k) {
                for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
                    TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
                                                                    gpu_addresses[k],

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2024 NVIDIA Corporation
    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -110,16 +110,22 @@ typedef enum
bool uvm_channel_pool_is_p2p(uvm_channel_pool_t *pool)
{
    uvm_channel_manager_t *manager = pool->manager;
    uvm_gpu_t *gpu = manager->gpu;
    uvm_gpu_id_t id;

    if (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_GPU] == pool)
        return true;

    for_each_gpu_id_in_mask(id, &manager->gpu->peer_info.peer_gpu_mask) {
        if (manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(id)] == pool)
    uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
    for_each_gpu_id_in_mask(id, &gpu->peer_info.peer_gpu_mask) {
        if (manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(id)] == pool) {
            uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
            return true;
        }
    }

    uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);

    return false;
}
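
The change above adds peer_gpu_lock around the peer-mask walk; note that every early return inside the critical section must drop the lock first. A minimal standalone sketch of the same discipline, using pthreads and hypothetical data:

#include <pthread.h>
#include <stdbool.h>

#define N_PEERS 8

static pthread_mutex_t peer_lock = PTHREAD_MUTEX_INITIALIZER;
static int peer_table[N_PEERS]; // protected by peer_lock

// Returns true if 'value' appears in the table. Every exit path from the
// critical section unlocks first, mirroring the early unlock-then-return in
// uvm_channel_pool_is_p2p() above.
static bool peer_table_contains(int value)
{
    pthread_mutex_lock(&peer_lock);

    for (int i = 0; i < N_PEERS; i++) {
        if (peer_table[i] == value) {
            pthread_mutex_unlock(&peer_lock);
            return true;
        }
    }

    pthread_mutex_unlock(&peer_lock);
    return false;
}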
|
||||
|
||||
@@ -1974,6 +1980,7 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
|
||||
{
|
||||
uvm_channel_pool_t *pool;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_gpu_t *gpu = channel_manager->gpu;
|
||||
uvm_gpu_id_t gpu_id;
|
||||
DECLARE_BITMAP(suspended_pools, UVM_COPY_ENGINE_COUNT_MAX);
|
||||
|
||||
@@ -1981,7 +1988,9 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
|
||||
// Use bitmap to track which were suspended.
|
||||
bitmap_zero(suspended_pools, channel_manager->num_channel_pools);
|
||||
|
||||
for_each_gpu_id_in_mask(gpu_id, &channel_manager->gpu->peer_info.peer_gpu_mask) {
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
for_each_gpu_id_in_mask(gpu_id, &gpu->peer_info.peer_gpu_mask) {
|
||||
pool = channel_manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(gpu_id)];
|
||||
if (pool && !test_bit(uvm_channel_pool_index_in_channel_manager(pool), suspended_pools)) {
|
||||
status = channel_pool_suspend_p2p(pool);
|
||||
@@ -2014,6 +2023,7 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
|
||||
void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager)
|
||||
{
|
||||
uvm_channel_pool_t *pool;
|
||||
uvm_gpu_t *gpu = channel_manager->gpu;
|
||||
uvm_gpu_id_t gpu_id;
|
||||
DECLARE_BITMAP(resumed_pools, UVM_COPY_ENGINE_COUNT_MAX);
|
||||
|
||||
@@ -2021,7 +2031,9 @@ void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager)
|
||||
// Use bitmap to track which were suspended.
|
||||
bitmap_zero(resumed_pools, channel_manager->num_channel_pools);
|
||||
|
||||
for_each_gpu_id_in_mask(gpu_id, &channel_manager->gpu->peer_info.peer_gpu_mask) {
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
for_each_gpu_id_in_mask(gpu_id, &gpu->peer_info.peer_gpu_mask) {
|
||||
pool = channel_manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(gpu_id)];
|
||||
if (pool && !test_and_set_bit(uvm_channel_pool_index_in_channel_manager(pool), resumed_pools))
|
||||
channel_pool_resume_p2p(pool);
|
||||
@@ -2889,6 +2901,13 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
|
||||
|
||||
static bool ce_is_usable(const UvmGpuCopyEngineCaps *cap)
|
||||
{
|
||||
// When Confidential Computing is enabled, all Copy Engines must support
|
||||
// encryption / decryption, tracked by 'secure' flag. This holds even for
|
||||
// non-CPU-GPU transactions because each channel has an associate semaphore,
|
||||
// and semaphore release must be observable by all processing units.
|
||||
if (g_uvm_global.conf_computing_enabled && !cap->secure)
|
||||
return false;
|
||||
|
||||
return cap->supported && !cap->grce;
|
||||
}
|
||||
|
||||
@@ -3243,9 +3262,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
|
||||
manager->conf.num_gpfifo_entries = UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT;
|
||||
|
||||
if (manager->conf.num_gpfifo_entries != uvm_channel_num_gpfifo_entries) {
|
||||
pr_info("Invalid value for uvm_channel_num_gpfifo_entries = %u, using %u instead\n",
|
||||
uvm_channel_num_gpfifo_entries,
|
||||
manager->conf.num_gpfifo_entries);
|
||||
UVM_INFO_PRINT("Invalid value for uvm_channel_num_gpfifo_entries = %u, using %u instead\n",
|
||||
uvm_channel_num_gpfifo_entries,
|
||||
manager->conf.num_gpfifo_entries);
|
||||
}
|
||||
|
||||
// 2- Allocation locations
|
||||
@@ -3285,9 +3304,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
|
||||
pushbuffer_loc_value = uvm_channel_pushbuffer_loc;
|
||||
if (!is_string_valid_location(pushbuffer_loc_value)) {
|
||||
pushbuffer_loc_value = UVM_CHANNEL_PUSHBUFFER_LOC_DEFAULT;
|
||||
pr_info("Invalid value for uvm_channel_pushbuffer_loc = %s, using %s instead\n",
|
||||
uvm_channel_pushbuffer_loc,
|
||||
pushbuffer_loc_value);
|
||||
UVM_INFO_PRINT("Invalid value for uvm_channel_pushbuffer_loc = %s, using %s instead\n",
|
||||
uvm_channel_pushbuffer_loc,
|
||||
pushbuffer_loc_value);
|
||||
}
|
||||
|
||||
// Override the default value if requested by the user
|
||||
@@ -3297,8 +3316,8 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
|
||||
// so force the location to sys for now.
|
||||
// TODO: Bug 2904133: Remove the following "if" after the bug is fixed.
|
||||
if (NVCPU_IS_AARCH64) {
|
||||
pr_info("uvm_channel_pushbuffer_loc = %s is not supported on AARCH64, using sys instead\n",
|
||||
pushbuffer_loc_value);
|
||||
UVM_INFO_PRINT("uvm_channel_pushbuffer_loc = %s is not supported on AARCH64, using sys instead\n",
|
||||
pushbuffer_loc_value);
|
||||
manager->conf.pushbuffer_loc = UVM_BUFFER_LOCATION_SYS;
|
||||
}
|
||||
else {
|
||||
@@ -3310,8 +3329,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
|
||||
// Only support the knobs for GPFIFO/GPPut on Volta+
|
||||
if (!gpu->parent->gpfifo_in_vidmem_supported) {
|
||||
if (manager->conf.gpput_loc == UVM_BUFFER_LOCATION_SYS) {
|
||||
pr_info("CAUTION: allocating GPPut in sysmem is NOT supported and may crash the system, using %s instead\n",
|
||||
buffer_location_to_string(UVM_BUFFER_LOCATION_DEFAULT));
|
||||
UVM_INFO_PRINT("CAUTION: allocating GPPut in sysmem is NOT supported and may crash the system, using %s "
|
||||
"instead\n",
|
||||
buffer_location_to_string(UVM_BUFFER_LOCATION_DEFAULT));
|
||||
}
|
||||
|
||||
manager->conf.gpfifo_loc = UVM_BUFFER_LOCATION_DEFAULT;
|
||||
@@ -3323,17 +3343,17 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
|
||||
gpfifo_loc_value = uvm_channel_gpfifo_loc;
|
||||
if (!is_string_valid_location(gpfifo_loc_value)) {
|
||||
gpfifo_loc_value = UVM_CHANNEL_GPFIFO_LOC_DEFAULT;
|
||||
pr_info("Invalid value for uvm_channel_gpfifo_loc = %s, using %s instead\n",
|
||||
uvm_channel_gpfifo_loc,
|
||||
gpfifo_loc_value);
|
||||
UVM_INFO_PRINT("Invalid value for uvm_channel_gpfifo_loc = %s, using %s instead\n",
|
||||
uvm_channel_gpfifo_loc,
|
||||
gpfifo_loc_value);
|
||||
}
|
||||
|
||||
gpput_loc_value = uvm_channel_gpput_loc;
|
||||
if (!is_string_valid_location(gpput_loc_value)) {
|
||||
gpput_loc_value = UVM_CHANNEL_GPPUT_LOC_DEFAULT;
|
||||
pr_info("Invalid value for uvm_channel_gpput_loc = %s, using %s instead\n",
|
||||
uvm_channel_gpput_loc,
|
||||
gpput_loc_value);
|
||||
UVM_INFO_PRINT("Invalid value for uvm_channel_gpput_loc = %s, using %s instead\n",
|
||||
uvm_channel_gpput_loc,
|
||||
gpput_loc_value);
|
||||
}
|
||||
|
||||
// On coherent platforms where the GPU does not cache sysmem but the CPU
@@ -57,6 +57,7 @@ enum {
// NULL.
void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);

// Long prefix - typically for debugging and tests.
#define UVM_PRINT_FUNC_PREFIX(func, prefix, fmt, ...) \
    func(prefix "%s:%u %s[pid:%d]" fmt, \
         kbasename(__FILE__), \
@@ -65,10 +66,15 @@ void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);
         current->pid, \
         ##__VA_ARGS__)

// Short prefix - typically for information.
#define UVM_PRINT_FUNC_SHORT_PREFIX(func, prefix, fmt, ...) \
    func(prefix fmt, ##__VA_ARGS__)

// No prefix - used by kernel panic messages.
#define UVM_PRINT_FUNC(func, fmt, ...) \
    UVM_PRINT_FUNC_PREFIX(func, "", fmt, ##__VA_ARGS__)

// Check whether UVM_{ERR,DBG,INFO}_PRINT* should be enabled
// Check whether UVM_{ERR,DBG}_PRINT* should be enabled.
bool uvm_debug_prints_enabled(void);

// A printing helper like UVM_PRINT_FUNC_PREFIX that only prints if
@@ -80,10 +86,10 @@ bool uvm_debug_prints_enabled(void);
    } \
} while (0)

#define UVM_ASSERT_PRINT(fmt, ...) \
#define UVM_ERR_PRINT_ALWAYS(fmt, ...) \
    UVM_PRINT_FUNC_PREFIX(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)

#define UVM_ASSERT_PRINT_RL(fmt, ...) \
#define UVM_ERR_PRINT_ALWAYS_RL(fmt, ...) \
    UVM_PRINT_FUNC_PREFIX(printk_ratelimited, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)

#define UVM_ERR_PRINT(fmt, ...) \
@@ -95,13 +101,16 @@ bool uvm_debug_prints_enabled(void);
#define UVM_DBG_PRINT(fmt, ...) \
    UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)

#define UVM_DBG_PRINT_RL(fmt, ...) \
#define UVM_DBG_PRINT_RL(fmt, ...) \
    UVM_PRINT_FUNC_PREFIX_CHECK(printk_ratelimited, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)

// UVM_INFO_PRINT prints in all modes (including in release mode). It is
// used for relaying driver-level information, rather than detailed debugging
// information; therefore, it does not add the "pretty long prefix".
#define UVM_INFO_PRINT(fmt, ...) \
    UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
    UVM_PRINT_FUNC_SHORT_PREFIX(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)

#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
    UVM_ERR_PRINT("ERROR: %s : " msg "\n", nvstatusToString(rmStatus), ##__VA_ARGS__)

#define UVM_PANIC() UVM_PRINT_FUNC(panic, "\n")
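For orientation, a hedged sketch of what the two prefix flavors expand to; the exact prefix string and the field order of the long prefix are assumptions pieced together from the fragments above, not verified output:

// Short prefix (UVM_INFO_PRINT): module prefix only, printed in all builds.
UVM_INFO_PRINT("driver loaded\n");
//   ~> printk(KERN_INFO "nvidia-uvm:  driver loaded\n")

// Long prefix (UVM_ERR_PRINT, UVM_DBG_PRINT): file, line, function and pid,
// and only when uvm_debug_prints_enabled() returns true.
UVM_ERR_PRINT("bad status %d\n", -22);
//   ~> printk(KERN_ERR "nvidia-uvm:  uvm_gpu.c:42 add_gpu[pid:1234] bad status -22\n")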
@@ -134,13 +143,13 @@ void on_uvm_test_fail(void);
// Unlike on_uvm_test_fail it provides 'panic' coverity semantics
void on_uvm_assert(void);

#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \
    do { \
        if (unlikely(!(expr))) { \
            UVM_ASSERT_PRINT("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
            dump_stack(); \
            on_uvm_assert(); \
        } \
#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \
    do { \
        if (unlikely(!(expr))) { \
            UVM_ERR_PRINT_ALWAYS("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
            dump_stack(); \
            on_uvm_assert(); \
        } \
    } while (0)

// Prevent function calls in expr and the print argument list from being
@@ -151,7 +160,8 @@ void on_uvm_assert(void);
        UVM_NO_PRINT(fmt, ##__VA_ARGS__); \
    } while (0)

// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity builds
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity
// builds.
#if UVM_IS_DEBUG() || defined __COVERITY__
#define UVM_ASSERT_MSG(expr, fmt, ...) _UVM_ASSERT_MSG(expr, #expr, ": " fmt, ##__VA_ARGS__)
#define UVM_ASSERT(expr) _UVM_ASSERT_MSG(expr, #expr, "\n")
@@ -174,16 +184,16 @@ extern bool uvm_release_asserts_set_global_error_for_tests;
// Given these are enabled for release builds, we need to be more cautious than
// in UVM_ASSERT(). Use a ratelimited print and only dump the stack if a module
// param is enabled.
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
    do { \
        if (uvm_release_asserts && unlikely(!(expr))) { \
            UVM_ASSERT_PRINT_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
            if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
                uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \
            if (uvm_release_asserts_dump_stack) \
                dump_stack(); \
            on_uvm_assert(); \
        } \
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
    do { \
        if (uvm_release_asserts && unlikely(!(expr))) { \
            UVM_ERR_PRINT_ALWAYS_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
            if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
                uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \
            if (uvm_release_asserts_dump_stack) \
                dump_stack(); \
            on_uvm_assert(); \
        } \
    } while (0)

#define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...) _UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__)
@@ -240,15 +250,6 @@ static inline NvBool uvm_ranges_overlap(NvU64 a_start, NvU64 a_end, NvU64 b_star
    return a_end >= b_start && b_end >= a_start;
}

static int debug_mode(void)
{
#ifdef DEBUG
    return 1;
#else
    return 0;
#endif
}

static inline void kmem_cache_destroy_safe(struct kmem_cache **ppCache)
{
    if (ppCache)
@@ -326,22 +327,6 @@ typedef struct
    NvHandle user_object;
} uvm_rm_user_object_t;

typedef enum
{
    UVM_FD_UNINITIALIZED,
    UVM_FD_INITIALIZING,
    UVM_FD_VA_SPACE,
    UVM_FD_MM,
    UVM_FD_COUNT
} uvm_fd_type_t;

// This should be large enough to fit the valid values from uvm_fd_type_t above.
// Note we can't use order_base_2(UVM_FD_COUNT) to define this because our code
// coverage tool fails due when the preprocessor expands that to a huge mess of
// ternary operators.
#define UVM_FD_TYPE_BITS 2
#define UVM_FD_TYPE_MASK ((1UL << UVM_FD_TYPE_BITS) - 1)

// Macro used to compare two values for types that support less than operator.
// It returns -1 if a < b, 1 if a > b and 0 if a == b
#define UVM_CMP_DEFAULT(a,b) \
@@ -364,37 +349,13 @@ typedef enum
// file. A NULL input returns false.
bool uvm_file_is_nvidia_uvm(struct file *filp);

// Returns the type of data filp->private_data contains to and if ptr_val !=
// NULL returns the value of the pointer.
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val);

// Returns the pointer stored in filp->private_data if the type
// matches, otherwise returns NULL.
void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type);

// Reads the first word in the supplied struct page.
static inline void uvm_touch_page(struct page *page)
{
    char *mapping;

    UVM_ASSERT(page);

    mapping = (char *) kmap(page);
    (void)READ_ONCE(*mapping);
    kunmap(page);
}
// Like uvm_file_is_nvidia_uvm(), but further requires that the input file
// represent a UVM VA space (has fd type UVM_FD_VA_SPACE).
bool uvm_file_is_nvidia_uvm_va_space(struct file *filp);

// Return true if the VMA is one used by UVM managed allocations.
bool uvm_vma_is_managed(struct vm_area_struct *vma);

static bool uvm_platform_uses_canonical_form_address(void)
{
    if (NVCPU_IS_PPC64LE)
        return false;

    return true;
}

// Similar to the GPU MMU HAL num_va_bits(), it returns the CPU's num_va_bits().
static NvU32 uvm_cpu_num_va_bits(void)
{
@@ -410,7 +371,7 @@ static void uvm_get_unaddressable_range(NvU32 num_va_bits, NvU64 *first, NvU64 *

    // Maxwell GPUs (num_va_bits == 40b) do not support canonical form address
    // even when plugged into platforms using it.
    if (uvm_platform_uses_canonical_form_address() && num_va_bits > 40) {
    if (num_va_bits > 40) {
        *first = 1ULL << (num_va_bits - 1);
        *outer = (NvU64)((NvS64)(1ULL << 63) >> (64 - num_va_bits));
    }
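To make the sign-extension arithmetic above concrete, a worked example (the numbers are mine, not from the source):

// For num_va_bits == 48, uvm_get_unaddressable_range() yields:
//   *first = 1ULL << 47                          == 0x0000800000000000
//   *outer = (NvU64)((NvS64)(1ULL << 63) >> 16)  == 0xFFFF800000000000
// so the non-canonical hole [0x0000800000000000, 0xFFFF800000000000) sits
// between the top of the lower canonical half and the bottom of the
// sign-extended upper half.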
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2021-2024 NVIDIA Corporation
    Copyright (c) 2021-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -532,7 +532,7 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
{
    NV_STATUS status;
    NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
    UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
    UvmCslContext *csl_context = &parent_gpu->fault_buffer.rm_info.replayable.cslCtx;

    // There is no dedicated lock for the CSL context associated with replayable
    // faults. The mutual exclusion required by the RM CSL API is enforced by
@@ -571,7 +571,7 @@ void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu)
{
    NV_STATUS status;
    NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
    UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
    UvmCslContext *csl_context = &parent_gpu->fault_buffer.rm_info.replayable.cslCtx;

    // See comment in uvm_conf_computing_fault_decrypt
    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
@@ -708,7 +708,12 @@ void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu)

bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu)
{
    return gpu->channel_manager->conf_computing.key_rotation_enabled;
    UVM_ASSERT(gpu);

    // If the channel_manager is not set, we're in channel manager destroy
    // path after the pointer was NULL-ed. Chances are that other key rotation
    // infrastructure is not available either. Disallow the key rotation.
    return gpu->channel_manager && gpu->channel_manager->conf_computing.key_rotation_enabled;
}

bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool)
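The key-rotation hunk above is a teardown-hardening change: the accessor must tolerate gpu->channel_manager having already been NULL-ed. A tiny sketch of that defensive-accessor pattern, with hypothetical names (widget, manager, feature_enabled):

// Sketch: never dereference an owner pointer that teardown may have cleared.
static bool widget_feature_enabled(struct widget *w)
{
    UVM_ASSERT(w);

    // During destroy, w->manager is NULL-ed before the last readers drain,
    // so report the feature as disabled instead of crashing.
    return w->manager && w->manager->feature_enabled;
}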
114
kernel-open/nvidia-uvm/uvm_fd_type.c
Normal file
@@ -0,0 +1,114 @@
/*******************************************************************************
    Copyright (c) 2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be
    included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_fd_type.h"
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_va_space.h"
#include "uvm_test_file.h"

uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val)
{
    unsigned long uptr;
    uvm_fd_type_t type;
    void *ptr;

    UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));

    uptr = atomic_long_read_acquire((atomic_long_t *) (&filp->private_data));
    type = (uvm_fd_type_t)(uptr & UVM_FD_TYPE_MASK);
    ptr = (void *)(uptr & ~UVM_FD_TYPE_MASK);
    BUILD_BUG_ON(UVM_FD_COUNT > UVM_FD_TYPE_MASK + 1);

    switch (type) {
        case UVM_FD_UNINITIALIZED:
        case UVM_FD_INITIALIZING:
            UVM_ASSERT(!ptr);
            break;

        case UVM_FD_VA_SPACE:
            UVM_ASSERT(ptr);
            BUILD_BUG_ON(__alignof__(uvm_va_space_t) < (1UL << UVM_FD_TYPE_BITS));
            break;

        case UVM_FD_MM:
            UVM_ASSERT(ptr);
            BUILD_BUG_ON(__alignof__(struct file) < (1UL << UVM_FD_TYPE_BITS));
            break;

        case UVM_FD_TEST:
            UVM_ASSERT(ptr);
            BUILD_BUG_ON(__alignof__(uvm_test_file_t) < (1UL << UVM_FD_TYPE_BITS));
            break;

        default:
            UVM_ASSERT(0);
    }

    if (ptr_val)
        *ptr_val = ptr;

    return type;
}

void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type)
{
    void *ptr;

    UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));

    if (uvm_fd_type(filp, &ptr) == type)
        return ptr;
    else
        return NULL;
}

uvm_fd_type_t uvm_fd_type_init_cas(struct file *filp)
{
    long old = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED, UVM_FD_INITIALIZING);

    return (uvm_fd_type_t)(old & UVM_FD_TYPE_MASK);
}

NV_STATUS uvm_fd_type_init(struct file *filp)
{
    uvm_fd_type_t old = uvm_fd_type_init_cas(filp);

    if (old != UVM_FD_UNINITIALIZED)
        return NV_ERR_IN_USE;

    return NV_OK;
}

void uvm_fd_type_set(struct file *filp, uvm_fd_type_t type, void *ptr)
{
    void *tmp_ptr;

    UVM_ASSERT(uvm_fd_type(filp, &tmp_ptr) == UVM_FD_INITIALIZING);
    UVM_ASSERT(!tmp_ptr);

    if (type == UVM_FD_UNINITIALIZED)
        UVM_ASSERT(!ptr);

    UVM_ASSERT(((uintptr_t)ptr & type) == 0);

    atomic_long_set_release((atomic_long_t *)&filp->private_data, (uintptr_t)ptr | type);
}
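uvm_fd_type.c keeps a small type tag in the low alignment bits of filp->private_data, which is why every pointed-to type must be aligned to at least 1 << UVM_FD_TYPE_BITS. A self-contained userspace sketch of the same tagged-pointer trick (the names are mine, not the driver's):

#include <assert.h>
#include <stdint.h>

#define TAG_BITS 3
#define TAG_MASK ((1UL << TAG_BITS) - 1UL)

// Pack a small tag into the low bits of a sufficiently aligned pointer.
static uintptr_t tag_pack(void *ptr, unsigned tag)
{
    assert(tag <= TAG_MASK);
    assert(((uintptr_t)ptr & TAG_MASK) == 0); // alignment provides free bits
    return (uintptr_t)ptr | tag;
}

static void *tag_ptr(uintptr_t packed)
{
    return (void *)(packed & ~(uintptr_t)TAG_MASK);
}

static unsigned tag_value(uintptr_t packed)
{
    return (unsigned)(packed & TAG_MASK);
}

The driver additionally reads the packed word with acquire semantics and writes it with release semantics, so a reader that observes the tag also observes the fully initialized object it points to.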
69
kernel-open/nvidia-uvm/uvm_fd_type.h
Normal file
@@ -0,0 +1,69 @@
/*******************************************************************************
    Copyright (c) 2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be
    included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_FD_TYPE_H__
#define __UVM_FD_TYPE_H__

#include "nvstatus.h"

typedef enum
{
    UVM_FD_UNINITIALIZED,
    UVM_FD_INITIALIZING,
    UVM_FD_VA_SPACE,
    UVM_FD_MM,
    UVM_FD_TEST,
    UVM_FD_COUNT
} uvm_fd_type_t;

// This should be large enough to fit the valid values from uvm_fd_type_t above.
// Note we can't use order_base_2(UVM_FD_COUNT) to define this because our code
// coverage tool fails when the preprocessor expands that to a huge mess of
// ternary operators.
#define UVM_FD_TYPE_BITS 3
#define UVM_FD_TYPE_MASK ((1UL << UVM_FD_TYPE_BITS) - 1)

struct file;

// Returns the type of data filp->private_data contains and if ptr_val != NULL
// returns the value of the pointer.
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val);

// Returns the pointer stored in filp->private_data if the type matches,
// otherwise returns NULL.
void *uvm_fd_get_type(struct file *filp, uvm_fd_type_t type);

// Does atomic CAS on filp->private_data, expecting UVM_FD_UNINITIALIZED and
// swapping in UVM_FD_INITIALIZING. Returns the old type regardless of CAS
// success.
uvm_fd_type_t uvm_fd_type_init_cas(struct file *filp);

// Like uvm_fd_type_init_cas() but returns NV_OK on CAS success and
// NV_ERR_IN_USE on CAS failure.
NV_STATUS uvm_fd_type_init(struct file *filp);

// Assigns {type, ptr} to filp. filp's current type must be UVM_FD_INITIALIZING.
// If the new type is UVM_FD_UNINITIALIZED, ptr must be NULL.
void uvm_fd_type_set(struct file *filp, uvm_fd_type_t type, void *ptr);

#endif // __UVM_FD_TYPE_H__
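A hedged sketch of how an open path might drive this API end to end; uvm_open_sketch() and create_va_space_sketch() are hypothetical, the real call sites live elsewhere in the driver:

static NV_STATUS uvm_open_sketch(struct file *filp)
{
    uvm_va_space_t *va_space;

    // CAS UVM_FD_UNINITIALIZED -> UVM_FD_INITIALIZING to claim the fd.
    NV_STATUS status = uvm_fd_type_init(filp);
    if (status != NV_OK)
        return status; // another thread is already initializing this fd

    va_space = create_va_space_sketch(); // hypothetical allocation
    if (!va_space) {
        // Roll the fd back so a later attempt can claim it again.
        uvm_fd_type_set(filp, UVM_FD_UNINITIALIZED, NULL);
        return NV_ERR_NO_MEMORY;
    }

    // Release-store publishes the object together with its type tag.
    uvm_fd_type_set(filp, UVM_FD_VA_SPACE, va_space);
    return NV_OK;
}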
@@ -93,13 +93,12 @@ typedef struct uvm_service_block_context_struct uvm_service_block_context_t;

typedef struct uvm_ats_fault_invalidate_struct uvm_ats_fault_invalidate_t;

typedef struct uvm_replayable_fault_buffer_info_struct uvm_replayable_fault_buffer_info_t;
typedef struct uvm_non_replayable_fault_buffer_info_struct uvm_non_replayable_fault_buffer_info_t;
typedef struct uvm_replayable_fault_buffer_struct uvm_replayable_fault_buffer_t;
typedef struct uvm_non_replayable_fault_buffer_struct uvm_non_replayable_fault_buffer_t;
typedef struct uvm_access_counter_buffer_entry_struct uvm_access_counter_buffer_entry_t;
typedef struct uvm_access_counter_buffer_struct uvm_access_counter_buffer_t;
typedef struct uvm_access_counter_service_batch_context_struct uvm_access_counter_service_batch_context_t;
typedef struct uvm_pmm_sysmem_mappings_struct uvm_pmm_sysmem_mappings_t;

typedef struct uvm_reverse_map_struct uvm_reverse_map_t;

typedef struct uvm_ibm_npu_struct uvm_ibm_npu_t;
#endif //__UVM_FORWARD_DECL_H__

@@ -194,6 +194,12 @@ NV_STATUS uvm_global_init(void)
        goto error;
    }

    status = uvm_access_counters_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_access_counters_init failed: %s\n", nvstatusToString(status));
        goto error;
    }

    // This sets up the ISR (interrupt service routine), by hooking into RM's
    // top-half ISR callback. As soon as this call completes, GPU interrupts
    // will start arriving, so it's important to be prepared to receive
@@ -224,8 +230,8 @@ void uvm_global_exit(void)
    nv_kthread_q_stop(&g_uvm_global.deferred_release_q);

    uvm_unregister_callbacks();
    uvm_access_counters_exit();
    uvm_service_block_context_exit();

    uvm_perf_heuristics_exit();
    uvm_perf_events_exit();
    uvm_migrate_exit();
@@ -287,7 +293,7 @@ static NV_STATUS uvm_suspend(void)
    // * Flush relevant kthread queues (bottom half, etc.)

    // Some locks acquired by this function, such as pm.lock, are released
    // by uvm_resume(). This is contrary to the lock tracking code's
    // by uvm_resume(). This is contrary to the lock tracking code's
    // expectations, so lock tracking is disabled.
    uvm_thread_context_lock_disable_tracking();

@@ -304,7 +310,7 @@ static NV_STATUS uvm_suspend(void)
        gpu = uvm_gpu_get(gpu_id);

        // Since fault buffer state may be lost across sleep cycles, UVM must
        // ensure any outstanding replayable faults are dismissed. The RM
        // ensure any outstanding replayable faults are dismissed. The RM
        // guarantees that all user channels have been preempted before
        // uvm_suspend() is called, which implies that no user channels can be
        // stalled on faults when this point is reached.
@@ -330,7 +336,7 @@ static NV_STATUS uvm_suspend(void)
    }

    // Acquire each VA space's lock in write mode to lock out VMA open and
    // release callbacks. These entry points do not have feasible early exit
    // release callbacks. These entry points do not have feasible early exit
    // options, and so aren't suitable for synchronization with pm.lock.
    uvm_mutex_lock(&g_uvm_global.va_spaces.lock);

@@ -360,7 +366,7 @@ static NV_STATUS uvm_resume(void)
    g_uvm_global.pm.is_suspended = false;

    // Some locks released by this function, such as pm.lock, were acquired
    // by uvm_suspend(). This is contrary to the lock tracking code's
    // by uvm_suspend(). This is contrary to the lock tracking code's
    // expectations, so lock tracking is disabled.
    uvm_thread_context_lock_disable_tracking();

@@ -392,7 +398,7 @@ static NV_STATUS uvm_resume(void)
    uvm_thread_context_lock_enable_tracking();

    // Force completion of any release callbacks successfully queued for
    // deferred completion while suspended. The deferred release
    // deferred completion while suspended. The deferred release
    // queue is not guaranteed to remain empty following this flush since
    // some threads that failed to acquire pm.lock in uvm_release() may
    // not have scheduled their handlers yet.
@@ -424,7 +430,8 @@ void uvm_global_set_fatal_error_impl(NV_STATUS error)
    }
    else {
        UVM_ERR_PRINT("Encountered a global fatal error: %s after a global error has been already set: %s\n",
                      nvstatusToString(error), nvstatusToString(previous_error));
                      nvstatusToString(error),
                      nvstatusToString(previous_error));
    }

    nvUvmInterfaceReportFatalError(error);

@@ -31,7 +31,6 @@
#include "uvm_processors.h"
#include "uvm_gpu.h"
#include "uvm_lock.h"
#include "uvm_ats_ibm.h"

// Global state of the uvm driver
struct uvm_global_struct
@@ -124,12 +123,6 @@ struct uvm_global_struct
        bool enabled;
    } ats;

#if UVM_IBM_NPU_SUPPORTED()
    // On IBM systems this array tracks the active NPUs (the NPUs which are
    // attached to retained GPUs).
    uvm_ibm_npu_t npus[NV_MAX_NPUS];
#endif

    // List of all active VA spaces
    struct
    {

@@ -149,7 +149,7 @@ static NV_STATUS get_gpu_caps(uvm_gpu_t *gpu)
}

// Return a PASID to use with the internal address space (AS), or -1 if not
// supported. This PASID is needed to enable ATS in the internal AS, but it is
// supported. This PASID is needed to enable ATS in the internal AS, but it is
// not used in address translation requests, which only translate GPA->SPA.
// The buffer management thus remains the same: DMA mapped GPA addresses can
// be accessed by the GPU, while unmapped addresses can not and any access is
@@ -358,10 +358,11 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
        addr_shift = gpu_addr_shift;

    // Pascal+ GPUs are capable of accessing kernel pointers in various modes
    // by applying the same upper-bit checks that x86, ARM, and Power
    // processors do. x86 and ARM use canonical form addresses. For ARM, even
    // by applying the same upper-bit checks that x86 or ARM CPU processors do.
    // The x86 and ARM platforms use canonical form addresses. For ARM, even
    // with Top-Byte Ignore enabled, the following logic validates addresses
    // from the kernel VA range. PowerPC does not use canonical form address.
    // from the kernel VA range.
    //
    // The following diagram illustrates the valid (V) VA regions that can be
    // mapped (or addressed) by the GPU/CPU when the CPU uses canonical form.
    // (C) regions are only accessible by the CPU. Similarly, (G) regions
@@ -389,8 +390,8 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
    // |VVVVVVVVVVVVVVVV|      |VVVVVVVVVVVVVVVV|
    // 0 +----------------+    0 +----------------+

    // On canonical form address platforms and Pascal+ GPUs.
    if (uvm_platform_uses_canonical_form_address() && gpu_addr_shift > 40) {
    // On Pascal+ GPUs.
    if (gpu_addr_shift > 40) {
        // On x86, when cpu_addr_shift > gpu_addr_shift, it means the CPU uses
        // 5-level paging and the GPU is pre-Hopper. On Pascal-Ada GPUs (49b
        // wide VA) we set addr_shift to match a 4-level paging x86 (48b wide).
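As a worked example of the shift selection above (a sketch, not the full function): with a 5-level-paging x86 CPU (57 bits) and a Pascal GPU (49 bits), addr_shift becomes 48, so an address is GPU-addressable only when bits 48..63 are a sign extension of bit 47:

// Sketch: canonical-form validity check for a chosen addr_shift.
static bool is_canonical(NvU64 addr, unsigned addr_shift)
{
    NvU64 ext = (NvU64)((NvS64)(addr << (64 - addr_shift)) >> (64 - addr_shift));
    return ext == addr;
}
// is_canonical(0x0000700000000000ULL, 48) -> true  (lower half)
// is_canonical(0xFFFF800000000000ULL, 48) -> true  (upper half)
// is_canonical(0x0001000000000000ULL, 48) -> false (inside the hole)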
@@ -431,30 +432,28 @@ NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr)
    NvU8 addr_shift;
    NvU64 input_addr = addr;

    if (uvm_platform_uses_canonical_form_address()) {
        // When the CPU VA width is larger than GPU's, it means that:
        // On ARM: the CPU is on LVA mode and the GPU is pre-Hopper.
        // On x86: the CPU uses 5-level paging and the GPU is pre-Hopper.
        // We sign-extend on the 48b on ARM and on the 47b on x86 to mirror the
        // behavior of CPUs with smaller (than GPU) VA widths.
        gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
        cpu_addr_shift = uvm_cpu_num_va_bits();
    // When the CPU VA width is larger than GPU's, it means that:
    // On ARM: the CPU is on LVA mode and the GPU is pre-Hopper.
    // On x86: the CPU uses 5-level paging and the GPU is pre-Hopper.
    // We sign-extend on the 48b on ARM and on the 47b on x86 to mirror the
    // behavior of CPUs with smaller (than GPU) VA widths.
    gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
    cpu_addr_shift = uvm_cpu_num_va_bits();

        if (cpu_addr_shift > gpu_addr_shift)
            addr_shift = NVCPU_IS_X86_64 ? 48 : 49;
        else if (gpu_addr_shift == 57)
            addr_shift = gpu_addr_shift;
        else
            addr_shift = cpu_addr_shift;
    if (cpu_addr_shift > gpu_addr_shift)
        addr_shift = NVCPU_IS_X86_64 ? 48 : 49;
    else if (gpu_addr_shift == 57)
        addr_shift = gpu_addr_shift;
    else
        addr_shift = cpu_addr_shift;

        addr = (NvU64)((NvS64)(addr << (64 - addr_shift)) >> (64 - addr_shift));
    addr = (NvU64)((NvS64)(addr << (64 - addr_shift)) >> (64 - addr_shift));

        // This protection acts on when the address is not covered by the GPU's
        // OOR_ADDR_CHECK. This can only happen when OOR_ADDR_CHECK is in
        // permissive (NO_CHECK) mode.
        if ((addr << (64 - gpu_addr_shift)) != (input_addr << (64 - gpu_addr_shift)))
            return input_addr;
    }
    // This protection acts on when the address is not covered by the GPU's
    // OOR_ADDR_CHECK. This can only happen when OOR_ADDR_CHECK is in
    // permissive (NO_CHECK) mode.
    if ((addr << (64 - gpu_addr_shift)) != (input_addr << (64 - gpu_addr_shift)))
        return input_addr;

    return addr;
}
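A numeric walk-through of the function above under one assumed configuration (addr_shift == 48, gpu_addr_shift == 49, e.g. 5-level x86 with a pre-Hopper GPU):

NvU64 input = 0x0000F00000000000ULL;               // bit 47 set, bit 48 clear
NvU64 addr  = (NvU64)((NvS64)(input << 16) >> 16); // -> 0xFFFFF00000000000
// The final guard compares the low 49 bits of addr and input. Here they
// differ (sign extension flipped bit 48), so the function returns the
// original input rather than an address outside the GPU's OOR_ADDR_CHECK.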
@@ -485,7 +484,7 @@ static void gpu_info_print_ce_caps(uvm_gpu_t *gpu, struct seq_file *s)
            continue;

        UVM_SEQ_OR_DBG_PRINT(s, " ce %u pce mask 0x%08x grce %u shared %u sysmem read %u sysmem write %u sysmem %u "
                             "nvlink p2p %u p2p %u\n",
                             "nvlink p2p %u p2p %u secure %u\n",
                             i,
                             ce_caps->cePceMask,
                             ce_caps->grce,
@@ -494,7 +493,8 @@ static void gpu_info_print_ce_caps(uvm_gpu_t *gpu, struct seq_file *s)
                             ce_caps->sysmemWrite,
                             ce_caps->sysmem,
                             ce_caps->nvlinkP2p,
                             ce_caps->p2p);
                             ce_caps->p2p,
                             ce_caps->secure);
    }

out:
@@ -538,7 +538,9 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
    NvU64 num_pages_in;
    NvU64 num_pages_out;
    NvU64 mapped_cpu_pages_size;
    NvU32 get, put;
    NvU32 get;
    NvU32 put;
    NvU32 i;
    unsigned int cpu;

    UVM_SEQ_OR_DBG_PRINT(s, "GPU %s\n", uvm_gpu_name(gpu));
@@ -593,9 +595,6 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
                             window_size / (1024 * 1024));
    }

    if (gpu->parent->npu)
        UVM_SEQ_OR_DBG_PRINT(s, "npu_domain %d\n", gpu->parent->npu->pci_domain);

    UVM_SEQ_OR_DBG_PRINT(s, "interrupts %llu\n", gpu->parent->isr.interrupt_count);

    if (gpu->parent->isr.replayable_faults.handling) {
@@ -608,19 +607,19 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
                                 gpu->parent->isr.replayable_faults.stats.cpu_exec_count[cpu]);
        }
        UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_buffer_entries %u\n",
                             gpu->parent->fault_buffer_info.replayable.max_faults);
                             gpu->parent->fault_buffer.replayable.max_faults);
        UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_cached_get %u\n",
                             gpu->parent->fault_buffer_info.replayable.cached_get);
                             gpu->parent->fault_buffer.replayable.cached_get);
        UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_cached_put %u\n",
                             gpu->parent->fault_buffer_info.replayable.cached_put);
                             gpu->parent->fault_buffer.replayable.cached_put);
        UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_get %u\n",
                             gpu->parent->fault_buffer_hal->read_get(gpu->parent));
        UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_put %u\n",
                             gpu->parent->fault_buffer_hal->read_put(gpu->parent));
        UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_fault_batch_size %u\n",
                             gpu->parent->fault_buffer_info.max_batch_size);
                             gpu->parent->fault_buffer.max_batch_size);
        UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_replay_policy %s\n",
                             uvm_perf_fault_replay_policy_string(gpu->parent->fault_buffer_info.replayable.replay_policy));
                             uvm_perf_fault_replay_policy_string(gpu->parent->fault_buffer.replayable.replay_policy));
        UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_num_faults %llu\n",
                             gpu->parent->stats.num_replayable_faults);
    }
@@ -634,32 +633,35 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
                                 gpu->parent->isr.non_replayable_faults.stats.cpu_exec_count[cpu]);
        }
        UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults_buffer_entries %u\n",
                             gpu->parent->fault_buffer_info.non_replayable.max_faults);
                             gpu->parent->fault_buffer.non_replayable.max_faults);
        UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults_num_faults %llu\n",
                             gpu->parent->stats.num_non_replayable_faults);
    }

    if (gpu->parent->isr.access_counters.handling_ref_count > 0) {
        UVM_SEQ_OR_DBG_PRINT(s, "access_counters_bh %llu\n",
                             gpu->parent->isr.access_counters.stats.bottom_half_count);
        UVM_SEQ_OR_DBG_PRINT(s, "access_counters_bh/cpu\n");
        for_each_cpu(cpu, &gpu->parent->isr.access_counters.stats.cpus_used_mask) {
            UVM_SEQ_OR_DBG_PRINT(s, " cpu%02u %llu\n",
                                 cpu,
                                 gpu->parent->isr.access_counters.stats.cpu_exec_count[cpu]);
    for (i = 0; i < gpu_info->accessCntrBufferCount; i++) {
        if (gpu->parent->access_counters_supported && gpu->parent->isr.access_counters[i].handling_ref_count > 0) {
            UVM_SEQ_OR_DBG_PRINT(s, "access_counters_notif_buffer_index %u\n", i);
            UVM_SEQ_OR_DBG_PRINT(s, " access_counters_bh %llu\n",
                                 gpu->parent->isr.access_counters[i].stats.bottom_half_count);
            UVM_SEQ_OR_DBG_PRINT(s, " access_counters_bh/cpu\n");
            for_each_cpu(cpu, &gpu->parent->isr.access_counters[i].stats.cpus_used_mask) {
                UVM_SEQ_OR_DBG_PRINT(s, " cpu%02u %llu\n",
                                     cpu,
                                     gpu->parent->isr.access_counters[i].stats.cpu_exec_count[cpu]);
            }
            UVM_SEQ_OR_DBG_PRINT(s, " access_counters_buffer_entries %u\n",
                                 gpu->parent->access_counter_buffer[i].max_notifications);
            UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_get %u\n",
                                 gpu->parent->access_counter_buffer[i].cached_get);
            UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_put %u\n",
                                 gpu->parent->access_counter_buffer[i].cached_put);

            get = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferGet);
            put = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferPut);

            UVM_SEQ_OR_DBG_PRINT(s, " access_counters_get %u\n", get);
            UVM_SEQ_OR_DBG_PRINT(s, " access_counters_put %u\n", put);
        }
        UVM_SEQ_OR_DBG_PRINT(s, "access_counters_buffer_entries %u\n",
                             gpu->parent->access_counter_buffer_info.max_notifications);
        UVM_SEQ_OR_DBG_PRINT(s, "access_counters_cached_get %u\n",
                             gpu->parent->access_counter_buffer_info.cached_get);
        UVM_SEQ_OR_DBG_PRINT(s, "access_counters_cached_put %u\n",
                             gpu->parent->access_counter_buffer_info.cached_put);

        get = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer_info.rm_info.pAccessCntrBufferGet);
        put = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer_info.rm_info.pAccessCntrBufferPut);

        UVM_SEQ_OR_DBG_PRINT(s, "access_counters_get %u\n", get);
        UVM_SEQ_OR_DBG_PRINT(s, "access_counters_put %u\n", put);
    }

    num_pages_out = atomic64_read(&gpu->parent->stats.num_pages_out);
@@ -694,18 +696,18 @@ gpu_fault_stats_print_common(uvm_parent_gpu_t *parent_gpu, struct seq_file *s)

    UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults %llu\n", parent_gpu->stats.num_replayable_faults);
    UVM_SEQ_OR_DBG_PRINT(s, "duplicates %llu\n",
                         parent_gpu->fault_buffer_info.replayable.stats.num_duplicate_faults);
                         parent_gpu->fault_buffer.replayable.stats.num_duplicate_faults);
    UVM_SEQ_OR_DBG_PRINT(s, "faults_by_access_type:\n");
    UVM_SEQ_OR_DBG_PRINT(s, " prefetch %llu\n",
                         parent_gpu->fault_buffer_info.replayable.stats.num_prefetch_faults);
                         parent_gpu->fault_buffer.replayable.stats.num_prefetch_faults);
    UVM_SEQ_OR_DBG_PRINT(s, " read %llu\n",
                         parent_gpu->fault_buffer_info.replayable.stats.num_read_faults);
                         parent_gpu->fault_buffer.replayable.stats.num_read_faults);
    UVM_SEQ_OR_DBG_PRINT(s, " write %llu\n",
                         parent_gpu->fault_buffer_info.replayable.stats.num_write_faults);
                         parent_gpu->fault_buffer.replayable.stats.num_write_faults);
    UVM_SEQ_OR_DBG_PRINT(s, " atomic %llu\n",
                         parent_gpu->fault_buffer_info.replayable.stats.num_atomic_faults);
    num_pages_out = atomic64_read(&parent_gpu->fault_buffer_info.replayable.stats.num_pages_out);
    num_pages_in = atomic64_read(&parent_gpu->fault_buffer_info.replayable.stats.num_pages_in);
                         parent_gpu->fault_buffer.replayable.stats.num_atomic_faults);
    num_pages_out = atomic64_read(&parent_gpu->fault_buffer.replayable.stats.num_pages_out);
    num_pages_in = atomic64_read(&parent_gpu->fault_buffer.replayable.stats.num_pages_in);
    UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
    UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
                         (num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
@@ -713,25 +715,25 @@ gpu_fault_stats_print_common(uvm_parent_gpu_t *parent_gpu, struct seq_file *s)
                         (num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
    UVM_SEQ_OR_DBG_PRINT(s, "replays:\n");
    UVM_SEQ_OR_DBG_PRINT(s, " start %llu\n",
                         parent_gpu->fault_buffer_info.replayable.stats.num_replays);
                         parent_gpu->fault_buffer.replayable.stats.num_replays);
    UVM_SEQ_OR_DBG_PRINT(s, " start_ack_all %llu\n",
                         parent_gpu->fault_buffer_info.replayable.stats.num_replays_ack_all);
                         parent_gpu->fault_buffer.replayable.stats.num_replays_ack_all);
    UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults %llu\n", parent_gpu->stats.num_non_replayable_faults);
    UVM_SEQ_OR_DBG_PRINT(s, "faults_by_access_type:\n");
    UVM_SEQ_OR_DBG_PRINT(s, " read %llu\n",
                         parent_gpu->fault_buffer_info.non_replayable.stats.num_read_faults);
                         parent_gpu->fault_buffer.non_replayable.stats.num_read_faults);
    UVM_SEQ_OR_DBG_PRINT(s, " write %llu\n",
                         parent_gpu->fault_buffer_info.non_replayable.stats.num_write_faults);
                         parent_gpu->fault_buffer.non_replayable.stats.num_write_faults);
    UVM_SEQ_OR_DBG_PRINT(s, " atomic %llu\n",
                         parent_gpu->fault_buffer_info.non_replayable.stats.num_atomic_faults);
                         parent_gpu->fault_buffer.non_replayable.stats.num_atomic_faults);
    UVM_SEQ_OR_DBG_PRINT(s, "faults_by_addressing:\n");
    UVM_SEQ_OR_DBG_PRINT(s, " virtual %llu\n",
                         parent_gpu->stats.num_non_replayable_faults -
                         parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults);
                         parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults);
    UVM_SEQ_OR_DBG_PRINT(s, " physical %llu\n",
                         parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults);
    num_pages_out = atomic64_read(&parent_gpu->fault_buffer_info.non_replayable.stats.num_pages_out);
    num_pages_in = atomic64_read(&parent_gpu->fault_buffer_info.non_replayable.stats.num_pages_in);
                         parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults);
    num_pages_out = atomic64_read(&parent_gpu->fault_buffer.non_replayable.stats.num_pages_out);
    num_pages_in = atomic64_read(&parent_gpu->fault_buffer.non_replayable.stats.num_pages_in);
    UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
    UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
                         (num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
@@ -743,16 +745,25 @@ static void gpu_access_counters_print_common(uvm_parent_gpu_t *parent_gpu, struc
{
    NvU64 num_pages_in;
    NvU64 num_pages_out;
    NvU32 i;

    UVM_ASSERT(uvm_procfs_is_debug_enabled());

    num_pages_out = atomic64_read(&parent_gpu->access_counter_buffer_info.stats.num_pages_out);
    num_pages_in = atomic64_read(&parent_gpu->access_counter_buffer_info.stats.num_pages_in);
    UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
    UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
                         (num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
    UVM_SEQ_OR_DBG_PRINT(s, " num_pages_out %llu (%llu MB)\n", num_pages_out,
                         (num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
    // procfs_files are created before gpu_init_isr, we need to check if the
    // access_counter_buffer is allocated.
    if (parent_gpu->access_counter_buffer) {
        for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++) {
            uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[i];

            num_pages_out = atomic64_read(&access_counters->stats.num_pages_out);
            num_pages_in = atomic64_read(&access_counters->stats.num_pages_in);
            UVM_SEQ_OR_DBG_PRINT(s, "migrations - buffer index %u:\n", i);
            UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
                                 (num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
            UVM_SEQ_OR_DBG_PRINT(s, " num_pages_out %llu (%llu MB)\n", num_pages_out,
                                 (num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
        }
    }
}
// This function converts an index of 2D array of size [N x N] into an index
@@ -892,7 +903,7 @@ static int nv_procfs_read_gpu_info(struct seq_file *s, void *v)
    uvm_gpu_t *gpu = (uvm_gpu_t *)s->private;

    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
        return -EAGAIN;
        return -EAGAIN;

    gpu_info_print_common(gpu, s);

@@ -911,7 +922,7 @@ static int nv_procfs_read_gpu_fault_stats(struct seq_file *s, void *v)
    uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)s->private;

    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
        return -EAGAIN;
        return -EAGAIN;

    gpu_fault_stats_print_common(parent_gpu, s);

@@ -930,7 +941,7 @@ static int nv_procfs_read_gpu_access_counters(struct seq_file *s, void *v)
    uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)s->private;

    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
        return -EAGAIN;
        return -EAGAIN;

    gpu_access_counters_print_common(parent_gpu, s);

@@ -1027,7 +1038,7 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
{
    struct proc_dir_entry *gpu_base_dir_entry;
    char symlink_name[16]; // Hold a uvm_gpu_id_t value in decimal.
    char uuid_buffer[max(UVM_PARENT_GPU_UUID_STRING_LENGTH, UVM_GPU_UUID_STRING_LENGTH)];
    char uuid_buffer[NV_MAX(UVM_PARENT_GPU_UUID_STRING_LENGTH, UVM_GPU_UUID_STRING_LENGTH)];
    char gpu_dir_name[sizeof(symlink_name) + sizeof(uuid_buffer) + 1];

    if (!uvm_procfs_is_enabled())
@@ -1182,7 +1193,7 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
    uvm_uuid_copy(&parent_gpu->uuid, gpu_uuid);
    uvm_sema_init(&parent_gpu->isr.replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
    uvm_sema_init(&parent_gpu->isr.non_replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
    uvm_sema_init(&parent_gpu->isr.access_counters.service_lock, 1, UVM_LOCK_ORDER_ISR);
    uvm_mutex_init(&parent_gpu->access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
    uvm_spin_lock_irqsave_init(&parent_gpu->isr.interrupts_lock, UVM_LOCK_ORDER_LEAF);
    uvm_spin_lock_init(&parent_gpu->instance_ptr_table_lock, UVM_LOCK_ORDER_LEAF);
    uvm_rb_tree_init(&parent_gpu->instance_ptr_table);
@@ -1221,7 +1232,7 @@ static uvm_gpu_t *alloc_gpu(uvm_parent_gpu_t *parent_gpu, uvm_gpu_id_t gpu_id)

    // Initialize enough of the gpu struct for remove_gpu to be called
    gpu->magic = UVM_GPU_MAGIC_VALUE;
    uvm_spin_lock_init(&gpu->peer_info.peer_gpus_lock, UVM_LOCK_ORDER_LEAF);
    uvm_spin_lock_init(&gpu->peer_info.peer_gpu_lock, UVM_LOCK_ORDER_LEAF);

    sub_processor_index = uvm_id_sub_processor_index(gpu_id);
    parent_gpu->gpus[sub_processor_index] = gpu;
@@ -1235,13 +1246,15 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
    NvU32 num_entries;
    NvU64 va_size;
    NvU64 va_per_entry;
    NvU64 physical_address;
    NvU64 dma_address;
    uvm_mmu_page_table_alloc_t *tree_alloc;

    status = uvm_page_tree_init(gpu,
                                NULL,
                                UVM_PAGE_TREE_TYPE_KERNEL,
                                gpu->big_page.internal_size,
                                uvm_get_page_tree_location(gpu->parent),
                                uvm_get_page_tree_location(gpu),
                                &gpu->address_space_tree);
    if (status != NV_OK) {
        UVM_ERR_PRINT("Initializing the page tree failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
@@ -1265,12 +1278,17 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
                  gpu->parent->rm_va_size,
                  va_per_entry);

    tree_alloc = uvm_page_tree_pdb(&gpu->address_space_tree);
    tree_alloc = uvm_page_tree_pdb_internal(&gpu->address_space_tree);
    if (tree_alloc->addr.aperture == UVM_APERTURE_VID)
        physical_address = tree_alloc->addr.address;
    else
        physical_address = page_to_phys(tree_alloc->handle.page);
    status = uvm_rm_locked_call(nvUvmInterfaceSetPageDirectory(gpu->rm_address_space,
                                                               tree_alloc->addr.address,
                                                               physical_address,
                                                               num_entries,
                                                               tree_alloc->addr.aperture == UVM_APERTURE_VID,
                                                               gpu_get_internal_pasid(gpu)));
                                                               gpu_get_internal_pasid(gpu),
                                                               &dma_address));
    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceSetPageDirectory() failed: %s, GPU %s\n",
                      nvstatusToString(status),
@@ -1278,6 +1296,9 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
        return status;
    }

    if (tree_alloc->addr.aperture == UVM_APERTURE_SYS)
        gpu->address_space_tree.pdb_rm_dma_address = uvm_gpu_phys_address(UVM_APERTURE_SYS, dma_address);

    gpu->rm_address_space_moved_to_page_tree = true;

    return NV_OK;
@@ -1390,13 +1411,12 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
    parent_gpu->egm.enabled = gpu_info->egmEnabled;
    parent_gpu->egm.local_peer_id = gpu_info->egmPeerId;
    parent_gpu->egm.base_address = gpu_info->egmBaseAddr;
    parent_gpu->access_counters_supported = (gpu_info->accessCntrBufferCount != 0);

    status = uvm_rm_locked_call(nvUvmInterfaceGetFbInfo(parent_gpu->rm_device, &fb_info));
    if (status != NV_OK)
        return status;

    parent_gpu->sli_enabled = (gpu_info->subdeviceCount > 1);

    if (!fb_info.bZeroFb)
        parent_gpu->max_allocatable_address = fb_info.maxAllocatableAddress;

@@ -1616,7 +1636,7 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
    // trackers.
    if (sync_replay_tracker) {
        uvm_parent_gpu_replayable_faults_isr_lock(parent_gpu);
        status = uvm_tracker_wait(&parent_gpu->fault_buffer_info.replayable.replay_tracker);
        status = uvm_tracker_wait(&parent_gpu->fault_buffer.replayable.replay_tracker);
        uvm_parent_gpu_replayable_faults_isr_unlock(parent_gpu);

        if (status != NV_OK)
@@ -1627,7 +1647,7 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
    // VA block trackers, too.
    if (sync_clear_faulted_tracker) {
        uvm_parent_gpu_non_replayable_faults_isr_lock(parent_gpu);
        status = uvm_tracker_wait(&parent_gpu->fault_buffer_info.non_replayable.clear_faulted_tracker);
        status = uvm_tracker_wait(&parent_gpu->fault_buffer.non_replayable.clear_faulted_tracker);
        uvm_parent_gpu_non_replayable_faults_isr_unlock(parent_gpu);

        if (status != NV_OK)
@@ -1635,13 +1655,20 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
    }

    // Sync the access counter clear tracker too.
    if (parent_gpu->access_counters_supported) {
        uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
        status = uvm_tracker_wait(&parent_gpu->access_counter_buffer_info.clear_tracker);
        uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
    if (parent_gpu->access_counters_supported && parent_gpu->access_counter_buffer) {
        NvU32 notif_buf_index;
        for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
            uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[notif_buf_index];

        if (status != NV_OK)
            UVM_ASSERT(status == uvm_global_get_status());
            if (access_counters->rm_info.accessCntrBufferHandle != 0) {
                uvm_access_counters_isr_lock(access_counters);
                status = uvm_tracker_wait(&access_counters->clear_tracker);
                uvm_access_counters_isr_unlock(access_counters);

                if (status != NV_OK)
                    UVM_ASSERT(status == uvm_global_get_status());
            }
        }
    }
}

@@ -1680,15 +1707,11 @@ static void deinit_parent_gpu(uvm_parent_gpu_t *parent_gpu)
    UVM_ASSERT(uvm_rb_tree_empty(&parent_gpu->instance_ptr_table));
    UVM_ASSERT(uvm_rb_tree_empty(&parent_gpu->tsg_table));

    // Access counters should have been disabled when the GPU is no longer
    // registered in any VA space.
    UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
    deinit_parent_procfs_files(parent_gpu);

    // Return ownership to RM
    uvm_parent_gpu_deinit_isr(parent_gpu);

    deinit_parent_procfs_files(parent_gpu);

    uvm_pmm_devmem_deinit(parent_gpu);
    uvm_ats_remove_gpu(parent_gpu);

@@ -1794,14 +1817,14 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
    switch (fault_entry->fault_access_type)
    {
        case UVM_FAULT_ACCESS_TYPE_READ:
            ++parent_gpu->fault_buffer_info.non_replayable.stats.num_read_faults;
            ++parent_gpu->fault_buffer.non_replayable.stats.num_read_faults;
            break;
        case UVM_FAULT_ACCESS_TYPE_WRITE:
            ++parent_gpu->fault_buffer_info.non_replayable.stats.num_write_faults;
            ++parent_gpu->fault_buffer.non_replayable.stats.num_write_faults;
            break;
        case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
        case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
            ++parent_gpu->fault_buffer_info.non_replayable.stats.num_atomic_faults;
            ++parent_gpu->fault_buffer.non_replayable.stats.num_atomic_faults;
            break;
        default:
            UVM_ASSERT_MSG(false, "Invalid access type for non-replayable faults\n");
@@ -1809,7 +1832,7 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
    }

    if (!fault_entry->is_virtual)
        ++parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults;
        ++parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults;

    ++parent_gpu->stats.num_non_replayable_faults;

@@ -1821,23 +1844,23 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
    switch (fault_entry->fault_access_type)
    {
        case UVM_FAULT_ACCESS_TYPE_PREFETCH:
            ++parent_gpu->fault_buffer_info.replayable.stats.num_prefetch_faults;
            ++parent_gpu->fault_buffer.replayable.stats.num_prefetch_faults;
            break;
        case UVM_FAULT_ACCESS_TYPE_READ:
            ++parent_gpu->fault_buffer_info.replayable.stats.num_read_faults;
            ++parent_gpu->fault_buffer.replayable.stats.num_read_faults;
            break;
        case UVM_FAULT_ACCESS_TYPE_WRITE:
            ++parent_gpu->fault_buffer_info.replayable.stats.num_write_faults;
            ++parent_gpu->fault_buffer.replayable.stats.num_write_faults;
            break;
        case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
        case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
            ++parent_gpu->fault_buffer_info.replayable.stats.num_atomic_faults;
            ++parent_gpu->fault_buffer.replayable.stats.num_atomic_faults;
            break;
        default:
            break;
    }
    if (is_duplicate || fault_entry->filtered)
        ++parent_gpu->fault_buffer_info.replayable.stats.num_duplicate_faults;
        ++parent_gpu->fault_buffer.replayable.stats.num_duplicate_faults;

    ++parent_gpu->stats.num_replayable_faults;
}
@@ -1901,21 +1924,29 @@ static void update_stats_migration_cb(uvm_perf_event_t event_id, uvm_perf_event_

    if (gpu_dst) {
        atomic64_add(pages, &gpu_dst->parent->stats.num_pages_in);
        if (is_replayable_fault)
            atomic64_add(pages, &gpu_dst->parent->fault_buffer_info.replayable.stats.num_pages_in);
        else if (is_non_replayable_fault)
            atomic64_add(pages, &gpu_dst->parent->fault_buffer_info.non_replayable.stats.num_pages_in);
        else if (is_access_counter)
            atomic64_add(pages, &gpu_dst->parent->access_counter_buffer_info.stats.num_pages_in);
        if (is_replayable_fault) {
            atomic64_add(pages, &gpu_dst->parent->fault_buffer.replayable.stats.num_pages_in);
        }
        else if (is_non_replayable_fault) {
            atomic64_add(pages, &gpu_dst->parent->fault_buffer.non_replayable.stats.num_pages_in);
        }
        else if (is_access_counter) {
            NvU32 index = event_data->migration.make_resident_context->access_counters_buffer_index;
            atomic64_add(pages, &gpu_dst->parent->access_counter_buffer[index].stats.num_pages_in);
        }
    }
    if (gpu_src) {
        atomic64_add(pages, &gpu_src->parent->stats.num_pages_out);
        if (is_replayable_fault)
            atomic64_add(pages, &gpu_src->parent->fault_buffer_info.replayable.stats.num_pages_out);
        else if (is_non_replayable_fault)
            atomic64_add(pages, &gpu_src->parent->fault_buffer_info.non_replayable.stats.num_pages_out);
        else if (is_access_counter)
            atomic64_add(pages, &gpu_src->parent->access_counter_buffer_info.stats.num_pages_out);
        if (is_replayable_fault) {
            atomic64_add(pages, &gpu_src->parent->fault_buffer.replayable.stats.num_pages_out);
        }
        else if (is_non_replayable_fault) {
            atomic64_add(pages, &gpu_src->parent->fault_buffer.non_replayable.stats.num_pages_out);
        }
        else if (is_access_counter) {
            NvU32 index = event_data->migration.make_resident_context->access_counters_buffer_index;
            atomic64_add(pages, &gpu_src->parent->access_counter_buffer[index].stats.num_pages_out);
        }
    }
}

@@ -1929,8 +1960,9 @@ static void uvm_param_conf(void)
    }
    else {
        if (strcmp(uvm_peer_copy, UVM_PARAM_PEER_COPY_PHYSICAL) != 0) {
            pr_info("Invalid value for uvm_peer_copy = %s, using %s instead.\n",
                    uvm_peer_copy, UVM_PARAM_PEER_COPY_PHYSICAL);
            UVM_INFO_PRINT("Invalid value for uvm_peer_copy = %s, using %s instead.\n",
                           uvm_peer_copy,
                           UVM_PARAM_PEER_COPY_PHYSICAL);
        }

        g_uvm_global.peer_copy_mode = UVM_GPU_PEER_COPY_MODE_PHYSICAL;
@@ -2397,6 +2429,7 @@ static NV_STATUS peers_init(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *pe
{
    NV_STATUS status;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    UVM_ASSERT(peer_caps->ref_count == 0);

    status = parent_peers_retain(gpu0->parent, gpu1->parent);
@@ -2419,25 +2452,13 @@ static NV_STATUS peers_init(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *pe
    UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
    UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);

    // In the case of NVLINK peers, this initialization will happen during
    // add_gpu. As soon as the peer info table is assigned below, the access
    // counter bottom half could start operating on the GPU being newly
    // added and inspecting the peer caps, so all of the appropriate
    // initialization must happen before this point.
    uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);

    uvm_spin_lock(&gpu0->peer_info.peer_gpu_lock);
    uvm_processor_mask_set(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
    UVM_ASSERT(gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] == NULL);
    gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = gpu1;

    uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
    uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
    uvm_spin_unlock(&gpu0->peer_info.peer_gpu_lock);

    uvm_spin_lock(&gpu1->peer_info.peer_gpu_lock);
    uvm_processor_mask_set(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
    UVM_ASSERT(gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] == NULL);
    gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = gpu0;

    uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
    uvm_spin_unlock(&gpu1->peer_info.peer_gpu_lock);

    return NV_OK;
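The ordering comment above captures a standard publication pattern: finish all initialization, then make the object reachable under the lock, because a concurrent bottom half may use it the moment it appears in the table. A sketch with hypothetical names (table, obj, slots):

// Sketch: publish-last. Readers that find o in the table under t->lock
// may rely on every field already being valid.
static void table_publish(struct table *t, struct obj *o, int id)
{
    obj_init_fully(o);            // strictly before publication

    uvm_spin_lock(&t->lock);
    UVM_ASSERT(t->slots[id] == NULL);
    t->slots[id] = o;             // visible to readers from here on
    uvm_spin_unlock(&t->lock);
}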
|
||||
|
||||
@@ -2465,18 +2486,18 @@ static NV_STATUS peers_retain(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
|
||||
static void peers_destroy(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *peer_caps)
|
||||
{
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
uvm_mmu_destroy_peer_identity_mappings(gpu0, gpu1);
|
||||
uvm_mmu_destroy_peer_identity_mappings(gpu1, gpu0);
|
||||
|
||||
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
|
||||
uvm_spin_lock(&gpu0->peer_info.peer_gpu_lock);
|
||||
uvm_processor_mask_clear(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
|
||||
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = NULL;
|
||||
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
|
||||
uvm_spin_unlock(&gpu0->peer_info.peer_gpu_lock);
|
||||
|
||||
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
|
||||
uvm_spin_lock(&gpu1->peer_info.peer_gpu_lock);
|
||||
uvm_processor_mask_clear(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
|
||||
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = NULL;
|
||||
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
|
||||
uvm_spin_unlock(&gpu1->peer_info.peer_gpu_lock);
|
||||
|
||||
// Flush the access counter buffer to avoid getting stale notifications for
|
||||
// accesses to GPUs to which peer access is being disabled. This is also
|
||||
@@ -2690,7 +2711,7 @@ static void remove_gpu(uvm_gpu_t *gpu)
uvm_processor_mask_clear(&g_uvm_global.retained_gpus, gpu->id);

// If the parent is being freed, stop scheduling new bottom halves and
// update relevant software state. Else flush any pending bottom halves
// update relevant software state. Else flush any pending bottom halves
// before continuing.
if (free_parent)
uvm_parent_gpu_disable_isr(parent_gpu);
@@ -2713,6 +2734,7 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
const UvmGpuInfo *gpu_info,
const UvmGpuPlatformInfo *gpu_platform_info,
uvm_parent_gpu_t *parent_gpu,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out)
{
NV_STATUS status;
@@ -2725,6 +2747,9 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
status = alloc_parent_gpu(gpu_uuid, uvm_parent_gpu_id_from_gpu_id(gpu_id), &parent_gpu);
if (status != NV_OK)
return status;

if (uvm_enable_builtin_tests)
parent_gpu->test = *parent_gpu_error;
}

gpu = alloc_gpu(parent_gpu, gpu_id);
@@ -2794,7 +2819,7 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
// Clear the interrupt bit and force the re-evaluation of the interrupt
// condition to ensure that we don't miss any pending interrupt
parent_gpu->fault_buffer_hal->clear_replayable_faults(parent_gpu,
parent_gpu->fault_buffer_info.replayable.cached_get);
parent_gpu->fault_buffer.replayable.cached_get);
}

// Access counters are enabled on demand
@@ -2837,6 +2862,7 @@ error:
// the partition.
static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out)
{
NV_STATUS status = NV_OK;
@@ -2888,7 +2914,7 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
if (status != NV_OK)
goto error_unregister;

status = add_gpu(gpu_uuid, gpu_id, gpu_info, &gpu_platform_info, parent_gpu, &gpu);
status = add_gpu(gpu_uuid, gpu_id, gpu_info, &gpu_platform_info, parent_gpu, parent_gpu_error, &gpu);
if (status != NV_OK)
goto error_unregister;
}
@@ -2913,11 +2939,12 @@ error_free_gpu_info:

NV_STATUS uvm_gpu_retain_by_uuid(const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out)
{
NV_STATUS status;
uvm_mutex_lock(&g_uvm_global.global_lock);
status = gpu_retain_by_uuid_locked(gpu_uuid, user_rm_device, gpu_out);
status = gpu_retain_by_uuid_locked(gpu_uuid, user_rm_device, parent_gpu_error, gpu_out);
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
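The hunks above thread a new parent_gpu_error descriptor from uvm_gpu_retain_by_uuid() through gpu_retain_by_uuid_locked() into add_gpu(), where it is copied into the parent GPU only when built-in tests are enabled. A hedged user-space sketch of that plumbing follows; every name with a _model suffix is invented for illustration and is not a UVM symbol.

/* Sketch: pass an optional error-injection descriptor down a call chain. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct {
    bool alloc_buffer;
    bool alloc_block_context;
} inject_error_model_t;

typedef struct {
    inject_error_model_t test;   /* copied from the caller-provided descriptor */
} parent_gpu_model_t;

static bool tests_enabled_model = true;

static int add_gpu_model(parent_gpu_model_t *parent, const inject_error_model_t *err)
{
    /* Mirrors "parent_gpu->test = *parent_gpu_error" under uvm_enable_builtin_tests. */
    if (tests_enabled_model && err)
        parent->test = *err;
    return 0;
}

int main(void)
{
    parent_gpu_model_t parent = {0};
    inject_error_model_t err = { .alloc_buffer = true };
    add_gpu_model(&parent, &err);
    printf("inject alloc_buffer: %d\n", parent.test.alloc_buffer);
    return 0;
}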
@@ -3072,118 +3099,81 @@ bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address)
return (address.address >= gpu->parent->peer_va_base &&
address.address < (gpu->parent->peer_va_base + gpu->parent->peer_va_size));
}
} else {
}
else {
uvm_parent_gpu_t *parent_gpu;
phys_addr_t phys_addr;

if (uvm_aperture_is_peer(address.aperture)) {
bool is_peer = true;
uvm_parent_processor_mask_t parent_gpus;
uvm_parent_gpu_t *parent_peer_gpu;

// Local EGM accesses don't go over NVLINK
if (gpu->parent->egm.enabled && address.aperture == gpu->parent->egm.local_peer_id)
return false;

// EGM uses peer IDs but they are different from VIDMEM peer IDs.
// Check if the address aperture is an EGM aperture.
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
uvm_parent_gpus_from_processor_mask(&parent_gpus, &gpu->peer_info.peer_gpu_mask);
uvm_spin_lock(&gpu->peer_info.peer_gpus_lock);
for_each_parent_gpu_in_mask(parent_peer_gpu, &parent_gpus) {
uvm_aperture_t egm_peer_aperture;

if (!parent_peer_gpu->egm.enabled)
continue;

egm_peer_aperture = uvm_gpu_egm_peer_aperture(gpu->parent, parent_peer_gpu);

if (address.aperture == egm_peer_aperture) {
is_peer = false;
break;
}
// EGM uses peer IDs but they are different from VIDMEM peer
// IDs.
// Check if the address aperture is an EGM aperture.
// We should not use remote EGM addresses internally until
// NVLINK STO handling is updated to handle EGM.
// TODO: Bug: 5068688 [UVM] Detect STO and prevent data leaks
// when accessing EGM memory
// TODO: Bug: 5007527 [UVM] Extend STO recovery to EGM enabled
// systems
UVM_ASSERT(address.aperture != uvm_gpu_egm_peer_aperture(gpu->parent, parent_peer_gpu));
}

uvm_spin_unlock(&gpu->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);

return true;
} else if (address.aperture == UVM_APERTURE_SYS) {
bool is_peer = false;

// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
// either inline, or via ATS.
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);

// Exposed coherent vidmem can be accessed via sys aperture
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
for_each_parent_gpu(parent_gpu) {
if (parent_gpu == gpu->parent)
continue;

if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
phys_addr <= parent_gpu->system_bus.memory_window_end) {
is_peer = true;
}
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return is_peer;
}

if (address.aperture != UVM_APERTURE_SYS)
return false;

// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
// either inline, or via ATS.
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);

// Exposed coherent vidmem can be accessed via sys aperture
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
for_each_parent_gpu(parent_gpu) {
if (parent_gpu == gpu->parent)
continue;

if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
phys_addr <= parent_gpu->system_bus.memory_window_end) {
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return true;
}
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
UVM_ASSERT(address.aperture == UVM_APERTURE_VID);
}

return false;
}

uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu)
uvm_aperture_t uvm_get_page_tree_location(const uvm_gpu_t *gpu)
{
// See comment in page_tree_set_location
if (uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu) || g_uvm_global.conf_computing_enabled)
// See comments in page_tree_set_location
if (uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent) || g_uvm_global.conf_computing_enabled)
return UVM_APERTURE_VID;

if (!gpu->mem_info.size)
return UVM_APERTURE_SYS;

return UVM_APERTURE_DEFAULT;
}

uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr)
{
uvm_processor_id_t id = UVM_ID_INVALID;

// TODO: Bug 1899622: On P9 systems with multiple CPU sockets, SYS aperture
// is also reported for accesses to remote GPUs connected to a different CPU
// NUMA domain. We will need to determine the actual processor id using the
// reported physical address.
if (addr.aperture == UVM_APERTURE_SYS)
return UVM_ID_CPU;
else if (addr.aperture == UVM_APERTURE_VID)
return gpu->id;

uvm_spin_lock(&gpu->peer_info.peer_gpus_lock);

for_each_gpu_id_in_mask(id, &gpu->peer_info.peer_gpu_mask) {
uvm_gpu_t *other_gpu = gpu->peer_info.peer_gpus[uvm_id_gpu_index(id)];

UVM_ASSERT(other_gpu);
UVM_ASSERT(!uvm_gpus_are_smc_peers(gpu, other_gpu));

if (uvm_parent_gpus_are_nvswitch_connected(gpu->parent, other_gpu->parent)) {
// NVSWITCH connected systems use an extended physical address to
// map to peers. Find the physical memory 'slot' containing the
// given physical address to find the peer gpu that owns the
// physical address
NvU64 fabric_window_end = other_gpu->parent->nvswitch_info.fabric_memory_window_start +
other_gpu->mem_info.max_allocatable_address;

if (other_gpu->parent->nvswitch_info.fabric_memory_window_start <= addr.address &&
fabric_window_end >= addr.address)
break;
}
else if (uvm_gpu_peer_aperture(gpu, other_gpu) == addr.aperture) {
break;
}
}

uvm_spin_unlock(&gpu->peer_info.peer_gpus_lock);

return id;
}

static NvU64 instance_ptr_to_key(uvm_gpu_phys_address_t instance_ptr)
{
NvU64 key;
@@ -3570,20 +3560,19 @@ NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *pare

*out_va_space = NULL;
*out_gpu = NULL;
UVM_ASSERT(entry->address.is_virtual);

uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);

user_channel = instance_ptr_to_user_channel(parent_gpu, entry->virtual_info.instance_ptr);
user_channel = instance_ptr_to_user_channel(parent_gpu, entry->instance_ptr);
if (!user_channel) {
status = NV_ERR_INVALID_CHANNEL;
goto exit_unlock;
}

if (!user_channel->in_subctx) {
UVM_ASSERT_MSG(entry->virtual_info.ve_id == 0,
UVM_ASSERT_MSG(entry->ve_id == 0,
"Access counter packet contains SubCTX %u for channel not in subctx\n",
entry->virtual_info.ve_id);
entry->ve_id);

gpu_va_space = user_channel->gpu_va_space;
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
@@ -3591,7 +3580,7 @@ NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *pare
*out_gpu = gpu_va_space->gpu;
}
else {
gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, entry->virtual_info.ve_id);
gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, entry->ve_id);
if (gpu_va_space) {
*out_va_space = gpu_va_space->va_space;
*out_gpu = gpu_va_space->gpu;
@@ -3638,7 +3627,7 @@ static NvU64 gpu_addr_to_dma_addr(uvm_parent_gpu_t *parent_gpu, NvU64 gpu_addr)
// dma_addressable_start (in bifSetupDmaWindow_IMPL()) and hence when
// referencing sysmem from the GPU, dma_addressable_start should be
// subtracted from the DMA address we get from the OS.
static NvU64 dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr)
NvU64 uvm_parent_gpu_dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr)
{
NvU64 gpu_addr = dma_addr - parent_gpu->dma_addressable_start;
UVM_ASSERT(dma_addr >= gpu_addr);
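The comment above describes the DMA window offset: RM shifts the GPU's view of sysmem by dma_addressable_start, so converting between OS DMA addresses and GPU addresses is a single subtraction or addition. A small self-contained example of that arithmetic, using an arbitrary assumed window base:

/* Worked example of the dma_addr <-> gpu_addr translation. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t dma_addressable_start = 0x0000001000000000ull; /* assumed window base */
    uint64_t dma_addr = 0x0000001000042000ull;              /* from the OS DMA API */

    uint64_t gpu_addr = dma_addr - dma_addressable_start;   /* what the GPU uses */
    assert(dma_addr >= gpu_addr);                           /* mirrors the UVM_ASSERT */

    uint64_t back = gpu_addr + dma_addressable_start;       /* gpu_addr_to_dma_addr */
    assert(back == dma_addr);

    printf("dma 0x%llx -> gpu 0x%llx\n",
           (unsigned long long)dma_addr, (unsigned long long)gpu_addr);
    return 0;
}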
@@ -3646,32 +3635,40 @@ static NvU64 dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr)
return gpu_addr;
}

void *uvm_parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu, gfp_t gfp_flags, NvU64 *dma_address_out)
static void *parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu, gfp_t gfp_flags, NvU64 *dma_address_out)
{
NvU64 dma_addr;
void *cpu_addr;

cpu_addr = dma_alloc_coherent(&parent_gpu->pci_dev->dev, PAGE_SIZE, &dma_addr, gfp_flags);

if (!cpu_addr)
return cpu_addr;

*dma_address_out = dma_addr_to_gpu_addr(parent_gpu, dma_addr);
*dma_address_out = uvm_parent_gpu_dma_addr_to_gpu_addr(parent_gpu, dma_addr);
atomic64_add(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
return cpu_addr;
}

void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address)
NV_STATUS uvm_gpu_dma_alloc_page(uvm_gpu_t *gpu, gfp_t gfp_flags, void **cpu_addr_out, NvU64 *dma_address_out)
{
void *cpu_addr = parent_gpu_dma_alloc_page(gpu->parent, gfp_flags, dma_address_out);
if (!cpu_addr)
return NV_ERR_NO_MEMORY;

// TODO: Bug 4868590: Issue GPA invalidate here

*cpu_addr_out = cpu_addr;
return NV_OK;
}

void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *cpu_addr, NvU64 dma_address)
{
dma_address = gpu_addr_to_dma_addr(parent_gpu, dma_address);
dma_free_coherent(&parent_gpu->pci_dev->dev, PAGE_SIZE, va, dma_address);
dma_free_coherent(&parent_gpu->pci_dev->dev, PAGE_SIZE, cpu_addr, dma_address);
atomic64_sub(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
}

NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu,
struct page *page,
size_t size,
NvU64 *dma_address_out)
static NV_STATUS parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out)
{
NvU64 dma_addr;

@@ -3694,11 +3691,20 @@ NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu,
}

atomic64_add(size, &parent_gpu->mapped_cpu_pages_size);
*dma_address_out = dma_addr_to_gpu_addr(parent_gpu, dma_addr);
*dma_address_out = uvm_parent_gpu_dma_addr_to_gpu_addr(parent_gpu, dma_addr);

return NV_OK;
}

NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out)
{
NV_STATUS status = parent_gpu_map_cpu_pages(gpu->parent, page, size, dma_address_out);

// TODO: Bug 4868590: Issue GPA invalidate here

return status;
}

void uvm_parent_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size)
{
UVM_ASSERT(PAGE_ALIGNED(size));

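The allocation and mapping paths above all pair an atomic64_add with a matching atomic64_sub on mapped_cpu_pages_size, which is what the leak check at teardown relies on. A user-space model of that accounting; malloc stands in for dma_alloc_coherent and every name is local to the model:

/* Model of page alloc/free bookkeeping for leak detection. */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE_MODEL 4096

static long long mapped_bytes_model;

static void *dma_alloc_page_model(void)
{
    void *p = malloc(PAGE_SIZE_MODEL);
    if (p)
        mapped_bytes_model += PAGE_SIZE_MODEL;  /* atomic64_add in the driver */
    return p;
}

static void dma_free_page_model(void *p)
{
    free(p);
    mapped_bytes_model -= PAGE_SIZE_MODEL;      /* atomic64_sub in the driver */
}

int main(void)
{
    void *page = dma_alloc_page_model();
    if (!page)
        return 1;
    dma_free_page_model(page);
    /* A nonzero value here would indicate a leaked mapping. */
    printf("outstanding bytes at teardown: %lld\n", mapped_bytes_model);
    return 0;
}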
@@ -189,6 +189,9 @@ struct uvm_service_block_context_struct

// Prefetch temporary state.
uvm_perf_prefetch_bitmap_tree_t prefetch_bitmap_tree;

// Access counters notification buffer index.
NvU32 access_counters_buffer_index;
};

typedef struct
@@ -197,8 +200,8 @@ typedef struct
{
struct
{
// Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned region
// of a SAM VMA. Used for batching ATS faults in a vma.
// Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned
// region of a SAM VMA. Used for batching ATS faults in a vma.
uvm_page_mask_t prefetch_only_fault_mask;

// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region
@@ -350,7 +353,7 @@ typedef struct
// entries from the GPU buffer
NvU32 max_batch_size;

struct uvm_replayable_fault_buffer_info_struct
struct uvm_replayable_fault_buffer_struct
{
// Maximum number of fault entries that can be stored in the buffer
NvU32 max_faults;
@@ -414,7 +417,7 @@ typedef struct
uvm_ats_fault_invalidate_t ats_invalidate;
} replayable;

struct uvm_non_replayable_fault_buffer_info_struct
struct uvm_non_replayable_fault_buffer_struct
{
// Maximum number of fault entries that can be stored in the buffer
NvU32 max_faults;
@@ -468,7 +471,7 @@ typedef struct

// Timestamp when prefetch faults were disabled last time
NvU64 disable_prefetch_faults_timestamp;
} uvm_fault_buffer_info_t;
} uvm_fault_buffer_t;

struct uvm_access_counter_service_batch_context_struct
{
@@ -476,30 +479,14 @@ struct uvm_access_counter_service_batch_context_struct

NvU32 num_cached_notifications;

struct
{
uvm_access_counter_buffer_entry_t **notifications;
uvm_access_counter_buffer_entry_t **notifications;

NvU32 num_notifications;
NvU32 num_notifications;

// Boolean used to avoid sorting the fault batch by instance_ptr if we
// determine at fetch time that all the access counter notifications in
// the batch report the same instance_ptr
bool is_single_instance_ptr;
} virt;

struct
{
uvm_access_counter_buffer_entry_t **notifications;
uvm_reverse_map_t *translations;

NvU32 num_notifications;

// Boolean used to avoid sorting the fault batch by aperture if we
// determine at fetch time that all the access counter notifications in
// the batch report the same aperture
bool is_single_aperture;
} phys;
// Boolean used to avoid sorting the fault batch by instance_ptr if we
// determine at fetch time that all the access counter notifications in
// the batch report the same instance_ptr
bool is_single_instance_ptr;

// Helper page mask to compute the accessed pages within a VA block
uvm_page_mask_t accessed_pages;
@@ -514,31 +501,15 @@ struct uvm_access_counter_service_batch_context_struct
NvU32 batch_id;
};

typedef struct
struct uvm_access_counter_buffer_struct
{
// Values used to configure access counters in RM
struct
{
UVM_ACCESS_COUNTER_GRANULARITY granularity;
UVM_ACCESS_COUNTER_USE_LIMIT use_limit;
} rm;
uvm_parent_gpu_t *parent_gpu;

// The following values are precomputed by the access counter notification
// handling code. See comments for UVM_MAX_TRANSLATION_SIZE in
// uvm_gpu_access_counters.c for more details.
NvU64 translation_size;

NvU64 translations_per_counter;

NvU64 sub_granularity_region_size;

NvU64 sub_granularity_regions_per_translation;
} uvm_gpu_access_counter_type_config_t;

typedef struct
{
UvmGpuAccessCntrInfo rm_info;

// Access counters may have multiple notification buffers.
NvU32 index;

NvU32 max_notifications;

NvU32 max_batch_size;
@@ -560,10 +531,22 @@ typedef struct
// may override it to try different configuration values.
struct
{
uvm_gpu_access_counter_type_config_t mimc;
uvm_gpu_access_counter_type_config_t momc;
// Values used to configure access counters in RM
struct
{
UVM_ACCESS_COUNTER_GRANULARITY granularity;
} rm;

NvU32 threshold;
// The following values are precomputed by the access counter
// notification handling code. See comments for UVM_MAX_TRANSLATION_SIZE
// in uvm_gpu_access_counters.c for more details.
NvU64 translation_size;

NvU64 sub_granularity_region_size;

NvU64 sub_granularity_regions_per_translation;

NvU32 threshold;
} current_config;

// Access counter statistics
@@ -575,7 +558,7 @@ typedef struct
} stats;

// Ignoring access counters means that notifications are left in the HW
// buffer without being serviced. Requests to ignore access counters
// buffer without being serviced. Requests to ignore access counters
// are counted since the suspend path inhibits access counter interrupts,
// and the resume path needs to know whether to reenable them.
NvU32 notifications_ignored_count;
@@ -583,13 +566,25 @@ typedef struct
// Context structure used to service a GPU access counter batch
uvm_access_counter_service_batch_context_t batch_service_context;

// VA space that reconfigured the access counters configuration, if any.
// Used in builtin tests only, to avoid reconfigurations from different
// processes
//
// Locking: both readers and writers must hold the access counters ISR lock
uvm_va_space_t *reconfiguration_owner;
} uvm_access_counter_buffer_info_t;
struct
{
// VA space that reconfigured the access counters configuration, if any.
// Used in builtin tests only, to avoid reconfigurations from different
// processes.
//
// Locking: both readers and writers must hold the access counters ISR
// lock.
uvm_va_space_t *reconfiguration_owner;

// The service access counters loop breaks after processing the first
// batch. It will be retriggered if there are pending notifications, but
// it releases the ISR service lock to check certain races that would be
// difficult to hit otherwise.
bool one_iteration_per_batch;
NvU32 sleep_per_iteration_us;
} test;

};

typedef struct
{
@@ -745,15 +740,11 @@ struct uvm_gpu_struct

struct
{
// Mask of peer_gpus set
// Mask of peer_gpus set.
uvm_processor_mask_t peer_gpu_mask;

// lazily-populated array of peer GPUs, indexed by the peer's GPU index
uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];

// Leaf spinlock used to synchronize access to the peer_gpus table so
// that it can be safely accessed from the access counters bottom half
uvm_spinlock_t peer_gpus_lock;
// Leaf spinlock used to synchronize access to peer_gpu_mask.
uvm_spinlock_t peer_gpu_lock;
} peer_info;

// Maximum number of subcontexts supported
@@ -957,6 +948,16 @@ struct uvm_gpu_struct
uvm_mutex_t device_p2p_lock;
};

typedef struct
{
bool access_counters_alloc_buffer;
bool access_counters_alloc_block_context;
bool isr_access_counters_alloc;
bool isr_access_counters_alloc_stats_cpu;
bool access_counters_batch_context_notifications;
bool access_counters_batch_context_notification_cache;
} uvm_test_parent_gpu_inject_error_t;

// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
// parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
// (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
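The uvm_test_parent_gpu_inject_error_t flags defined above are meant to be consulted by init paths (the real checks appear later in uvm_isr_init_access_counters() and uvm_parent_gpu_init_isr()): when built-in tests are enabled and a flag is set, the path returns NV_ERR_NO_MEMORY as if an allocation had failed, so tests can exercise the error-handling code. A hedged user-space model of that pattern:

/* Model: a test-only flag short-circuits an init path with a fake failure. */
#include <stdbool.h>
#include <stdio.h>

typedef struct {
    bool alloc_buffer;   /* stands in for access_counters_alloc_buffer */
} inject_model_t;

static bool builtin_tests_model = true;

static int init_buffers_model(const inject_model_t *inject)
{
    /* Simulated NV_ERR_NO_MEMORY before the real allocation would run. */
    if (builtin_tests_model && inject->alloc_buffer)
        return -1;

    /* The real allocation would happen here. */
    return 0;
}

int main(void)
{
    inject_model_t inject = { .alloc_buffer = true };
    printf("init status: %d\n", init_buffers_model(&inject));
    return 0;
}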
@@ -965,8 +966,8 @@ struct uvm_gpu_struct
struct uvm_parent_gpu_struct
{
// Reference count for how many places are holding on to a parent GPU
// (internal to the UVM driver). This includes any GPUs we know about, not
// just GPUs that are registered with a VA space. Most GPUs end up being
// (internal to the UVM driver). This includes any GPUs we know about, not
// just GPUs that are registered with a VA space. Most GPUs end up being
// registered, but there are brief periods when they are not registered,
// such as during interrupt handling, and in add_gpu() or remove_gpu().
nv_kref_t gpu_kref;
@@ -976,7 +977,7 @@ struct uvm_parent_gpu_struct

uvm_gpu_t *gpus[UVM_PARENT_ID_MAX_SUB_PROCESSORS];

// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
// usable child GPU in bottom-halves.
DECLARE_BITMAP(valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS);

@@ -1000,17 +1001,6 @@ struct uvm_parent_gpu_struct
// nvUvmInterfaceUnregisterGpu()).
struct pci_dev *pci_dev;

// NVLINK Processing Unit (NPU) on PowerPC platforms. The NPU is a
// collection of CPU-side PCI devices which bridge GPU NVLINKs and the CPU
// memory bus.
//
// There is one PCI device per NVLINK. A set of NVLINKs connects to a single
// GPU, and all NVLINKs for a given socket are collected logically under
// this UVM NPU because some resources (such as register mappings) are
// shared by all those NVLINKs. This means multiple GPUs may connect to the
// same UVM NPU.
uvm_ibm_npu_t *npu;

// On kernels with NUMA support, this entry contains the closest CPU NUMA
// node to this GPU. Otherwise, the value will be -1.
int closest_cpu_numa_node;
@@ -1033,13 +1023,12 @@ struct uvm_parent_gpu_struct
// dma_addressable_start (in bifSetupDmaWindow_IMPL()) and hence when
// referencing sysmem from the GPU, dma_addressable_start should be
// subtracted from the physical address. The DMA mapping helpers like
// uvm_parent_gpu_map_cpu_pages() and uvm_parent_gpu_dma_alloc_page() take
// care of that.
// uvm_gpu_map_cpu_pages() and uvm_gpu_dma_alloc_page() take care of that.
NvU64 dma_addressable_start;
NvU64 dma_addressable_limit;

// Total size (in bytes) of physically mapped (with
// uvm_parent_gpu_map_cpu_pages) sysmem pages, used for leak detection.
// uvm_gpu_map_cpu_pages) sysmem pages, used for leak detection.
atomic64_t mapped_cpu_pages_size;

// Hardware Abstraction Layer
@@ -1079,9 +1068,9 @@ struct uvm_parent_gpu_struct

bool access_counters_supported;

// If this is true, physical address based access counter notifications are
// potentially generated. If false, only virtual address based notifications
// are generated (assuming access_counters_supported is true too).
// TODO: Bug 4637114: [UVM] Remove support for physical access counter
// notifications. Always set to false, until we remove the PMM reverse
// mapping code.
bool access_counters_can_use_physical_addresses;

bool fault_cancel_va_supported;
@@ -1144,6 +1133,13 @@ struct uvm_parent_gpu_struct
// Indicates whether the GPU can map sysmem with pages larger than 4k
bool can_map_sysmem_with_large_pages;

// An integrated GPU has no vidmem and coherent access to sysmem. Note
// integrated GPUs have a write-back L2 cache (cf. discrete GPUs
// write-through cache.)
// TODO: Bug 5023085: this should be queried from RM instead of determined
// by UVM.
bool is_integrated_gpu;

struct
{
// If true, the granularity of key rotation is a single channel. If
@@ -1205,17 +1201,17 @@ struct uvm_parent_gpu_struct
// Interrupt handling state and locks
uvm_isr_info_t isr;

// Fault buffer info. This is only valid if supports_replayable_faults is
// set to true.
uvm_fault_buffer_info_t fault_buffer_info;
// This is only valid if supports_replayable_faults is set to true.
uvm_fault_buffer_t fault_buffer;

// PMM lazy free processing queue.
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
nv_kthread_q_t lazy_free_q;

// Access counter buffer info. This is only valid if
// supports_access_counters is set to true.
uvm_access_counter_buffer_info_t access_counter_buffer_info;
// This is only valid if supports_access_counters is set to true. This array
// has rm_info.accessCntrBufferCount entries.
uvm_access_counter_buffer_t *access_counter_buffer;
uvm_mutex_t access_counters_enablement_lock;

// Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
NvU32 utlb_per_gpc_count;
@@ -1264,9 +1260,6 @@ struct uvm_parent_gpu_struct
uvm_rb_tree_t instance_ptr_table;
uvm_spinlock_t instance_ptr_table_lock;

// This is set to true if the GPU belongs to an SLI group.
bool sli_enabled;

struct
{
bool supported;
@@ -1348,8 +1341,12 @@ struct uvm_parent_gpu_struct
// GPUs.
NvU64 base_address;
} egm;

uvm_test_parent_gpu_inject_error_t test;
};

NvU64 uvm_parent_gpu_dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr);

static const char *uvm_parent_gpu_name(uvm_parent_gpu_t *parent_gpu)
{
return parent_gpu->name;
@@ -1395,10 +1392,10 @@ typedef struct
// detected to be PCIe peers and uvm_gpu_retain_pcie_peer_access() was
// called.
//
// - The peer_gpus_lock is held on one of the GPUs. In this case, the other
// GPU must be read from the original GPU's peer_gpus table. The fields
// will not change while the lock is held, but they may no longer be valid
// because the other GPU might be in teardown.
// - The peer_gpu_lock is held on one of the GPUs. In this case, the other
// GPU must be referred from the original GPU's peer_gpu_mask reference.
// The fields will not change while the lock is held, but they may no
// longer be valid because the other GPU might be in teardown.

// This field is used to determine when this struct has been initialized
// (ref_count != 0). NVLink peers are initialized at GPU registration time.
@@ -1510,7 +1507,7 @@ uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);

// Like uvm_parent_gpu_get_by_uuid(), but this variant does not assertion-check
// that the caller is holding the global_lock. This is a narrower-purpose
// that the caller is holding the global_lock. This is a narrower-purpose
// function, and is only intended for use by the top-half ISR, or other very
// limited cases.
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid);
@@ -1521,6 +1518,7 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_u
// LOCKING: Takes and releases the global lock for the caller.
NV_STATUS uvm_gpu_retain_by_uuid(const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out);

// Retain a gpu which is known to already be retained. Does NOT require the
@@ -1578,10 +1576,6 @@ uvm_gpu_address_t uvm_gpu_peer_copy_address(uvm_gpu_t *owning_gpu, NvU64 address
// The two GPUs must have different parents.
NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1);

// Get the processor id accessible by the given GPU for the given physical
// address.
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);

// Get the EGM aperture for local_gpu to use to map memory resident on the CPU
// NUMA node that remote_gpu is attached to.
// Note that local_gpu can be equal to remote_gpu when memory is resident in
@@ -1655,7 +1649,8 @@ static uvm_gpu_identity_mapping_t *uvm_gpu_get_peer_mapping(uvm_gpu_t *gpu, uvm_

// Check whether the provided address points to peer memory:
// * Physical address using one of the PEER apertures
// * Physical address using SYS aperture that belongs to an exposed coherent memory
// * Physical address using SYS aperture that belongs to an exposed coherent
// memory
// * Virtual address in the region [peer_va_base, peer_va_base + peer_va_size)
bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address);

@@ -1684,24 +1679,25 @@ NV_STATUS uvm_gpu_check_nvlink_error(uvm_gpu_t *gpu);
// Check for NVLINK errors without calling into RM
//
// Calling into RM is problematic in many places, this check is always safe to
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK error
// and it's required to call uvm_gpu_check_nvlink_error() to be sure.
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK
// error and it's required to call uvm_gpu_check_nvlink_error() to be sure.
NV_STATUS uvm_gpu_check_nvlink_error_no_rm(uvm_gpu_t *gpu);

// Map size bytes of contiguous sysmem on the GPU for physical access
// Map size bytes of contiguous sysmem on the GPU for physical access.
//
// size has to be aligned to PAGE_SIZE.
//
// Returns the physical address of the pages that can be used to access them on
// the GPU.
NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);
// the GPU. This address is usable by any GPU under the same parent for the
// lifetime of that parent.
NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out);

// Unmap num_pages pages previously mapped with uvm_parent_gpu_map_cpu_pages().
// Unmap num_pages pages previously mapped with uvm_gpu_map_cpu_pages().
void uvm_parent_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);

static NV_STATUS uvm_parent_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
static NV_STATUS uvm_gpu_map_cpu_page(uvm_gpu_t *gpu, struct page *page, NvU64 *dma_address_out)
{
return uvm_parent_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
return uvm_gpu_map_cpu_pages(gpu, page, PAGE_SIZE, dma_address_out);
}

static void uvm_parent_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
@@ -1712,16 +1708,15 @@ static void uvm_parent_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dm
// Allocate and map a page of system DMA memory on the GPU for physical access
//
// Returns
// - the address of the page that can be used to access them on
// the GPU in the dma_address_out parameter.
// - the address of allocated memory in CPU virtual address space.
void *uvm_parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu,
gfp_t gfp_flags,
NvU64 *dma_address_out);
// - the address of the page that can be used to access them on
// the GPU in the dma_address_out parameter. This address is usable by any GPU
// under the same parent for the lifetime of that parent.
NV_STATUS uvm_gpu_dma_alloc_page(uvm_gpu_t *gpu, gfp_t gfp_flags, void **cpu_addr_out, NvU64 *dma_address_out);

// Unmap and free size bytes of contiguous sysmem DMA previously allocated
// with uvm_parent_gpu_map_cpu_pages().
void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address);
// with uvm_gpu_dma_alloc_page().
void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *cpu_addr, NvU64 dma_address);

// Returns whether the given range is within the GPU's addressable VA ranges.
// It requires the input 'addr' to be in canonical form for platforms compliant
@@ -1742,8 +1737,6 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
// The GPU must be initialized before calling this function.
bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);

bool uvm_platform_uses_canonical_form_address(void);

// Returns addr's canonical form for host systems that use canonical form
// addresses.
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);
@@ -1786,7 +1779,7 @@ static bool uvm_parent_gpu_needs_proxy_channel_pool(const uvm_parent_gpu_t *pare
return uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu);
}

uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu);
uvm_aperture_t uvm_get_page_tree_location(const uvm_gpu_t *gpu);

// Add the given instance pointer -> user_channel mapping to this GPU. The
// bottom half GPU page fault handler uses this to look up the VA space for GPU

File diff suppressed because it is too large
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -27,11 +27,11 @@
#include "uvm_forward_decl.h"
#include "uvm_test_ioctl.h"

NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index);
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu, NvU32 index);

void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_service_access_counters(uvm_access_counter_buffer_t *access_counters);

void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);

@@ -46,17 +46,23 @@ void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
//
// When unignoring, the interrupt conditions will be re-evaluated to trigger
// processing of buffered notifications, if any exist.
//
// All parent_gpu's notification buffers are affected.
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore);

// Return whether the VA space has access counter migrations enabled. The
// caller must ensure that the VA space cannot go away.
bool uvm_va_space_has_access_counter_migrations(uvm_va_space_t *va_space);

// Global perf initialization/cleanup functions
// Global access counters initialization/cleanup functions.
NV_STATUS uvm_access_counters_init(void);
void uvm_access_counters_exit(void);

// Global perf initialization/cleanup functions.
NV_STATUS uvm_perf_access_counters_init(void);
void uvm_perf_access_counters_exit(void);

// VA space Initialization/cleanup functions. See comments in
// VA space initialization/cleanup functions. See comments in
// uvm_perf_heuristics.h
NV_STATUS uvm_perf_access_counters_load(uvm_va_space_t *va_space);
void uvm_perf_access_counters_unload(uvm_va_space_t *va_space);
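The set_ignore semantics described above count ignore requests rather than tracking a boolean, so nested suspend/resume (or test-ioctl) pairs balance out and interrupts are only re-enabled when the count returns to zero. A minimal user-space model of that counting; all names are illustrative, not UVM symbols, and callers are assumed to balance their requests:

/* Model of refcounted ignore/unignore for interrupt gating. */
#include <stdbool.h>
#include <stdio.h>

static unsigned ignore_count_model;
static bool intr_enabled_model = true;

static void set_ignore_model(bool do_ignore)
{
    if (do_ignore) {
        ignore_count_model++;
        intr_enabled_model = false;  /* leave notifications in the HW buffer */
    }
    else {
        if (--ignore_count_model == 0)
            intr_enabled_model = true;  /* re-evaluate interrupt conditions */
    }
}

int main(void)
{
    set_ignore_model(true);   /* e.g. the suspend path */
    set_ignore_model(true);   /* e.g. a test ioctl */
    set_ignore_model(false);
    printf("interrupts enabled: %d\n", intr_enabled_model);  /* still 0 */
    set_ignore_model(false);
    printf("interrupts enabled: %d\n", intr_enabled_model);  /* back to 1 */
    return 0;
}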
@@ -72,17 +78,18 @@ bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
// counters are currently enabled. The hardware notifications and interrupts on
// the GPU are enabled the first time any VA space invokes
// uvm_gpu_access_counters_enable, and disabled when the last VA space invokes
// uvm_parent_gpu_access_counters_disable().
// uvm_gpu_access_counters_disable().
//
// Locking: the VA space lock must not be held by the caller since these
// functions may take the access counters ISR lock.
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu, uvm_va_space_t *va_space);
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);

NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_query_access_counters(UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS *params, struct file *filp);

#endif // __UVM_GPU_ACCESS_COUNTERS_H__

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -154,62 +154,73 @@ static unsigned schedule_non_replayable_faults_handler(uvm_parent_gpu_t *parent_
return 1;
}

static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);

// On Volta, accessCntrBufferCount is > 0, but we don't support access
// counters in UVM (access_counters_supported is cleared during HAL
// initialization.) This check prevents the top-half from accessing
// unallocated memory.
if (!parent_gpu->access_counters_supported)
return 0;

if (parent_gpu->isr.is_suspended)
return 0;

if (!parent_gpu->isr.access_counters.handling_ref_count)
if (!parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count)
return 0;

if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem) != 0)
if (down_trylock(&parent_gpu->isr.access_counters[notif_buf_index].service_lock.sem) != 0)
return 0;

if (!uvm_parent_gpu_access_counters_pending(parent_gpu)) {
up(&parent_gpu->isr.access_counters.service_lock.sem);
if (!uvm_parent_gpu_access_counters_pending(parent_gpu, notif_buf_index)) {
up(&parent_gpu->isr.access_counters[notif_buf_index].service_lock.sem);
return 0;
}

nv_kref_get(&parent_gpu->gpu_kref);

// Interrupts need to be disabled to avoid an interrupt storm
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
uvm_access_counters_intr_disable(&parent_gpu->access_counter_buffer[notif_buf_index]);

nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
&parent_gpu->isr.access_counters.bottom_half_q_item);
&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item);

return 1;
}

// This is called from RM's top-half ISR (see: the nvidia_isr() function), and UVM is given a
// chance to handle the interrupt, before most of the RM processing. UVM communicates what it
// did, back to RM, via the return code:
// This is called from RM's top-half ISR (see: the nvidia_isr() function), and
// UVM is given a chance to handle the interrupt, before most of the RM
// processing. UVM communicates what it did, back to RM, via the return code:
//
// NV_OK:
// UVM handled an interrupt.
//
// NV_WARN_MORE_PROCESSING_REQUIRED:
// UVM did not schedule a bottom half, because it was unable to get the locks it
// needed, but there is still UVM work to be done. RM will return "not handled" to the
// Linux kernel, *unless* RM handled other faults in its top half. In that case, the
// fact that UVM did not handle its interrupt is lost. However, life and interrupt
// processing continues anyway: the GPU will soon raise another interrupt, because
// that's what it does when there are replayable page faults remaining (GET != PUT in
// the fault buffer).
// UVM did not schedule a bottom half, because it was unable to get the
// locks it needed, but there is still UVM work to be done. RM will
// return "not handled" to the Linux kernel, *unless* RM handled other
// faults in its top half. In that case, the fact that UVM did not
// handle its interrupt is lost. However, life and interrupt processing
// continues anyway: the GPU will soon raise another interrupt, because
// that's what it does when there are replayable page faults remaining
// (GET != PUT in the fault buffer).
//
// NV_ERR_NO_INTR_PENDING:
// UVM did not find any work to do. Currently this is handled in RM in exactly the same
// way as NV_WARN_MORE_PROCESSING_REQUIRED is handled. However, the extra precision is
// available for the future. RM's interrupt handling tends to evolve as new chips and
// new interrupts get created.
// UVM did not find any work to do. Currently this is handled in RM in
// exactly the same way as NV_WARN_MORE_PROCESSING_REQUIRED is handled.
// However, the extra precision is available for the future. RM's
// interrupt handling tends to evolve as new chips and new interrupts
// get created.

static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
{
uvm_parent_gpu_t *parent_gpu;
unsigned num_handlers_scheduled = 0;
NV_STATUS status = NV_OK;
NvU32 i;

if (!in_interrupt() && in_atomic()) {
// Early-out if we're not in interrupt context, but memory allocations
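The return-code contract documented above can be summarized in a few lines. The following user-space sketch models it: NV_OK when at least one bottom half was scheduled, NV_WARN_MORE_PROCESSING_REQUIRED when work remains but locks were contended, NV_ERR_NO_INTR_PENDING when there was nothing to do. The enum values are stand-ins for the real NV_STATUS codes.

/* Model of the top-half status selection. */
#include <stdio.h>

typedef enum {
    MODEL_OK,
    MODEL_WARN_MORE_PROCESSING_REQUIRED,
    MODEL_ERR_NO_INTR_PENDING,
} model_status_t;

static model_status_t top_half_model(unsigned handlers_scheduled, int work_pending)
{
    if (handlers_scheduled > 0)
        return MODEL_OK;
    if (work_pending)
        return MODEL_WARN_MORE_PROCESSING_REQUIRED;  /* locks were contended */
    return MODEL_ERR_NO_INTR_PENDING;
}

int main(void)
{
    printf("%d\n", top_half_model(2, 1));  /* handled an interrupt */
    printf("%d\n", top_half_model(0, 1));  /* GPU will re-raise; retry later */
    printf("%d\n", top_half_model(0, 0));  /* nothing to do */
    return 0;
}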
@@ -243,14 +254,16 @@ static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
nv_kref_get(&parent_gpu->gpu_kref);
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);

// Now that we got a GPU object, lock it so that it can't be removed without us noticing.
// Now that we got a GPU object, lock it so that it can't be removed without
// us noticing.
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);

++parent_gpu->isr.interrupt_count;

num_handlers_scheduled += schedule_replayable_faults_handler(parent_gpu);
num_handlers_scheduled += schedule_non_replayable_faults_handler(parent_gpu);
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu);
for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++)
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu, i);

if (num_handlers_scheduled == 0) {
if (parent_gpu->isr.is_suspended)
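The per-buffer scheduling above follows a trylock pattern: take the service lock without blocking, bail out and release it if there is no pending work, otherwise keep it held for the bottom half. A user-space model of that pattern, with a pthread mutex standing in for the service semaphore:

/* Model of the trylock-then-schedule pattern. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t service_lock_model = PTHREAD_MUTEX_INITIALIZER;

static unsigned schedule_handler_model(bool work_pending)
{
    if (pthread_mutex_trylock(&service_lock_model) != 0)
        return 0;                  /* a bottom half is already running */

    if (!work_pending) {
        pthread_mutex_unlock(&service_lock_model);
        return 0;
    }

    /* The real code disables the buffer's interrupt here and queues the
     * bottom half, which releases service_lock when it finishes. */
    return 1;
}

int main(void)
{
    printf("scheduled: %u\n", schedule_handler_model(true));
    printf("scheduled: %u\n", schedule_handler_model(true));  /* trylock fails */
    pthread_mutex_unlock(&service_lock_model);                /* bottom half done */
    return 0;
}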
@@ -288,6 +301,55 @@ static NV_STATUS init_queue_on_node(nv_kthread_q_t *queue, const char *name, int
return errno_to_nv_status(nv_kthread_q_init(queue, name));
}

static NV_STATUS uvm_isr_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
{
NV_STATUS status = NV_OK;
uvm_va_block_context_t *block_context;

UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);

uvm_sema_init(&parent_gpu->isr.access_counters[notif_buf_index].service_lock, 1, UVM_LOCK_ORDER_ISR);

status = uvm_parent_gpu_init_access_counters(parent_gpu, notif_buf_index);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s, notif buf index: %u\n",
nvstatusToString(status),
uvm_parent_gpu_name(parent_gpu),
notif_buf_index);
return status;
}

if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_block_context)
return NV_ERR_NO_MEMORY;

block_context = uvm_va_block_context_alloc(NULL);
if (!block_context)
return NV_ERR_NO_MEMORY;

parent_gpu->access_counter_buffer[notif_buf_index].batch_service_context.block_service_context.block_context =
block_context;

nv_kthread_q_item_init(&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item,
access_counters_isr_bottom_half_entry,
&parent_gpu->access_counter_buffer[notif_buf_index]);

// Access counters interrupts are initially disabled. They are
// dynamically enabled when the GPU is registered on a VA space.
parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count = 0;

if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc_stats_cpu)
return NV_ERR_NO_MEMORY;

parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count =
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count) *
num_possible_cpus());
if (!parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count)
return NV_ERR_NO_MEMORY;

return NV_OK;
}

NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
@@ -316,7 +378,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
if (!block_context)
return NV_ERR_NO_MEMORY;

parent_gpu->fault_buffer_info.replayable.block_service_context.block_context = block_context;
parent_gpu->fault_buffer.replayable.block_service_context.block_context = block_context;

parent_gpu->isr.replayable_faults.handling = true;

@@ -344,7 +406,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
if (!block_context)
return NV_ERR_NO_MEMORY;

parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context = block_context;
parent_gpu->fault_buffer.non_replayable.block_service_context.block_context = block_context;

parent_gpu->isr.non_replayable_faults.handling = true;

@@ -361,32 +423,31 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
}

if (parent_gpu->access_counters_supported) {
status = uvm_parent_gpu_init_access_counters(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s\n",
nvstatusToString(status),
uvm_parent_gpu_name(parent_gpu));
return status;
NvU32 index_count = parent_gpu->rm_info.accessCntrBufferCount;
NvU32 notif_buf_index;

UVM_ASSERT(index_count > 0);

if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_buffer)
return NV_ERR_NO_MEMORY;

parent_gpu->access_counter_buffer = uvm_kvmalloc_zero(sizeof(*parent_gpu->access_counter_buffer) *
index_count);
if (!parent_gpu->access_counter_buffer)
return NV_ERR_NO_MEMORY;

if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc)
return NV_ERR_NO_MEMORY;

parent_gpu->isr.access_counters = uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters) * index_count);
if (!parent_gpu->isr.access_counters)
return NV_ERR_NO_MEMORY;

for (notif_buf_index = 0; notif_buf_index < index_count; notif_buf_index++) {
status = uvm_isr_init_access_counters(parent_gpu, notif_buf_index);
if (status != NV_OK)
return status;
}

block_context = uvm_va_block_context_alloc(NULL);
if (!block_context)
return NV_ERR_NO_MEMORY;

parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context =
block_context;

nv_kthread_q_item_init(&parent_gpu->isr.access_counters.bottom_half_q_item,
access_counters_isr_bottom_half_entry,
parent_gpu);

// Access counters interrupts are initially disabled. They are
// dynamically enabled when the GPU is registered on a VA space.
parent_gpu->isr.access_counters.handling_ref_count = 0;
parent_gpu->isr.access_counters.stats.cpu_exec_count =
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters.stats.cpu_exec_count) * num_possible_cpus());
if (!parent_gpu->isr.access_counters.stats.cpu_exec_count)
return NV_ERR_NO_MEMORY;
}
}

@@ -401,7 +462,15 @@ void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
|
||||
NvU32 notif_buf_index;
|
||||
|
||||
if (parent_gpu->isr.access_counters) {
|
||||
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
|
||||
UVM_ASSERT_MSG(parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count == 0,
|
||||
"notif buf index: %u\n",
|
||||
notif_buf_index);
|
||||
}
|
||||
}
|
||||
|
||||
// Now that the GPU is safely out of the global table, lock the GPU and mark
|
||||
// it as no longer handling interrupts so the top half knows not to schedule
|
||||
@@ -459,24 +528,38 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
}
|
||||
|
||||
if (parent_gpu->access_counters_supported) {
|
||||
// It is safe to deinitialize access counters even if they have not been
|
||||
// successfully initialized.
|
||||
uvm_parent_gpu_deinit_access_counters(parent_gpu);
|
||||
block_context =
|
||||
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context;
|
||||
uvm_va_block_context_free(block_context);
|
||||
NvU32 notif_buf_index;
|
||||
|
||||
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
|
||||
            // It is safe to deinitialize access counters even if they have not
            // been successfully initialized.
            uvm_parent_gpu_deinit_access_counters(parent_gpu, notif_buf_index);

            if (parent_gpu->access_counter_buffer) {
                uvm_access_counter_buffer_t *access_counter = &parent_gpu->access_counter_buffer[notif_buf_index];

                block_context = access_counter->batch_service_context.block_service_context.block_context;
                uvm_va_block_context_free(block_context);
            }

            if (parent_gpu->isr.access_counters)
                uvm_kvfree(parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count);
        }

        uvm_kvfree(parent_gpu->isr.access_counters);
        uvm_kvfree(parent_gpu->access_counter_buffer);
    }

    if (parent_gpu->non_replayable_faults_supported) {
        block_context = parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context;
        block_context = parent_gpu->fault_buffer.non_replayable.block_service_context.block_context;
        uvm_va_block_context_free(block_context);

        uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
    }

    block_context = parent_gpu->fault_buffer_info.replayable.block_service_context.block_context;
    block_context = parent_gpu->fault_buffer.replayable.block_service_context.block_context;
    uvm_va_block_context_free(block_context);

    uvm_kvfree(parent_gpu->isr.replayable_faults.stats.cpu_exec_count);
    uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
    uvm_kvfree(parent_gpu->isr.access_counters.stats.cpu_exec_count);
}

uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
@@ -584,25 +667,29 @@ static void non_replayable_faults_isr_bottom_half_entry(void *args)

static void access_counters_isr_bottom_half(void *args)
{
    uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
    uvm_access_counter_buffer_t *access_counters = (uvm_access_counter_buffer_t *)args;
    uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
    NvU32 notif_buf_index = access_counters->index;
    unsigned int cpu;

    UVM_ASSERT(parent_gpu->access_counters_supported);
    UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);

    uvm_record_lock(&parent_gpu->isr.access_counters.service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
    uvm_record_lock(&parent_gpu->isr.access_counters[notif_buf_index].service_lock, UVM_LOCK_FLAGS_MODE_SHARED);

    // Multiple bottom halves for counter notifications can be running
    // concurrently, but only one can be running this function for a given GPU
    // since we enter with the access_counters_isr_lock held.
    // concurrently, but only one per-notification-buffer (i.e.,
    // notif_buf_index) can be running this function for a given GPU since we
    // enter with the per-notification-buffer access_counters_isr_lock held.
    cpu = get_cpu();
    ++parent_gpu->isr.access_counters.stats.bottom_half_count;
    cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters.stats.cpus_used_mask);
    ++parent_gpu->isr.access_counters.stats.cpu_exec_count[cpu];
    ++parent_gpu->isr.access_counters[notif_buf_index].stats.bottom_half_count;
    cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters[notif_buf_index].stats.cpus_used_mask);
    ++parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count[cpu];
    put_cpu();

    uvm_parent_gpu_service_access_counters(parent_gpu);
    uvm_service_access_counters(access_counters);

    uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
    uvm_access_counters_isr_unlock(access_counters);

    uvm_parent_gpu_kref_put(parent_gpu);
}
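The hunk above changes the bottom half's argument from the parent GPU to the per-notification-buffer object, and everything else (parent pointer, buffer index, per-buffer stats) is derived from it. The following stand-alone sketch illustrates the shape of that pattern; all names are hypothetical stand-ins, not the UVM types:

struct example_parent;

// One of these exists per notification buffer; it carries everything the
// bottom half needs, so the queue item can point at the buffer directly.
// (Hypothetical types, for illustration only.)
struct example_buffer {
    struct example_parent *parent;        // back-pointer to the shared parent state
    unsigned index;                       // which notification buffer this is
    unsigned long long bottom_half_count; // per-buffer statistic
};

// The queue callback receives the per-buffer object, not the parent.
static void example_bottom_half(void *args)
{
    struct example_buffer *buf = args;

    // Per-buffer state is updated without indexing a global array; the
    // parent is still reachable for shared state (locks, ref counts, ...).
    ++buf->bottom_half_count;
    (void)buf->parent;
    (void)buf->index;
}

Compared with passing the parent alone, this keeps concurrent bottom halves for different buffers from sharing one stats structure, which is what the [notif_buf_index] indexing above enforces.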
@@ -725,7 +812,7 @@ void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
        // clear_replayable_faults is a no-op for architectures that don't
        // support pulse-based interrupts.
        parent_gpu->fault_buffer_hal->clear_replayable_faults(parent_gpu,
                                                              parent_gpu->fault_buffer_info.replayable.cached_get);
                                                              parent_gpu->fault_buffer.replayable.cached_get);
    }

    // This unlock call has to be out-of-order unlock due to interrupts_lock
@@ -751,37 +838,41 @@ void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gp
    uvm_up(&parent_gpu->isr.non_replayable_faults.service_lock);
}

void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters)
{
    // See comments in uvm_parent_gpu_replayable_faults_isr_lock
    uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
    NvU32 notif_buf_index = access_counters->index;

    uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);

    uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
    uvm_access_counters_intr_disable(access_counters);

    uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);

    uvm_down(&parent_gpu->isr.access_counters.service_lock);
    uvm_down(&parent_gpu->isr.access_counters[notif_buf_index].service_lock);
}

void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters)
{
    uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
    NvU32 notif_buf_index = access_counters->index;
    uvm_access_counter_buffer_hal_t *ac_hal = parent_gpu->access_counter_buffer_hal;

    UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);

    // See comments in uvm_parent_gpu_replayable_faults_isr_unlock

    uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);

    uvm_parent_gpu_access_counters_intr_enable(parent_gpu);
    uvm_access_counters_intr_enable(access_counters);

    if (parent_gpu->isr.access_counters.handling_ref_count > 0) {
        parent_gpu->access_counter_buffer_hal->clear_access_counter_notifications(parent_gpu,
                                                                                  parent_gpu->access_counter_buffer_info.cached_get);
    }
    if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0)
        ac_hal->clear_access_counter_notifications(access_counters, access_counters->cached_get);

    // This unlock call has to be out-of-order unlock due to interrupts_lock
    // still being held. Otherwise, it would result in a lock order violation.
    uvm_up_out_of_order(&parent_gpu->isr.access_counters.service_lock);
    uvm_up_out_of_order(&parent_gpu->isr.access_counters[notif_buf_index].service_lock);

    uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
}
@@ -806,8 +897,11 @@ static void uvm_parent_gpu_replayable_faults_intr_enable(uvm_parent
    parent_gpu->fault_buffer_hal->enable_replayable_faults(parent_gpu);
}

void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters)
{
    uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
    NvU32 notif_buf_index = access_counters->index;

    uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);

    // The read of handling_ref_count could race with a write from
@@ -815,24 +909,27 @@ void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
    // ISR lock. But those functions are invoked with the interrupt disabled
    // (disable_intr_ref_count > 0), so the check always returns false when the
    // race occurs
    if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
        parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
        parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(parent_gpu);
    if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 &&
        parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) {
        parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(access_counters);
    }

    ++parent_gpu->isr.access_counters.disable_intr_ref_count;
    ++parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count;
}

void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters)
{
    uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
    NvU32 notif_buf_index = access_counters->index;

    uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
    UVM_ASSERT(parent_gpu->isr.access_counters.disable_intr_ref_count > 0);
    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[notif_buf_index].service_lock));
    UVM_ASSERT(parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count > 0);

    --parent_gpu->isr.access_counters.disable_intr_ref_count;
    --parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count;

    if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
        parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
        parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(parent_gpu);
    if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 &&
        parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) {
        parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(access_counters);
    }
}
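For readers skimming the diff, here is a minimal, self-contained sketch of the ref-counting scheme these two functions implement. The names are hypothetical and the hardware accesses are stubbed; only the 0 -> 1 disable transition and the 1 -> 0 enable transition touch the hardware, so nested disable/enable pairs are cheap and safe:

// Hypothetical per-buffer interrupt state, for illustration only.
typedef struct {
    unsigned handling_ref_count;     // nonzero while the buffer is being handled
    unsigned disable_intr_ref_count; // nested disable requests
} example_intr_state_t;

static void hw_disable(void) { /* write to an interrupt-disable register */ }
static void hw_enable(void)  { /* write to an interrupt-enable register */ }

static void example_intr_disable(example_intr_state_t *s)
{
    // Only the first disable request actually touches the hardware.
    if (s->handling_ref_count > 0 && s->disable_intr_ref_count == 0)
        hw_disable();
    ++s->disable_intr_ref_count;
}

static void example_intr_enable(example_intr_state_t *s)
{
    --s->disable_intr_ref_count;
    // Only the last enable request re-arms the hardware.
    if (s->handling_ref_count > 0 && s->disable_intr_ref_count == 0)
        hw_enable();
}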

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2016-2024 NVIDIA Corporation
    Copyright (c) 2016-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -70,8 +70,8 @@ typedef struct

    struct
    {
        // Number of the bottom-half invocations for this interrupt on a GPU over
        // its lifetime
        // Number of the bottom-half invocations for this interrupt on a GPU
        // over its lifetime.
        NvU64 bottom_half_count;

        // A bitmask of the CPUs on which the bottom half has executed. The
@@ -110,20 +110,20 @@ typedef struct
    // bottom-half per interrupt type.
    nv_kthread_q_t bottom_half_q;

    // Protects the state of interrupts (enabled/disabled) and whether the GPU is
    // currently handling them. Taken in both interrupt and process context.
    // Protects the state of interrupts (enabled/disabled) and whether the GPU
    // is currently handling them. Taken in both interrupt and process context.
    uvm_spinlock_irqsave_t interrupts_lock;

    uvm_intr_handler_t replayable_faults;
    uvm_intr_handler_t non_replayable_faults;
    uvm_intr_handler_t access_counters;
    uvm_intr_handler_t *access_counters;

    // Kernel thread used to kill channels on fatal non-replayable faults.
    // This is needed because we cannot call into RM from the bottom-half to
    // avoid deadlocks.
    nv_kthread_q_t kill_channel_q;

    // Number of top-half ISRs called for this GPU over its lifetime
    // Number of top-half ISRs called for this GPU over its lifetime.
    NvU64 interrupt_count;
} uvm_isr_info_t;

@@ -133,7 +133,7 @@ NV_STATUS uvm_isr_top_half_entry(const NvProcessorUuid *gpu_uuid);
// Initialize ISR handling state
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu);

// Flush any currently scheduled bottom halves. This is called during GPU
// Flush any currently scheduled bottom halves. This is called during GPU
// removal.
void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu);

@@ -146,7 +146,7 @@ void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);

// Take parent_gpu->isr.replayable_faults.service_lock from a non-top/bottom
// half thread. This will also disable replayable page fault interrupts (if
// half thread. This will also disable replayable page fault interrupts (if
// supported by the GPU) because the top half attempts to take this lock, and we
// would cause an interrupt storm if we didn't disable them first.
//
@@ -154,49 +154,48 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);

// Unlock parent_gpu->isr.replayable_faults.service_lock. This call may
// re-enable replayable page fault interrupts. Unlike
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only called from
// re-enable replayable page fault interrupts. Unlike
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only be called from
// non-top/bottom half threads, this can be called by any thread.
void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);

// Lock/unlock routines for non-replayable faults. These do not need to prevent
// interrupt storms since the GPU fault buffers for non-replayable faults are
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
// under the parent need to have been previously retained.
void uvm_parent_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);

// See uvm_parent_gpu_replayable_faults_isr_lock/unlock
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters);
void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters);

// Increments the reference count tracking whether access counter interrupts
// should be disabled. The caller is guaranteed that access counter interrupts
// are disabled upon return. Interrupts might already be disabled prior to
// making this call. Each call is ref-counted, so this must be paired with a
// call to uvm_parent_gpu_access_counters_intr_enable().
// call to uvm_access_counters_intr_enable().
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters);

// Decrements the reference count tracking whether access counter interrupts
// should be disabled. Only once the count reaches 0 are the HW interrupts
// actually enabled, so this call does not guarantee that the interrupts have
// been re-enabled upon return.
//
// uvm_parent_gpu_access_counters_intr_disable() must have been called prior to
// calling this function.
// uvm_access_counters_intr_disable() must have been called prior to calling
// this function.
//
// NOTE: For pulse-based interrupts, the caller is responsible for re-arming
// the interrupt.
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters);

// Return the first valid GPU given the parent GPU or NULL if no MIG instances
// are registered. This should only be called from bottom halves or if the
// g_uvm_global.global_lock is held so that the returned pointer remains valid.
//
uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu);

#endif // __UVM_GPU_ISR_H__
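A hedged usage sketch of the locking discipline this header's comments describe, using plain Linux primitives as stand-ins for the UVM wrappers (uvm_spin_lock_irqsave, uvm_down, uvm_up_out_of_order): interrupts are quiesced under the interrupts lock before the sleepable service lock is taken, and on unlock the semaphore is released while the spinlock is still held, which is the "out-of-order" unlock the .c file calls out explicitly. All structure and function names here are hypothetical:

#include <linux/spinlock.h>
#include <linux/semaphore.h>

// Hypothetical container mirroring the isr lock/unlock pattern above.
struct example_isr_state {
    spinlock_t interrupts_lock;    // protects HW interrupt enable state
    struct semaphore service_lock; // serializes servicing; held while sleeping
};

static void example_hw_intr_disable(struct example_isr_state *s) { }
static void example_hw_intr_enable(struct example_isr_state *s) { }

// Quiesce the interrupt first so the top half stops scheduling bottom halves,
// then take the (sleepable) service lock outside the spinlock.
static void example_isr_lock(struct example_isr_state *s)
{
    unsigned long flags;

    spin_lock_irqsave(&s->interrupts_lock, flags);
    example_hw_intr_disable(s);
    spin_unlock_irqrestore(&s->interrupts_lock, flags);

    down(&s->service_lock);
}

// Re-enable under the spinlock, and release the semaphore before dropping the
// spinlock; up() never sleeps, so this is legal, but a lock tracker that
// expects unlocks in reverse acquisition order sees it as out-of-order, hence
// the dedicated helper in the real code.
static void example_isr_unlock(struct example_isr_state *s)
{
    unsigned long flags;

    spin_lock_irqsave(&s->interrupts_lock, flags);
    example_hw_intr_enable(s);
    up(&s->service_lock);
    spin_unlock_irqrestore(&s->interrupts_lock, flags);
}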

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2017-2024 NVIDIA Corporation
    Copyright (c) 2017-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -119,18 +119,18 @@
// calling uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults on failure.
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;

    UVM_ASSERT(parent_gpu->non_replayable_faults_supported);

    non_replayable_faults->shadow_buffer_copy = NULL;
    non_replayable_faults->fault_cache = NULL;

    non_replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize /
    non_replayable_faults->max_faults = parent_gpu->fault_buffer.rm_info.nonReplayable.bufferSize /
                                        parent_gpu->fault_buffer_hal->entry_size(parent_gpu);

    non_replayable_faults->shadow_buffer_copy =
        uvm_kvmalloc_zero(parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize);
        uvm_kvmalloc_zero(parent_gpu->fault_buffer.rm_info.nonReplayable.bufferSize);
    if (!non_replayable_faults->shadow_buffer_copy)
        return NV_ERR_NO_MEMORY;

@@ -147,7 +147,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_

void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;

    if (non_replayable_faults->fault_cache) {
        UVM_ASSERT(uvm_tracker_is_empty(&non_replayable_faults->clear_faulted_tracker));
@@ -170,7 +170,7 @@ bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)

    UVM_ASSERT(parent_gpu->isr.non_replayable_faults.handling);

    status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
    status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer.rm_info,
                                                         &has_pending_faults);
    UVM_ASSERT(status == NV_OK);

@@ -182,14 +182,14 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
    NV_STATUS status;
    NvU32 i;
    NvU32 entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
    char *current_hw_entry = (char *)non_replayable_faults->shadow_buffer_copy;
    uvm_fault_buffer_entry_t *fault_entry = non_replayable_faults->fault_cache;

    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.non_replayable_faults.service_lock));
    UVM_ASSERT(parent_gpu->non_replayable_faults_supported);

    status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
    status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer.rm_info,
                                                  current_hw_entry,
                                                  cached_faults);

@@ -267,7 +267,7 @@ static NV_STATUS clear_faulted_method_on_gpu(uvm_user_channel_t *user_channel,
    uvm_gpu_t *gpu = user_channel->gpu;
    NV_STATUS status;
    uvm_push_t push;
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;

    UVM_ASSERT(!fault_entry->is_fatal);

@@ -355,7 +355,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_va_block_t *va_block,
    uvm_processor_id_t new_residency;
    bool read_duplicate;
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
    const uvm_va_policy_t *policy;

    UVM_ASSERT(!fault_entry->is_fatal);
@@ -450,7 +450,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
    NV_STATUS status, tracker_status;
    uvm_va_block_retry_t va_block_retry;
    uvm_gpu_t *gpu = fault_entry->gpu;
    uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.non_replayable.block_service_context;
    uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer.non_replayable.block_service_context;

    service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
    service_context->num_retries = 0;
@@ -467,7 +467,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
                                                       service_context,
                                                       hmm_migratable));

    tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer_info.non_replayable.fault_service_tracker,
    tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer.non_replayable.fault_service_tracker,
                                                  &va_block->tracker);

    uvm_mutex_unlock(&va_block->lock);
@@ -507,7 +507,7 @@ static void schedule_kill_channel(uvm_fault_buffer_entry_t *fault_entry, uvm_use
{
    uvm_va_space_t *va_space = fault_entry->va_space;
    uvm_parent_gpu_t *parent_gpu = fault_entry->gpu->parent;
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
    void *packet = (char *)non_replayable_faults->shadow_buffer_copy +
                   (fault_entry->non_replayable.buffer_index * parent_gpu->fault_buffer_hal->entry_size(parent_gpu));

@@ -551,7 +551,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
{
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
    uvm_ats_fault_invalidate_t *ats_invalidate = &non_replayable_faults->ats_invalidate;
    NV_STATUS status = lookup_status;
    NV_STATUS fatal_fault_status = NV_ERR_INVALID_ADDRESS;
@@ -588,7 +588,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,

        ats_invalidate->tlb_batch_pending = false;

        va_range_next = uvm_va_space_iter_first(va_space, fault_entry->fault_address, ~0ULL);
        va_range_next = uvm_va_space_iter_gmmu_mappable_first(va_space, fault_entry->fault_address);

        // The VA isn't managed. See if ATS knows about it.
        vma = find_vma_intersection(mm, fault_address, fault_address + 1);
@@ -649,7 +649,7 @@ static NV_STATUS service_fault_once(uvm_parent_gpu_t *parent_gpu,
    struct mm_struct *mm;
    uvm_gpu_va_space_t *gpu_va_space;
    uvm_gpu_t *gpu;
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
    uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
    uvm_va_block_context_t *va_block_context = non_replayable_faults->block_service_context.block_context;

    status = uvm_parent_gpu_fault_entry_to_va_space(parent_gpu,
@@ -757,7 +757,7 @@ exit_no_channel:
static NV_STATUS service_fault(uvm_parent_gpu_t *parent_gpu, uvm_fault_buffer_entry_t *fault_entry)
{
    uvm_service_block_context_t *service_context =
        &parent_gpu->fault_buffer_info.non_replayable.block_service_context;
        &parent_gpu->fault_buffer.non_replayable.block_service_context;
    NV_STATUS status;
    bool hmm_migratable = true;

@@ -794,7 +794,7 @@ void uvm_parent_gpu_service_non_replayable_fault_buffer(uvm_parent
    // non-replayable faults since getting multiple faults on the same
    // memory region is not very likely
    for (i = 0; i < cached_faults; ++i) {
        status = service_fault(parent_gpu, &parent_gpu->fault_buffer_info.non_replayable.fault_cache[i]);
        status = service_fault(parent_gpu, &parent_gpu->fault_buffer.non_replayable.fault_cache[i]);
        if (status != NV_OK)
            return;
    }

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2024 NVIDIA Corporation
    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -119,7 +119,7 @@ module_param(uvm_perf_fault_coalesce, uint, S_IRUGO);
// the power management resume path.
static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;

    // Read the current get/put pointers, as this might not be the first time
    // we take control of the fault buffer since the GPU was initialized,
@@ -129,7 +129,7 @@ static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
    replayable_faults->cached_put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);

    // (Re-)enable fault prefetching
    if (parent_gpu->fault_buffer_info.prefetch_faults_enabled)
    if (parent_gpu->fault_buffer.prefetch_faults_enabled)
        parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
    else
        parent_gpu->arch_hal->disable_prefetch_faults(parent_gpu);
@@ -140,28 +140,28 @@ static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
    NV_STATUS status = NV_OK;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
    uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;

    UVM_ASSERT(parent_gpu->fault_buffer_info.rm_info.replayable.bufferSize %
    UVM_ASSERT(parent_gpu->fault_buffer.rm_info.replayable.bufferSize %
               parent_gpu->fault_buffer_hal->entry_size(parent_gpu) == 0);

    replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.replayable.bufferSize /
    replayable_faults->max_faults = parent_gpu->fault_buffer.rm_info.replayable.bufferSize /
                                    parent_gpu->fault_buffer_hal->entry_size(parent_gpu);

    // Check provided module parameter value
    parent_gpu->fault_buffer_info.max_batch_size = max(uvm_perf_fault_batch_count,
                                                       (NvU32)UVM_PERF_FAULT_BATCH_COUNT_MIN);
    parent_gpu->fault_buffer_info.max_batch_size = min(parent_gpu->fault_buffer_info.max_batch_size,
                                                       replayable_faults->max_faults);
    parent_gpu->fault_buffer.max_batch_size = max(uvm_perf_fault_batch_count,
                                                  (NvU32)UVM_PERF_FAULT_BATCH_COUNT_MIN);
    parent_gpu->fault_buffer.max_batch_size = min(parent_gpu->fault_buffer.max_batch_size,
                                                  replayable_faults->max_faults);

    if (parent_gpu->fault_buffer_info.max_batch_size != uvm_perf_fault_batch_count) {
        pr_info("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
                uvm_parent_gpu_name(parent_gpu),
                uvm_perf_fault_batch_count,
                UVM_PERF_FAULT_BATCH_COUNT_MIN,
                replayable_faults->max_faults,
                parent_gpu->fault_buffer_info.max_batch_size);
    if (parent_gpu->fault_buffer.max_batch_size != uvm_perf_fault_batch_count) {
        UVM_INFO_PRINT("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
                       uvm_parent_gpu_name(parent_gpu),
                       uvm_perf_fault_batch_count,
                       UVM_PERF_FAULT_BATCH_COUNT_MIN,
                       replayable_faults->max_faults,
                       parent_gpu->fault_buffer.max_batch_size);
    }

    batch_context->fault_cache = uvm_kvmalloc_zero(replayable_faults->max_faults * sizeof(*batch_context->fault_cache));
@@ -198,22 +198,22 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gp
                                       UVM_PERF_FAULT_REPLAY_POLICY_DEFAULT;

    if (replayable_faults->replay_policy != uvm_perf_fault_replay_policy) {
        pr_info("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
                uvm_parent_gpu_name(parent_gpu),
                uvm_perf_fault_replay_policy,
                replayable_faults->replay_policy);
        UVM_INFO_PRINT("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
                       uvm_parent_gpu_name(parent_gpu),
                       uvm_perf_fault_replay_policy,
                       replayable_faults->replay_policy);
    }

    replayable_faults->replay_update_put_ratio = min(uvm_perf_fault_replay_update_put_ratio, 100u);
    if (replayable_faults->replay_update_put_ratio != uvm_perf_fault_replay_update_put_ratio) {
        pr_info("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
                uvm_parent_gpu_name(parent_gpu),
                uvm_perf_fault_replay_update_put_ratio,
                replayable_faults->replay_update_put_ratio);
        UVM_INFO_PRINT("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
                       uvm_parent_gpu_name(parent_gpu),
                       uvm_perf_fault_replay_update_put_ratio,
                       replayable_faults->replay_update_put_ratio);
    }

    // Re-enable fault prefetching just in case it was disabled in a previous run
    parent_gpu->fault_buffer_info.prefetch_faults_enabled = parent_gpu->prefetch_fault_supported;
    parent_gpu->fault_buffer.prefetch_faults_enabled = parent_gpu->prefetch_fault_supported;

    fault_buffer_reinit_replayable_faults(parent_gpu);

@@ -222,7 +222,7 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gp

static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
    uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;

    if (batch_context->fault_cache) {
@@ -230,9 +230,9 @@ static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
        uvm_tracker_deinit(&replayable_faults->replay_tracker);
    }

    if (parent_gpu->fault_buffer_info.rm_info.faultBufferHandle) {
    if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) {
        // Re-enable prefetch faults in case we disabled them
        if (parent_gpu->prefetch_fault_supported && !parent_gpu->fault_buffer_info.prefetch_faults_enabled)
        if (parent_gpu->prefetch_fault_supported && !parent_gpu->fault_buffer.prefetch_faults_enabled)
            parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
    }

@@ -252,7 +252,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
    UVM_ASSERT(parent_gpu->replayable_faults_supported);

    status = uvm_rm_locked_call(nvUvmInterfaceInitFaultInfo(parent_gpu->rm_device,
                                                            &parent_gpu->fault_buffer_info.rm_info));
                                                            &parent_gpu->fault_buffer.rm_info));
    if (status != NV_OK) {
        UVM_ERR_PRINT("Failed to init fault buffer info from RM: %s, GPU %s\n",
                      nvstatusToString(status),
@@ -262,7 +262,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
        // when it returns an error. Set the buffer handle to zero as it is
        // used by the deinitialization logic to determine if it was correctly
        // initialized.
        parent_gpu->fault_buffer_info.rm_info.faultBufferHandle = 0;
        parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0;
        goto fail;
    }

@@ -304,24 +304,25 @@ void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)

    fault_buffer_deinit_replayable_faults(parent_gpu);

    if (parent_gpu->fault_buffer_info.rm_info.faultBufferHandle) {
    if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) {
        status = uvm_rm_locked_call(nvUvmInterfaceOwnPageFaultIntr(parent_gpu->rm_device, NV_FALSE));
        UVM_ASSERT(status == NV_OK);

        uvm_rm_locked_call_void(nvUvmInterfaceDestroyFaultInfo(parent_gpu->rm_device,
                                                               &parent_gpu->fault_buffer_info.rm_info));
                                                               &parent_gpu->fault_buffer.rm_info));

        parent_gpu->fault_buffer_info.rm_info.faultBufferHandle = 0;
        parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0;
    }
}

bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
{
    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;

    UVM_ASSERT(parent_gpu->replayable_faults_supported);

    // Fast path 1: we left some faults unserviced in the buffer in the last pass
    // Fast path 1: we left some faults unserviced in the buffer in the last
    // pass
    if (replayable_faults->cached_get != replayable_faults->cached_put)
        return true;
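The cached_get/cached_put fast path above is a plain ring-buffer emptiness check: the fault buffer is a ring, and faults are pending whenever the cached GET pointer has not caught up with the cached PUT pointer. A minimal sketch with hypothetical types (in the real code the GET/PUT registers are read through the fault buffer HAL shown elsewhere in this diff):

// Hypothetical ring-buffer bookkeeping, for illustration only.
typedef struct {
    unsigned cached_get; // next entry to consume
    unsigned cached_put; // one past the last entry written by HW
    unsigned max_faults; // ring size in entries
} example_ring_t;

static int example_ring_pending(const example_ring_t *r)
{
    // Empty exactly when GET has caught up with PUT.
    return r->cached_get != r->cached_put;
}

static void example_ring_consume(example_ring_t *r)
{
    // Advance GET and wrap at the end of the ring.
    r->cached_get = (r->cached_get + 1) % r->max_faults;
}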

@@ -357,7 +358,7 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer_info.replayable.replay_tracker;
    uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer.replayable.replay_tracker;

    UVM_ASSERT(tracker != NULL);

@@ -443,7 +444,7 @@ static NV_STATUS cancel_fault_precise_va(uvm_fault_buffer_entry_t *fault_entry,
    uvm_gpu_t *gpu = fault_entry->gpu;
    uvm_gpu_phys_address_t pdb;
    uvm_push_t push;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
    NvU64 offset;

    UVM_ASSERT(gpu->parent->replayable_faults_supported);
@@ -452,7 +453,7 @@ static NV_STATUS cancel_fault_precise_va(uvm_fault_buffer_entry_t *fault_entry,

    gpu_va_space = uvm_gpu_va_space_get(va_space, gpu);
    UVM_ASSERT(gpu_va_space);
    pdb = uvm_page_tree_pdb(&gpu_va_space->page_tables)->addr;
    pdb = uvm_page_tree_pdb_address(&gpu_va_space->page_tables);

    // Record fatal fault event
    uvm_tools_record_gpu_fatal_fault(gpu->id, va_space, fault_entry, fault_entry->fatal_reason);
@@ -505,7 +506,7 @@ static NV_STATUS push_replay_on_gpu(uvm_gpu_t *gpu,
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
    uvm_tracker_t *tracker = NULL;

    if (batch_context)
@@ -556,7 +557,7 @@ static NV_STATUS push_replay_on_parent_gpu(uvm_parent_gpu_t *parent_gpu,

static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
{
    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;

    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));

@@ -589,7 +590,7 @@ static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu, hw_f
        return NV_OK;

    is_flush_mode_move = (NvBool) (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
    status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer_info.rm_info, is_flush_mode_move);
    status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer.rm_info, is_flush_mode_move);

    UVM_ASSERT(status == NV_OK);

@@ -618,7 +619,7 @@ static NV_STATUS fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
    NvU32 get;
    NvU32 put;
    uvm_spin_loop_t spin;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
    NV_STATUS status;

    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
@@ -852,7 +853,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
    uvm_fault_buffer_entry_t *fault_cache;
    uvm_spin_loop_t spin;
    NV_STATUS status = NV_OK;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
    const bool in_pascal_cancel_path = (!parent_gpu->fault_cancel_va_supported && fetch_mode == FAULT_FETCH_MODE_ALL);
    const bool may_filter = uvm_perf_fault_coalesce && !in_pascal_cancel_path;

@@ -887,7 +888,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,

    // Parse until get != put and have enough space to cache.
    while ((get != put) &&
           (fetch_mode == FAULT_FETCH_MODE_ALL || fault_index < parent_gpu->fault_buffer_info.max_batch_size)) {
           (fetch_mode == FAULT_FETCH_MODE_ALL || fault_index < parent_gpu->fault_buffer.max_batch_size)) {
        bool is_same_instance_ptr = true;
        uvm_fault_buffer_entry_t *current_entry = &fault_cache[fault_index];
        uvm_fault_utlb_info_t *current_tlb;
@@ -1385,7 +1386,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
    uvm_page_index_t last_page_index;
    NvU32 page_fault_count = 0;
    uvm_range_group_range_iter_t iter;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
    uvm_fault_buffer_entry_t **ordered_fault_cache = batch_context->ordered_fault_cache;
    uvm_fault_buffer_entry_t *first_fault_entry = ordered_fault_cache[first_fault_index];
    uvm_service_block_context_t *block_context = &replayable_faults->block_service_context;
@@ -1612,7 +1613,7 @@ static NV_STATUS service_fault_batch_block(uvm_gpu_t *gpu,
    NV_STATUS status;
    uvm_va_block_retry_t va_block_retry;
    NV_STATUS tracker_status;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
    uvm_service_block_context_t *fault_block_context = &replayable_faults->block_service_context;

    fault_block_context->operation = UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS;
@@ -1803,7 +1804,7 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
    uvm_page_mask_t *prefetch_only_fault_mask = &ats_context->faults.prefetch_only_fault_mask;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    bool replay_per_va_block =
        (gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);
        (gpu->parent->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);

    UVM_ASSERT(vma);

@@ -1851,8 +1852,8 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,

        page_index = (fault_address - sub_batch_base) / PAGE_SIZE;

        // Do not check for coalesced access type. If there are multiple different
        // accesses to an address, we can disregard the prefetch one.
        // Do not check for coalesced access type. If there are multiple
        // different accesses to an address, we can disregard the prefetch one.
        if ((access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH) &&
            (uvm_fault_access_type_mask_highest(current_entry->access_type_mask) == UVM_FAULT_ACCESS_TYPE_PREFETCH))
            uvm_page_mask_set(prefetch_only_fault_mask, page_index);
@@ -1956,19 +1957,19 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
    uvm_va_block_t *va_block;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    uvm_va_block_context_t *va_block_context =
        gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;
        gpu->parent->fault_buffer.replayable.block_service_context.block_context;
    uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[fault_index];
    struct mm_struct *mm = va_block_context->mm;
    NvU64 fault_address = current_entry->fault_address;

    (*block_faults) = 0;

    va_range_next = uvm_va_space_iter_first(va_space, fault_address, ~0ULL);
    va_range_next = uvm_va_space_iter_gmmu_mappable_first(va_space, fault_address);
    if (va_range_next && (fault_address >= va_range_next->node.start)) {
        UVM_ASSERT(fault_address < va_range_next->node.end);

        va_range = va_range_next;
        va_range_next = uvm_va_space_iter_next(va_range_next, ~0ULL);
        va_range_next = uvm_va_range_gmmu_mappable_next(va_range);
    }

    if (va_range)
@@ -1985,7 +1986,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
        NvU64 outer = ~0ULL;

        UVM_ASSERT(replay_per_va_block ==
                   (gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK));
                   (gpu->parent->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK));

        // Limit outer to the minimum of next va_range.start and first
        // fault_address' next UVM_GMMU_ATS_GRANULARITY alignment so that it's
@@ -2046,8 +2047,8 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_fault_service_batch_context_
    uvm_gpu_t *gpu = batch_context->fatal_gpu;
    uvm_gpu_va_space_t *gpu_va_space = NULL;
    struct mm_struct *mm;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
    uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.replayable.block_service_context;
    uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
    uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer.replayable.block_service_context;
    uvm_va_block_context_t *va_block_context = service_context->block_context;

    UVM_ASSERT(va_space);
@@ -2155,7 +2156,7 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_fault_service_batch_context_
            ++i;
        }
        else {
            uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
            uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer.replayable.ats_invalidate;
            NvU32 block_faults;
            const bool hmm_migratable = true;

@@ -2236,12 +2237,12 @@ static NV_STATUS service_fault_batch(uvm_parent_gpu_t *parent_gpu,
    NvU32 i;
    uvm_va_space_t *va_space = NULL;
    uvm_gpu_va_space_t *prev_gpu_va_space = NULL;
    uvm_ats_fault_invalidate_t *ats_invalidate = &parent_gpu->fault_buffer_info.replayable.ats_invalidate;
    uvm_ats_fault_invalidate_t *ats_invalidate = &parent_gpu->fault_buffer.replayable.ats_invalidate;
    struct mm_struct *mm = NULL;
    const bool replay_per_va_block = service_mode != FAULT_SERVICE_MODE_CANCEL &&
                                     parent_gpu->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
                                     parent_gpu->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
    uvm_service_block_context_t *service_context =
        &parent_gpu->fault_buffer_info.replayable.block_service_context;
        &parent_gpu->fault_buffer.replayable.block_service_context;
    uvm_va_block_context_t *va_block_context = service_context->block_context;
    bool hmm_migratable = true;

@@ -2711,8 +2712,9 @@ static void cancel_fault_batch(uvm_parent_gpu_t *parent_gpu,
// 5- Fetch all faults from buffer
// 6- Check what uTLBs are in lockdown mode and can be cancelled
// 7- Preprocess faults (order per va_space, fault address, access type)
// 8- Service all non-fatal faults and mark all non-serviceable faults as fatal
// 6.1- If fatal faults are not found, we are done
// 8- Service all non-fatal faults and mark all non-serviceable faults as
//    fatal.
// 8.1- If fatal faults are not found, we are done
// 9- Search for a uTLB which can be targeted for cancel, as described in
//    try_to_cancel_utlbs. If found, cancel it.
// END LOOP
@@ -2726,14 +2728,14 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
{
    NV_STATUS status;
    NV_STATUS tracker_status;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
    bool first = true;

    UVM_ASSERT(gpu->parent->replayable_faults_supported);

    // 1) Disable prefetching to avoid new requests keep coming and flooding
    //    the buffer
    if (gpu->parent->fault_buffer_info.prefetch_faults_enabled)
    if (gpu->parent->fault_buffer.prefetch_faults_enabled)
        gpu->parent->arch_hal->disable_prefetch_faults(gpu->parent);

    while (1) {
@@ -2847,7 +2849,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
    }

    // 10) Re-enable prefetching
    if (gpu->parent->fault_buffer_info.prefetch_faults_enabled)
    if (gpu->parent->fault_buffer.prefetch_faults_enabled)
        gpu->parent->arch_hal->enable_prefetch_faults(gpu->parent);

    if (status == NV_OK)
@@ -2884,16 +2886,16 @@ static void enable_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu,
    // comment in mark_fault_invalid_prefetch(..).
    // Some tests rely on this logic (and ratio) to correctly disable prefetch
    // fault reporting. If the logic changes, the tests will have to be changed.
    if (parent_gpu->fault_buffer_info.prefetch_faults_enabled &&
    if (parent_gpu->fault_buffer.prefetch_faults_enabled &&
        uvm_perf_reenable_prefetch_faults_lapse_msec > 0 &&
        ((batch_context->num_invalid_prefetch_faults * 3 > parent_gpu->fault_buffer_info.max_batch_size * 2) ||
        ((batch_context->num_invalid_prefetch_faults * 3 > parent_gpu->fault_buffer.max_batch_size * 2) ||
         (uvm_enable_builtin_tests &&
          parent_gpu->rm_info.isSimulated &&
          batch_context->num_invalid_prefetch_faults > 5))) {
        uvm_parent_gpu_disable_prefetch_faults(parent_gpu);
    }
    else if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
        NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp;
    else if (!parent_gpu->fault_buffer.prefetch_faults_enabled) {
        NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer.disable_prefetch_faults_timestamp;

        // Reenable prefetch faults after some time
        if (lapse > ((NvU64)uvm_perf_reenable_prefetch_faults_lapse_msec * (1000 * 1000)))
@@ -2907,7 +2909,7 @@ void uvm_parent_gpu_service_replayable_faults(uvm_parent_gpu_t *parent_gpu)
    NvU32 num_batches = 0;
    NvU32 num_throttled = 0;
    NV_STATUS status = NV_OK;
    uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
    uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
    uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;

    UVM_ASSERT(parent_gpu->replayable_faults_supported);
@@ -3030,9 +3032,9 @@ void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
    UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
    UVM_ASSERT(parent_gpu->prefetch_fault_supported);

    if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
    if (!parent_gpu->fault_buffer.prefetch_faults_enabled) {
        parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
        parent_gpu->fault_buffer_info.prefetch_faults_enabled = true;
        parent_gpu->fault_buffer.prefetch_faults_enabled = true;
    }
}

@@ -3041,10 +3043,10 @@ void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
    UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
    UVM_ASSERT(parent_gpu->prefetch_fault_supported);

    if (parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
    if (parent_gpu->fault_buffer.prefetch_faults_enabled) {
        parent_gpu->arch_hal->disable_prefetch_faults(parent_gpu);
        parent_gpu->fault_buffer_info.prefetch_faults_enabled = false;
        parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp = NV_GETTIME();
        parent_gpu->fault_buffer.prefetch_faults_enabled = false;
        parent_gpu->fault_buffer.disable_prefetch_faults_timestamp = NV_GETTIME();
    }
}

@@ -792,7 +792,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
    //
    // Notably as of 4.3, atomic64_set_release() and atomic64_read_acquire()
    // have been added that are exactly what we need and could be slightly
    // faster on arm and powerpc than the implementation below. But at least in
    // faster on arm than the implementation below. But at least in
    // 4.3 the implementation looks broken for arm32 (it maps directly to
    // smp_load_acquire() and that doesn't support 64-bit reads on 32-bit
    // architectures) so instead of dealing with that just use a slightly bigger
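The comment breaks off at the hunk boundary, but the pattern it describes is a release-style publish and an acquire-style read of a 64-bit completed value, with a full barrier as the "bigger hammer" that sidesteps the arm32 limitation. A hedged sketch of that ordering with standard kernel primitives; this illustrates the idea, not the UVM implementation:

#include <linux/atomic.h>

// Publish a 64-bit value so that all earlier payload writes are visible
// before the value itself (release-like, via a full barrier).
static void example_publish(atomic64_t *v, long long val)
{
    smp_mb();
    atomic64_set(v, val);
}

// Read the value so that all later payload reads happen after the load
// (acquire-like, via a full barrier). atomic64_read() is safe for 64-bit
// loads even on 32-bit architectures, unlike a bare smp_load_acquire().
static long long example_consume(atomic64_t *v)
{
    long long val = atomic64_read(v);

    smp_mb();
    return val;
}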

@@ -217,7 +217,6 @@ static uvm_hal_class_ops_t host_table[] =
            .clear_faulted_channel_method = uvm_hal_maxwell_host_clear_faulted_channel_method_unsupported,
            .clear_faulted_channel_register = uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported,
            .access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported,
            .access_counter_clear_type = uvm_hal_maxwell_access_counter_clear_type_unsupported,
            .access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported,
            .get_time = uvm_hal_maxwell_get_time,
        }
@@ -254,9 +253,6 @@ static uvm_hal_class_ops_t host_table[] =
            .replay_faults = uvm_hal_volta_replay_faults,
            .cancel_faults_va = uvm_hal_volta_cancel_faults_va,
            .clear_faulted_channel_method = uvm_hal_volta_host_clear_faulted_channel_method,
            .access_counter_clear_all = uvm_hal_volta_access_counter_clear_all,
            .access_counter_clear_type = uvm_hal_volta_access_counter_clear_type,
            .access_counter_clear_targeted = uvm_hal_volta_access_counter_clear_targeted,
            .semaphore_timestamp = uvm_hal_volta_host_semaphore_timestamp,
        }
    },
@@ -271,6 +267,8 @@ static uvm_hal_class_ops_t host_table[] =
            .tlb_invalidate_all = uvm_hal_turing_host_tlb_invalidate_all,
            .tlb_invalidate_va = uvm_hal_turing_host_tlb_invalidate_va,
            .tlb_invalidate_test = uvm_hal_turing_host_tlb_invalidate_test,
            .access_counter_clear_all = uvm_hal_turing_access_counter_clear_all,
            .access_counter_clear_targeted = uvm_hal_turing_access_counter_clear_targeted,
        }
    },
    {
@@ -409,6 +407,32 @@ static uvm_hal_class_ops_t arch_table[] =
    },
};

// chip_table[] is different from the other class op tables - it is used to
// apply chip specific overrides to arch ops. This means unlike the other class
// op tables, parent_id does not refer to a preceding entry within the table
// itself. parent_id is an architecture (not a chip id) and instead refers to an
// entry in arch_table[]. This means that arch_table[] must be initialized
// before chip_table[]. chip_table[] must be initialized using
// ops_init_from_table(arch_table) instead of ops_init_from_parent().
// TODO: BUG 5044266: the chip ops should be separated from the arch ops.
static uvm_hal_class_ops_t chip_table[] =
{
    {
        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100 | NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB10B,
        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
        .u.arch_ops = {
            .mmu_mode_hal = uvm_hal_mmu_mode_blackwell_integrated,
        }
    },
    {
        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200 | NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GB20B,
        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200,
        .u.arch_ops = {
            .mmu_mode_hal = uvm_hal_mmu_mode_blackwell_integrated,
        }
    },
};

static uvm_hal_class_ops_t fault_buffer_table[] =
{
    {
@@ -537,22 +561,19 @@ static uvm_hal_class_ops_t access_counter_buffer_table[] =
    {
        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
        .u.access_counter_buffer_ops = {
            .enable_access_counter_notifications = uvm_hal_volta_enable_access_counter_notifications,
            .disable_access_counter_notifications = uvm_hal_volta_disable_access_counter_notifications,
            .clear_access_counter_notifications = uvm_hal_volta_clear_access_counter_notifications,
            .parse_entry = uvm_hal_volta_access_counter_buffer_parse_entry,
            .entry_is_valid = uvm_hal_volta_access_counter_buffer_entry_is_valid,
            .entry_clear_valid = uvm_hal_volta_access_counter_buffer_entry_clear_valid,
            .entry_size = uvm_hal_volta_access_counter_buffer_entry_size,
        }
        .u.access_counter_buffer_ops = {}
    },
    {
        .id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
        .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
        .u.access_counter_buffer_ops = {
            .enable_access_counter_notifications = uvm_hal_turing_enable_access_counter_notifications,
            .disable_access_counter_notifications = uvm_hal_turing_disable_access_counter_notifications,
            .clear_access_counter_notifications = uvm_hal_turing_clear_access_counter_notifications,
            .parse_entry = uvm_hal_turing_access_counter_buffer_parse_entry,
            .entry_is_valid = uvm_hal_turing_access_counter_buffer_entry_is_valid,
            .entry_clear_valid = uvm_hal_turing_access_counter_buffer_entry_clear_valid,
            .entry_size = uvm_hal_turing_access_counter_buffer_entry_size,
        }
    },
    {
@@ -675,33 +696,35 @@ static inline void op_copy(uvm_hal_class_ops_t *dst, uvm_hal_class_ops_t *src, N
    memcpy(m_dst, m_src, sizeof(void *));
}

static inline NV_STATUS ops_init_from_parent(uvm_hal_class_ops_t *table,
                                             NvU32 row_count,
                                             NvLength op_count,
                                             NvLength op_offset)
static inline NV_STATUS ops_init_from_table(uvm_hal_class_ops_t *dest_table,
                                            NvU32 dest_row_count,
                                            uvm_hal_class_ops_t *src_table,
                                            NvU32 src_row_count,
                                            NvLength op_count,
                                            NvLength op_offset)
{
    NvLength i;

    for (i = 0; i < row_count; i++) {
    for (i = 0; i < dest_row_count; i++) {
        NvLength j;
        uvm_hal_class_ops_t *parent = NULL;

        if (table[i].parent_id != 0) {
            parent = ops_find_by_id(table, i, table[i].parent_id);
        if (dest_table[i].parent_id != 0) {
            parent = ops_find_by_id(src_table, src_row_count, dest_table[i].parent_id);
            if (parent == NULL)
                return NV_ERR_INVALID_CLASS;

            // Go through all the ops and assign from parent's corresponding op
            // if NULL
            for (j = 0; j < op_count; j++) {
                if (op_is_null(table + i, j, op_offset))
                    op_copy(table + i, parent, j, op_offset);
                if (op_is_null(dest_table + i, j, op_offset))
                    op_copy(dest_table + i, parent, j, op_offset);
            }
        }

        // At this point, it is an error to have missing HAL operations
        for (j = 0; j < op_count; j++) {
            if (op_is_null(table + i, j, op_offset))
            if (op_is_null(dest_table + i, j, op_offset))
                return NV_ERR_INVALID_STATE;
        }
    }
@@ -709,6 +732,19 @@ static inline NV_STATUS ops_init_from_parent(uvm_hal_class_ops_t *table,
    return NV_OK;
}

static inline NV_STATUS ops_init_from_parent(uvm_hal_class_ops_t *table,
                                             NvU32 row_count,
                                             NvLength op_count,
                                             NvLength op_offset)
{
    return ops_init_from_table(table,
                               row_count,
                               table,
                               row_count,
                               op_count,
                               op_offset);
}
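To make the refactor concrete: ops_init_from_table() fills NULL function pointers in a destination row from the row in a (possibly different) source table whose id matches the destination's parent_id, which is what lets chip_table[] inherit from arch_table[] while ops_init_from_parent() keeps its old same-table behavior. Below is a hypothetical, self-contained illustration of that inheritance rule; the struct and op names are stand-ins, not the real uvm_hal_class_ops_t layout:

// Hypothetical ops table row with two function-pointer "ops".
struct example_ops {
    int id;
    int parent_id;
    void (*op_a)(void);
    void (*op_b)(void);
};

static void base_a(void) { }
static void base_b(void) { }
static void derived_b(void) { }

static struct example_ops base_table[] = {
    { .id = 1, .parent_id = 0, .op_a = base_a, .op_b = base_b },
};

static struct example_ops override_table[] = {
    // op_a is left NULL and will be inherited; op_b is an explicit override.
    { .id = 100, .parent_id = 1, .op_b = derived_b },
};

// Tiny resolver mirroring the inheritance rule: copy any NULL op from the
// matching parent row in the source table.
static void example_init_from_table(struct example_ops *dst, int dst_rows,
                                    const struct example_ops *src, int src_rows)
{
    int i, j;

    for (i = 0; i < dst_rows; i++) {
        for (j = 0; j < src_rows; j++) {
            if (src[j].id == dst[i].parent_id) {
                if (!dst[i].op_a)
                    dst[i].op_a = src[j].op_a;
                if (!dst[i].op_b)
                    dst[i].op_b = src[j].op_b;
            }
        }
    }
}

After example_init_from_table(override_table, 1, base_table, 1), override_table[0].op_a points at base_a while the explicit derived_b override is preserved, which is the same resolution uvm_hal_init_gpu() relies on below when it swaps in a chip-specific arch_hal.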
|
||||
|
||||
NV_STATUS uvm_hal_init_table(void)
|
||||
{
|
||||
NV_STATUS status;
|
||||
@@ -737,6 +773,18 @@ NV_STATUS uvm_hal_init_table(void)
|
||||
return status;
|
||||
}
|
||||
|
||||
// chip_table[] must be initialized after arch_table[].
|
||||
status = ops_init_from_table(chip_table,
|
||||
ARRAY_SIZE(chip_table),
|
||||
arch_table,
|
||||
ARRAY_SIZE(arch_table),
|
||||
ARCH_OP_COUNT,
|
||||
offsetof(uvm_hal_class_ops_t, u.arch_ops));
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("ops_init_from_table(chip_table) failed: %s\n", nvstatusToString(status));
|
||||
return status;
|
||||
}
|
||||
|
||||
status = ops_init_from_parent(fault_buffer_table,
|
||||
ARRAY_SIZE(fault_buffer_table),
|
||||
FAULT_BUFFER_OP_COUNT,
|
||||
@@ -802,6 +850,13 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
 
     parent_gpu->arch_hal = &class_ops->u.arch_ops;
 
+    // Apply per chip overrides if required
+    class_ops = ops_find_by_id(chip_table,
+                               ARRAY_SIZE(chip_table),
+                               gpu_info->gpuArch | gpu_info->gpuImplementation);
+    if (class_ops)
+        parent_gpu->arch_hal = &class_ops->u.arch_ops;
+
     class_ops = ops_find_by_id(fault_buffer_table, ARRAY_SIZE(fault_buffer_table), gpu_info->gpuArch);
     if (class_ops == NULL) {
         UVM_ERR_PRINT("Fault buffer HAL not found, GPU %s, arch: 0x%X\n",
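The hunk above resolves the arch-level HAL first, then lets an exact chip match (arch id OR'd with the implementation id) override it. A distilled sketch of that two-step resolution, with stand-in names for ops_find_by_id() and the real tables:

typedef struct { unsigned id; const void *ops; } hal_row_t;

static const hal_row_t *find_row(const hal_row_t *table, size_t rows, unsigned id)
{
    for (size_t i = 0; i < rows; i++) {
        if (table[i].id == id)
            return &table[i];
    }
    return NULL;
}

static const void *resolve_arch_hal(const hal_row_t *arch_table, size_t arch_rows,
                                    const hal_row_t *chip_table, size_t chip_rows,
                                    unsigned gpu_arch, unsigned gpu_impl)
{
    const hal_row_t *row = find_row(arch_table, arch_rows, gpu_arch);
    const hal_row_t *chip = find_row(chip_table, chip_rows, gpu_arch | gpu_impl);
    const void *ops = row ? row->ops : NULL;

    if (chip)   // a chip entry, when present, wins over the generic arch entry
        ops = chip->ops;
    return ops;
}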
@@ -843,10 +898,14 @@ static void hal_override_properties(uvm_parent_gpu_t *parent_gpu)
     // Computing.
     //
     // TODO: Bug 200692962: Add support for access counters in vGPU
-    if ((parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) || g_uvm_global.conf_computing_enabled) {
+    if ((parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) || g_uvm_global.conf_computing_enabled)
         parent_gpu->access_counters_supported = false;
-        parent_gpu->access_counters_can_use_physical_addresses = false;
-    }
+
+    // TODO: Bug 4637114: [UVM] Remove support for physical access counter
+    // notifications. Always set to false, until we remove the PMM reverse
+    // mapping code.
+    parent_gpu->access_counters_can_use_physical_addresses = false;
 }
 
 void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)
@@ -1042,36 +1101,15 @@ void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry)
     UVM_DBG_PRINT("    timestamp              %llu\n", entry->timestamp);
 }
 
-const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type)
-{
-    BUILD_BUG_ON(UVM_ACCESS_COUNTER_TYPE_MAX != 2);
-
-    switch (access_counter_type) {
-        UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MIMC);
-        UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MOMC);
-        UVM_ENUM_STRING_DEFAULT();
-    }
-}
-
 void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry)
 {
-    if (!entry->address.is_virtual) {
-        UVM_DBG_PRINT("physical address: {0x%llx:%s}\n",
-                      entry->address.address,
-                      uvm_aperture_string(entry->address.aperture));
-    }
-    else {
-        UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address.address);
-        UVM_DBG_PRINT("    instance_ptr           {0x%llx:%s}\n",
-                      entry->virtual_info.instance_ptr.address,
-                      uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
-        UVM_DBG_PRINT("    mmu_engine_type        %s\n", uvm_mmu_engine_type_string(entry->virtual_info.mmu_engine_type));
-        UVM_DBG_PRINT("    mmu_engine_id          %u\n", entry->virtual_info.mmu_engine_id);
-        UVM_DBG_PRINT("    ve_id                  %u\n", entry->virtual_info.ve_id);
-    }
-
-    UVM_DBG_PRINT("    is_virtual             %u\n", entry->address.is_virtual);
-    UVM_DBG_PRINT("    counter_type           %s\n", uvm_access_counter_type_string(entry->counter_type));
+    UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address);
+    UVM_DBG_PRINT("    instance_ptr           {0x%llx:%s}\n",
+                  entry->instance_ptr.address,
+                  uvm_aperture_string(entry->instance_ptr.aperture));
+    UVM_DBG_PRINT("    mmu_engine_type        %s\n", uvm_mmu_engine_type_string(entry->mmu_engine_type));
+    UVM_DBG_PRINT("    mmu_engine_id          %u\n", entry->mmu_engine_id);
+    UVM_DBG_PRINT("    ve_id                  %u\n", entry->ve_id);
     UVM_DBG_PRINT("    counter_value          %u\n", entry->counter_value);
     UVM_DBG_PRINT("    subgranularity         0x%08x\n", entry->sub_granularity);
     UVM_DBG_PRINT("    bank                   %u\n", entry->bank);
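The removed uvm_access_counter_type_string() leaned on the UVM_ENUM_STRING_CASE()/UVM_ENUM_STRING_DEFAULT() helpers. For orientation, one plausible definition of this common stringify idiom (not necessarily the driver's exact macros):

#define ENUM_STRING_CASE(e)    case e: return #e
#define ENUM_STRING_DEFAULT()  default: return "UNKNOWN"

typedef enum { COUNTER_MIMC, COUNTER_MOMC } counter_type_t;

static const char *counter_type_string(counter_type_t t)
{
    switch (t) {
        ENUM_STRING_CASE(COUNTER_MIMC);   // expands to: case COUNTER_MIMC: return "COUNTER_MIMC"
        ENUM_STRING_CASE(COUNTER_MOMC);
        ENUM_STRING_DEFAULT();
    }
}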
@@ -494,6 +494,7 @@ uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU64 big_page_size);
 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU64 big_page_size);
 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU64 big_page_size);
 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell(NvU64 big_page_size);
+uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell_integrated(NvU64 big_page_size);
 
 void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
@@ -686,54 +687,52 @@ void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry);
 void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry);
 
 // Access counters
-typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
-typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
-typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
+typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters);
+typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters);
+typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters, NvU32 get);
 
 // Parse the entry on the given buffer index. This also clears the valid bit of
 // the entry in the buffer.
-typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_parent_gpu_t *parent_gpu,
+typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_access_counter_buffer_t *access_counters,
                                                             NvU32 index,
                                                             uvm_access_counter_buffer_entry_t *buffer_entry);
-typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
-typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
+typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_access_counter_buffer_t *access_counters,
+                                                               NvU32 index);
+typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_access_counter_buffer_t *access_counters,
+                                                                  NvU32 index);
 typedef NvU32 (*uvm_hal_access_counter_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
 typedef void (*uvm_hal_access_counter_clear_all_t)(uvm_push_t *push);
-typedef void (*uvm_hal_access_counter_clear_type_t)(uvm_push_t *push, uvm_access_counter_type_t type);
 typedef void (*uvm_hal_access_counter_clear_targeted_t)(uvm_push_t *push,
                                                         const uvm_access_counter_buffer_entry_t *buffer_entry);
 
-void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
-void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
-void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get);
-void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
+void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
+void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
+void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters,
+                                                                    NvU32 get);
+void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_access_counter_buffer_t *access_counters,
                                                                    NvU32 index,
                                                                    uvm_access_counter_buffer_entry_t *buffer_entry);
-bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
-void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
+bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
+                                                                      NvU32 index);
+void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
+                                                                         NvU32 index);
 NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push);
-void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type);
 void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
                                                                const uvm_access_counter_buffer_entry_t *buffer_entry);
 
-void uvm_hal_volta_enable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
-void uvm_hal_volta_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
-void uvm_hal_volta_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
-void uvm_hal_volta_access_counter_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
-                                                     NvU32 index,
-                                                     uvm_access_counter_buffer_entry_t *buffer_entry);
-bool uvm_hal_volta_access_counter_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
-void uvm_hal_volta_access_counter_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
-NvU32 uvm_hal_volta_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
-
-void uvm_hal_volta_access_counter_clear_all(uvm_push_t *push);
-void uvm_hal_volta_access_counter_clear_type(uvm_push_t *push, uvm_access_counter_type_t type);
-void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
-                                                 const uvm_access_counter_buffer_entry_t *buffer_entry);
-
-void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
-void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
+void uvm_hal_turing_enable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
+void uvm_hal_turing_disable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
+void uvm_hal_turing_clear_access_counter_notifications(uvm_access_counter_buffer_t *access_counters, NvU32 get);
+void uvm_hal_turing_access_counter_buffer_parse_entry(uvm_access_counter_buffer_t *access_counters,
+                                                      NvU32 index,
+                                                      uvm_access_counter_buffer_entry_t *buffer_entry);
+bool uvm_hal_turing_access_counter_buffer_entry_is_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index);
+void uvm_hal_turing_access_counter_buffer_entry_clear_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index);
 NvU32 uvm_hal_turing_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
 void uvm_hal_turing_access_counter_clear_all(uvm_push_t *push);
 void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push,
                                                   const uvm_access_counter_buffer_entry_t *buffer_entry);
 
 // The source and destination addresses must be 16-byte aligned. Note that the
 // best performance is achieved with 256-byte alignment. The decrypt size must
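The uvm_access_counter_buffer_t handle threaded through the new signatures is consumed as access_counters->parent_gpu and access_counters->index in the Maxwell stubs later in this diff. A minimal sketch of the shape those two uses imply; the real struct certainly carries more state (mapped buffer pages, get/put pointers, and so on), and any field beyond these two is an assumption:

// Hypothetical reduction of uvm_access_counter_buffer_t to the fields this
// diff visibly relies on.
typedef struct uvm_access_counter_buffer_struct
{
    uvm_parent_gpu_t *parent_gpu;  // GPU that owns this notification buffer
    NvU32 index;                   // buffer index (Blackwell+ can have several)
} uvm_access_counter_buffer_t;

Passing this handle instead of the bare uvm_parent_gpu_t lets one GPU expose multiple notification buffers without widening every HAL entry point with an extra index parameter.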
@@ -786,7 +785,6 @@ struct uvm_host_hal_struct
     uvm_hal_host_clear_faulted_channel_method_t clear_faulted_channel_method;
     uvm_hal_host_clear_faulted_channel_register_t clear_faulted_channel_register;
     uvm_hal_access_counter_clear_all_t access_counter_clear_all;
-    uvm_hal_access_counter_clear_type_t access_counter_clear_type;
     uvm_hal_access_counter_clear_targeted_t access_counter_clear_targeted;
     uvm_hal_get_time_t get_time;
 };
@@ -866,7 +864,8 @@ struct uvm_sec2_hal_struct
 
 typedef struct
 {
-    // id is either a hardware class or GPU architecture
+    // TODO: BUG 5044266: the chip ops should be separated from the arch ops.
+    // id is either a hardware class, a chip or a GPU architecture
     NvU32 id;
     NvU32 parent_id;
     union
@@ -877,7 +876,7 @@ typedef struct
         // ce_ops: id is a hardware class
         uvm_ce_hal_t ce_ops;
 
-        // arch_ops: id is an architecture
+        // arch_ops: id is an architecture or a chip
        uvm_arch_hal_t arch_ops;
 
         // fault_buffer_ops: id is an architecture
@@ -471,69 +471,34 @@ static uvm_membar_t uvm_membar_max(uvm_membar_t membar_1, uvm_membar_t membar_2)
     return max(membar_1, membar_2);
 }
 
-typedef enum
-{
-    UVM_ACCESS_COUNTER_TYPE_MIMC = 0,
-    UVM_ACCESS_COUNTER_TYPE_MOMC,
-
-    UVM_ACCESS_COUNTER_TYPE_MAX,
-} uvm_access_counter_type_t;
-
-const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type);
-
 struct uvm_access_counter_buffer_entry_struct
 {
-    // Whether this counter refers to outbound accesses to remote GPUs or
-    // sysmem (MIMC), or it refers to inbound accesses from CPU or a non-peer
-    // GPU (whose accesses are routed through the CPU, too) to vidmem (MOMC)
-    uvm_access_counter_type_t counter_type;
-
     // Address of the region for which a notification was sent
-    uvm_gpu_address_t address;
+    NvU64 address;
 
-    union
-    {
-        // These fields are only valid if address.is_virtual is true
-        struct
-        {
-            // Instance pointer of one of the channels in the TSG that triggered
-            // the notification.
-            uvm_gpu_phys_address_t instance_ptr;
+    // Instance pointer of one of the channels in the TSG that triggered
+    // the notification.
+    uvm_gpu_phys_address_t instance_ptr;
 
-            uvm_mmu_engine_type_t mmu_engine_type;
+    uvm_mmu_engine_type_t mmu_engine_type;
 
-            NvU32 mmu_engine_id;
+    NvU32 mmu_engine_id;
 
-            // Identifier of the subcontext that performed the memory accesses
-            // that triggered the notification. This value, combined with the
-            // instance_ptr, is needed to obtain the GPU VA space of the process
-            // that triggered the notification.
-            NvU32 ve_id;
+    // Identifier of the subcontext that performed the memory accesses
+    // that triggered the notification. This value, combined with the
+    // instance_ptr, is needed to obtain the GPU VA space of the process
+    // that triggered the notification.
+    NvU32 ve_id;
 
-            // VA space for the address that triggered the notification
-            uvm_va_space_t *va_space;
-        } virtual_info;
+    // VA space for the address that triggered the notification
+    uvm_va_space_t *va_space;
 
-        // These fields are only valid if address.is_virtual is false
-        struct
-        {
-            // Processor id where data is resident
-            //
-            // Although this information is not tied to a VA space, we can use
-            // a regular processor id because P2P is not allowed between
-            // partitioned GPUs.
-            uvm_processor_id_t resident_id;
-
-        } physical_info;
-    };
-
-    // This is the GPU that triggered the notification. Note that physical
-    // address based notifications are only supported on non-MIG-capable GPUs.
+    // This is the GPU that triggered the notification.
     uvm_gpu_t *gpu;
 
     // Number of times the tracked region was accessed since the last time it
     // was cleared. Counter values saturate at the maximum value supported by
-    // the GPU (2^16 - 1 in Volta)
+    // the GPU (2^16 - 1 on Turing)
     NvU32 counter_value;
 
     // When the granularity of the tracked regions is greater than 64KB, the
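With physical notifications gone, every entry is virtual, so the tagged union in the old layout no longer earns its keep. A toy before/after sketch of that layout change, using stand-in types rather than the driver's:

// 570.x style: a tag plus a union; only one arm is valid at a time.
typedef struct {
    int is_virtual;                                  // tag
    unsigned long long address;
    union {
        struct { unsigned ve_id; } virtual_info;     // valid if is_virtual
        struct { int resident_id; } physical_info;   // valid if !is_virtual
    };
} old_entry_t;

// 575.x style: the physical arm is removed, the fields flatten, and the
// tag becomes unnecessary.
typedef struct {
    unsigned long long address;                      // always a virtual address
    unsigned ve_id;
} new_entry_t;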
@@ -34,8 +34,9 @@ MODULE_PARM_DESC(uvm_disable_hmm,
                  "enabled if is not supported in this driver build "
                  "configuration, or if ATS settings conflict with HMM.");
 #else
-// So far, we've only tested HMM on x86_64, so disable it by default everywhere
-// else.
+// TODO: Bug 4103580: UVM: HMM: implement HMM support on ARM64 (aarch64)
+// So far, we've only tested HMM on x86_64 and aarch64 and it is broken on
+// aarch64 so disable it by default everywhere except x86_64.
 static bool uvm_disable_hmm = true;
 MODULE_PARM_DESC(uvm_disable_hmm,
                  "Force-disable HMM functionality in the UVM driver. "
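For reference, the uvm_disable_hmm flag above pairs with a module_param() registration elsewhere in the file. A self-contained sketch of that standard kernel pattern; the 0444 permission bits are illustrative, not necessarily what nvidia-uvm uses:

#include <linux/module.h>
#include <linux/moduleparam.h>

static bool uvm_disable_hmm = true;

// Exposes /sys/module/<module>/parameters/uvm_disable_hmm and makes the flag
// settable at load time, e.g.: insmod nvidia-uvm.ko uvm_disable_hmm=0
module_param(uvm_disable_hmm, bool, 0444);
MODULE_PARM_DESC(uvm_disable_hmm,
                 "Force-disable HMM functionality in the UVM driver.");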
@@ -186,7 +187,7 @@ static NV_STATUS hmm_copy_devmem_page(struct page *dst_page, struct page *src_pa
     if (status != NV_OK)
         goto out;
 
-    status = uvm_parent_gpu_map_cpu_pages(gpu->parent, dst_page, PAGE_SIZE, &dma_addr);
+    status = uvm_gpu_map_cpu_page(gpu, dst_page, &dma_addr);
     if (status != NV_OK)
         goto out_unmap_gpu;
 
@@ -50,12 +50,10 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
 
     parent_gpu->utlb_per_gpc_count = uvm_hopper_get_utlbs_per_gpc(parent_gpu);
 
-    parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
-                                                          parent_gpu->utlb_per_gpc_count;
+    parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
     {
         uvm_fault_buffer_entry_t *dummy;
-        UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
-                   (sizeof(dummy->fault_source.utlb_id) * 8)));
+        UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
     }
 
     // A single top level PDE on Hopper covers 64 PB and that's the minimum
@@ -97,10 +95,6 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
 
     parent_gpu->non_replayable_faults_supported = true;
 
-    parent_gpu->access_counters_supported = true;
-
-    parent_gpu->access_counters_can_use_physical_addresses = false;
-
     parent_gpu->fault_cancel_va_supported = true;
 
     parent_gpu->scoped_atomics_supported = true;
 
@@ -393,9 +393,13 @@ bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push,
 bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
 {
     uvm_gpu_t *gpu = uvm_push_get_gpu(push);
-    const bool peer_copy = uvm_gpu_address_is_peer(gpu, dst) || uvm_gpu_address_is_peer(gpu, src);
 
-    if (push->channel && peer_copy && !uvm_channel_is_p2p(push->channel)) {
+    if (uvm_gpu_address_is_peer(gpu, src)) {
+        UVM_ERR_PRINT("Peer copy from peer address (0x%llx) is not allowed!", src.address);
+        return false;
+    }
+
+    if (push->channel && uvm_gpu_address_is_peer(gpu, dst) && !uvm_channel_is_p2p(push->channel)) {
         UVM_ERR_PRINT("Peer copy from address (0x%llx) to address (0x%llx) should use designated p2p channels!",
                       src.address,
                       dst.address);
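Distilled to its decision logic, the validity check above splits the old single peer-copy test in two: copies *from* a peer address are rejected outright, and copies *to* a peer address must run on a channel reserved for p2p work. A standalone sketch with stand-in predicates:

typedef struct { int from_peer; int to_peer; int on_p2p_channel; } copy_req_t;

static int memcopy_is_valid(const copy_req_t *req)
{
    if (req->from_peer)
        return 0;  // reading through a peer mapping is not allowed
    if (req->to_peer && !req->on_p2p_channel)
        return 0;  // peer writes must use the designated p2p channels
    return 1;
}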
@@ -212,7 +212,13 @@ static NvU64 make_pte_hopper(uvm_aperture_t aperture, NvU64 address, uvm_prot_t
 
 static NvU64 make_sked_reflected_pte_hopper(void)
 {
+    // On discrete GPUs, SKED Reflected PTEs may use either the local aperture
+    // or the system non coherent aperture. However, integrated GPUs may only
+    // use the system non-coherent aperture. We always use the system
+    // non-coherent aperture as that is common to both discrete and integrated
+    // GPUs.
     return HWCONST64(_MMU_VER3, PTE, VALID, TRUE) |
            HWCONST64(_MMU_VER3, PTE, APERTURE, SYSTEM_NON_COHERENT_MEMORY) |
            HWVALUE64(_MMU_VER3, PTE, PCF, pte_pcf(UVM_PROT_READ_WRITE_ATOMIC, UVM_MMU_PTE_FLAGS_NONE)) |
            HWVALUE64(_MMU_VER3, PTE, KIND, NV_MMU_PTE_KIND_SMSKED_MESSAGE);
 }
@@ -323,11 +329,6 @@ static NvU32 pde_pcf(bool valid, pde_type_t pde_type, uvm_page_directory_t *dir,
     if (!g_uvm_global.ats.enabled)
         return pcf[pde_type][ATS_ALLOWED];
 
-    // We assume all supported ATS platforms use canonical form address.
-    // See comments in uvm_gpu.c:uvm_gpu_can_address() and in
-    // uvm_mmu.c:page_tree_ats_init();
-    UVM_ASSERT(uvm_platform_uses_canonical_form_address());
-
     // Hopper GPUs on ATS-enabled systems, perform a parallel lookup on both
     // ATS and GMMU page tables. For managed memory we need to prevent this
     // parallel lookup since we would not get any GPU fault if the CPU has
@@ -526,25 +526,6 @@ typedef struct
     NV_STATUS rmStatus; // OUT
 } UVM_MEM_MAP_PARAMS;
 
-//
-// UvmDebugAccessMemory
-//
-#define UVM_DEBUG_ACCESS_MEMORY UVM_IOCTL_BASE(36)
-
-typedef struct
-{
-#ifdef __linux__
-    NvS32 sessionIndex; // IN
-#endif
-    NvU64 baseAddress NV_ALIGN_BYTES(8); // IN
-    NvU64 sizeInBytes NV_ALIGN_BYTES(8); // IN
-    NvU32 accessType; // IN (UvmDebugAccessType)
-    NvU64 buffer NV_ALIGN_BYTES(8); // IN/OUT
-    NvBool isBitmaskSet; // OUT
-    NvU64 bitmask NV_ALIGN_BYTES(8); // IN/OUT
-    NV_STATUS rmStatus; // OUT
-} UVM_DEBUG_ACCESS_MEMORY_PARAMS;
-
 //
 // UvmRegisterGpu
 //
@@ -1009,20 +990,35 @@ typedef struct
 //
 #define UVM_POPULATE_PAGEABLE UVM_IOCTL_BASE(71)
 
-// Allow population of managed ranges.
-//
-// The UVM driver must have builtin tests enabled for the API to use the
-// following two flags.
+// Allow population of managed ranges. The goal is to validate that it is
+// possible to populate pageable ranges backed by VMAs with the VM_MIXEDMAP or
+// VM_DONTEXPAND special flags set. But since there is no portable way to force
+// allocation of such memory from user space, and it is not safe to change the
+// flags of an already-created VMA from kernel space, we take advantage of the
+// fact that managed ranges have both special flags set at creation time (see
+// uvm_mmap).
 #define UVM_POPULATE_PAGEABLE_FLAG_ALLOW_MANAGED 0x00000001
 
 // By default UVM_POPULATE_PAGEABLE returns an error if the destination vma
 // does not have read permission. This flag skips that check.
 #define UVM_POPULATE_PAGEABLE_FLAG_SKIP_PROT_CHECK 0x00000002
 
-#define UVM_POPULATE_PAGEABLE_FLAGS_TEST_ALL (UVM_POPULATE_PAGEABLE_FLAG_ALLOW_MANAGED | \
-                                              UVM_POPULATE_PAGEABLE_FLAG_SKIP_PROT_CHECK)
+// By default UVM_POPULATE_PAGEABLE returns an error if the destination vma
+// is VM_IO or VM_PFNMAP. This flag skips that check.
+#define UVM_POPULATE_PAGEABLE_FLAG_ALLOW_SPECIAL 0x00000004
+
+// These flags are used internally within the driver and are not allowed from
+// user space.
+#define UVM_POPULATE_PAGEABLE_FLAGS_INTERNAL UVM_POPULATE_PAGEABLE_FLAG_ALLOW_SPECIAL
+
+// These flags are allowed from user space only when builtin tests are enabled.
+// Some of them may also be used internally within the driver in non-test use
+// cases.
+#define UVM_POPULATE_PAGEABLE_FLAGS_TEST (UVM_POPULATE_PAGEABLE_FLAG_ALLOW_MANAGED | \
+                                          UVM_POPULATE_PAGEABLE_FLAG_SKIP_PROT_CHECK)
 
-#define UVM_POPULATE_PAGEABLE_FLAGS_ALL UVM_POPULATE_PAGEABLE_FLAGS_TEST_ALL
+#define UVM_POPULATE_PAGEABLE_FLAGS_ALL (UVM_POPULATE_PAGEABLE_FLAGS_INTERNAL | \
+                                         UVM_POPULATE_PAGEABLE_FLAGS_TEST)
 
 typedef struct
 {
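The three masks above imply a layered validation at the ioctl boundary: unknown bits fail outright, internal bits never come from user space, and test bits require a builtin-tests driver build. A hypothetical sketch of that check; populate_pageable_flags_ok() is an illustrative helper, not the driver's actual entry-point code:

static int populate_pageable_flags_ok(unsigned flags, int builtin_tests_enabled)
{
    if (flags & ~UVM_POPULATE_PAGEABLE_FLAGS_ALL)
        return 0;  // undefined flag bits
    if (flags & UVM_POPULATE_PAGEABLE_FLAGS_INTERNAL)
        return 0;  // ALLOW_SPECIAL is kernel-internal only
    if ((flags & UVM_POPULATE_PAGEABLE_FLAGS_TEST) && !builtin_tests_enabled)
        return 0;  // test-only flags need a test-enabled build
    return 1;
}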
@@ -1142,7 +1138,6 @@ typedef struct
     NV_STATUS rmStatus; // OUT
 } UVM_IS_8_SUPPORTED_PARAMS;
 
-
 #ifdef __cplusplus
 }
 #endif
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2020 NVIDIA Corporation
+    Copyright (c) 2016-2024 NVIDIA Corporation
 
     Permission is hereby granted, free of charge, to any person obtaining a copy
     of this software and associated documentation files (the "Software"), to
@@ -111,13 +111,13 @@ void uvm_kvmalloc_exit(void)
         return;
 
     if (atomic_long_read(&g_uvm_leak_checker.bytes_allocated) > 0) {
-        printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
-        printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "Memory leak of %lu bytes detected.%s\n",
-               atomic_long_read(&g_uvm_leak_checker.bytes_allocated),
-               uvm_leak_checker < UVM_KVMALLOC_LEAK_CHECK_ORIGIN ?
+        UVM_INFO_PRINT("!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
+        UVM_INFO_PRINT("Memory leak of %lu bytes detected.%s\n",
+                       atomic_long_read(&g_uvm_leak_checker.bytes_allocated),
+                       uvm_leak_checker < UVM_KVMALLOC_LEAK_CHECK_ORIGIN ?
                            " insmod with uvm_leak_checker=2 for detailed information." :
                            "");
-        printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
+        UVM_INFO_PRINT("!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
 
         if (g_uvm_global.unload_state.ptr)
             *g_uvm_global.unload_state.ptr |= UVM_TEST_UNLOAD_STATE_MEMORY_LEAK;
@@ -129,12 +129,12 @@ void uvm_kvmalloc_exit(void)
         uvm_rb_tree_for_each_safe(node, next, &g_uvm_leak_checker.allocation_info) {
             uvm_kvmalloc_info_t *info = container_of(node, uvm_kvmalloc_info_t, node);
 
-            printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "    Leaked %zu bytes from %s:%d:%s (0x%llx)\n",
-                   uvm_kvsize((void *)((uintptr_t)info->node.key)),
-                   kbasename(info->file),
-                   info->line,
-                   info->function,
-                   info->node.key);
+            UVM_INFO_PRINT("    Leaked %zu bytes from %s:%d:%s (0x%llx)\n",
+                           uvm_kvsize((void *)((uintptr_t)info->node.key)),
+                           kbasename(info->file),
+                           info->line,
+                           info->function,
+                           info->node.key);
 
             // Free so we don't keep eating up memory while debugging. Note that
             // this also removes the entry from the table, frees info, and drops
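These two hunks swap raw printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX ...) calls for the driver's UVM_INFO_PRINT() wrapper. The wrapper's real definition is not shown in this diff; one plausible minimal shape, for orientation only (the actual macro likely also records function context):

// Illustrative only: a prefix-adding printk wrapper in the spirit of
// UVM_INFO_PRINT(); names here are examples, not the driver's macros.
#define EXAMPLE_UVM_PREFIX "nvidia-uvm: "
#define EXAMPLE_INFO_PRINT(fmt, ...) \
    printk(KERN_INFO EXAMPLE_UVM_PREFIX fmt, ##__VA_ARGS__)

Centralizing the prefix and log level in one macro keeps the call sites short and makes a later change of severity or format a one-line edit.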
@@ -76,14 +76,16 @@
 #include <linux/ratelimit.h>
 #endif
 
-#if defined(NV_PNV_NPU2_INIT_CONTEXT_PRESENT)
-#include <asm/powernv.h>
-#endif
-
 #if defined(NV_LINUX_SCHED_TASK_STACK_H_PRESENT)
 #include <linux/sched/task_stack.h>
 #endif
 
+#include "linux/bitmap.h"
+#include "linux/bitops.h"
+#include "linux/gfp.h"
+#include "linux/pagemap.h"
+#include "linux/types.h"
+
 #if !defined(NV_SG_DMA_PAGE_ITER_PRESENT)
 #include <linux/scatterlist.h>
 #endif
@@ -183,7 +185,7 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
         printk(fmt, ##__VA_ARGS__); \
     } while (0)
 
-#define NV_UVM_GFP_FLAGS (GFP_KERNEL)
+#define NV_UVM_GFP_FLAGS (GFP_KERNEL | __GFP_NOMEMALLOC)
 
 #if defined(NVCPU_X86)
 /* Some old IA32 kernels don't have 64/64 division routines,
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2022 NVIDIA Corporation
+    Copyright (c) 2015-2025 NVIDIA Corporation
 
     Permission is hereby granted, free of charge, to any person obtaining a copy
     of this software and associated documentation files (the "Software"), to
@@ -27,12 +27,13 @@
 
 const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
 {
-    BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 36);
+    BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 37);
 
     switch (lock_order) {
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL_PM);
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL);
+        UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_ACCESS_COUNTERS);
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_ISR);
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_MMAP_LOCK);
         UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACES_LIST);
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2015-2022 NVIDIA Corporation
+    Copyright (c) 2015-2025 NVIDIA Corporation
 
     Permission is hereby granted, free of charge, to any person obtaining a copy
     of this software and associated documentation files (the "Software"), to
@@ -69,6 +69,17 @@
 //
 // This should be taken whenever global GPU state might need to be modified.
 //
+// - Access counters VA space enablement state lock
+//   Order: UVM_LOCK_ORDER_ACCESS_COUNTERS
+//   Exclusive lock (mutex)
+//
+//   This protects VA space state associated with access counters enablement.
+//   Blackwell+ GPUs may have multiple access counters notification buffers
+//   and their "atomic" enablement is protected by this lock.
+//
+//   This should be taken whenever VA space access counters state might need
+//   to be modified.
+//
 // - GPU ISR lock
 //   Order: UVM_LOCK_ORDER_ISR
 //   Exclusive lock (mutex) per gpu
@@ -487,6 +498,7 @@ typedef enum
     UVM_LOCK_ORDER_INVALID = 0,
     UVM_LOCK_ORDER_GLOBAL_PM,
     UVM_LOCK_ORDER_GLOBAL,
+    UVM_LOCK_ORDER_ACCESS_COUNTERS,
     UVM_LOCK_ORDER_ISR,
     UVM_LOCK_ORDER_MMAP_LOCK,
    UVM_LOCK_ORDER_VA_SPACES_LIST,
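A new level in this enum is consumed by initializing a mutex with its order; UVM's debug builds then check that locks are only taken in increasing order. An illustrative sketch: uvm_mutex_init()/uvm_mutex_lock()/uvm_mutex_unlock() are real UVM helpers, but this global and call site are hypothetical.

static uvm_mutex_t g_access_counters_enablement_lock;

static void example_init(void)
{
    uvm_mutex_init(&g_access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
}

static void example_enable(void)
{
    // Per the enum above, this must be taken after UVM_LOCK_ORDER_GLOBAL
    // but before UVM_LOCK_ORDER_ISR.
    uvm_mutex_lock(&g_access_counters_enablement_lock);
    // ... atomically toggle enablement across all notification buffers ...
    uvm_mutex_unlock(&g_access_counters_enablement_lock);
}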
@@ -742,7 +754,8 @@ bool __uvm_locking_initialized(void);
         ret; \
     })
 
-// Helper for calling a UVM-RM interface function that returns void with lock recording
+// Helper for calling a UVM-RM interface function that returns void with lock
+// recording
 #define uvm_rm_locked_call_void(call) ({ \
     uvm_record_lock_rm_all(); \
     call; \
@@ -947,13 +947,15 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_external_t *ext
         goto error;
     }
 
-    // Check for the maximum page size for the mapping of vidmem allocations,
-    // the vMMU segment size may limit the range of page sizes.
-    biggest_mapping_page_size = uvm_mmu_biggest_page_size_up_to(&gpu_va_space->page_tables,
-                                                                mapping_gpu->mem_info.max_vidmem_page_size);
-    if (!ext_gpu_map->is_sysmem && (ext_gpu_map->gpu == ext_gpu_map->owning_gpu) &&
-        (mapping_page_size > biggest_mapping_page_size))
-        mapping_page_size = biggest_mapping_page_size;
+    if (mapping_gpu->mem_info.size) {
+        // Check for the maximum page size for the mapping of vidmem
+        // allocations, the vMMU segment size may limit the range of page sizes.
+        biggest_mapping_page_size = uvm_mmu_biggest_page_size_up_to(&gpu_va_space->page_tables,
+                                                                    mapping_gpu->mem_info.max_vidmem_page_size);
+        if (!ext_gpu_map->is_sysmem && (ext_gpu_map->gpu == ext_gpu_map->owning_gpu) &&
+            (mapping_page_size > biggest_mapping_page_size))
+            mapping_page_size = biggest_mapping_page_size;
+    }
 
     mem_info.pageSize = mapping_page_size;
 
@@ -61,10 +61,6 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
 
     parent_gpu->non_replayable_faults_supported = false;
 
-    parent_gpu->access_counters_supported = false;
-
-    parent_gpu->access_counters_can_use_physical_addresses = false;
-
     parent_gpu->fault_cancel_va_supported = false;
 
     parent_gpu->scoped_atomics_supported = false;
 
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2021 NVIDIA Corporation
+    Copyright (c) 2021-2025 NVIDIA Corporation
 
     Permission is hereby granted, free of charge, to any person obtaining a copy
     of this software and associated documentation files (the "Software"), to
@@ -24,25 +24,29 @@
 #include "uvm_gpu.h"
 #include "uvm_hal.h"
 
-void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
+void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters)
 {
     UVM_ASSERT_MSG(false,
-                   "enable_access_counter_notifications is not supported on GPU: %s.\n",
-                   uvm_parent_gpu_name(parent_gpu));
+                   "enable_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
+                   uvm_parent_gpu_name(access_counters->parent_gpu),
+                   access_counters->index);
 }
 
-void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
+void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters)
 {
     UVM_ASSERT_MSG(false,
-                   "disable_access_counter_notifications is not supported on GPU: %s.\n",
-                   uvm_parent_gpu_name(parent_gpu));
+                   "disable_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
+                   uvm_parent_gpu_name(access_counters->parent_gpu),
+                   access_counters->index);
 }
 
-void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get)
+void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters,
+                                                                    NvU32 get)
 {
     UVM_ASSERT_MSG(false,
-                   "clear_access_counter_notifications is not supported on GPU: %s.\n",
-                   uvm_parent_gpu_name(parent_gpu));
+                   "clear_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
+                   uvm_parent_gpu_name(access_counters->parent_gpu),
+                   access_counters->index);
 }
 
 NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu)
@@ -53,26 +57,31 @@ NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gp
     return 0;
 }
 
-bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
+bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
+                                                                      NvU32 index)
 {
     UVM_ASSERT_MSG(false,
-                   "access_counter_buffer_entry_is_valid is not supported on GPU: %s.\n",
-                   uvm_parent_gpu_name(parent_gpu));
+                   "access_counter_buffer_entry_is_valid is not supported on GPU: %s notif buf index: %u.\n",
+                   uvm_parent_gpu_name(access_counters->parent_gpu),
+                   access_counters->index);
     return false;
 }
 
-void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
+void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
+                                                                         NvU32 index)
 {
     UVM_ASSERT_MSG(false,
-                   "access_counter_buffer_entry_clear_valid is not supported on GPU: %s.\n",
-                   uvm_parent_gpu_name(parent_gpu));
+                   "access_counter_buffer_entry_clear_valid is not supported on GPU: %s notif buf index: %u.\n",
+                   uvm_parent_gpu_name(access_counters->parent_gpu),
+                   access_counters->index);
 }
 
-void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
+void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_access_counter_buffer_t *access_counters,
                                                                    NvU32 index,
                                                                    uvm_access_counter_buffer_entry_t *buffer_entry)
 {
     UVM_ASSERT_MSG(false,
-                   "access_counter_buffer_parse_entry is not supported on GPU: %s.\n",
-                   uvm_parent_gpu_name(parent_gpu));
+                   "access_counter_buffer_parse_entry is not supported on GPU: %s notif buf index: %u.\n",
+                   uvm_parent_gpu_name(access_counters->parent_gpu),
+                   access_counters->index);
 }
Some files were not shown because too many files have changed in this diff.